conv_2d_test.cc 52.3 KB
Newer Older
L
Liangliang He 已提交
1
// Copyright 2018 The MACE Authors. All Rights Reserved.
L
Liangliang He 已提交
2
//
L
Liangliang He 已提交
3 4 5
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
L
Liangliang He 已提交
6
//
L
Liangliang He 已提交
7 8 9 10 11 12 13
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
L
Liangliang He 已提交
14

15
#include <fstream>
L
liutuo 已提交
16 17
#include <vector>

李寅 已提交
18
#include "mace/ops/common/conv_pool_2d_util.h"
19
#include "mace/ops/ops_test_util.h"
L
Liangliang He 已提交
20

L
liutuo 已提交
21 22 23
namespace mace {
namespace ops {
namespace test {
L
Liangliang He 已提交
24

25 26 27
class Conv2dOpTest : public OpsTestBase {
 protected:
  virtual void SetUp() {
L
luxuhui 已提交
28
#ifdef MACE_ENABLE_OPENCL
29
    OpTestContext::Get()->SetOCLImageTestFlag();
L
luxuhui 已提交
30
#endif
31 32
  }
};
L
Liangliang He 已提交
33

34
namespace {
李寅 已提交
35
template <DeviceType D, typename T>
36
void TestNHWCSimple3x3VALID(int wino_blk_size = 0) {
L
liuqi 已提交
37 38
  OpsTestNet net;
  // Add input data
39
  net.AddInputFromArray<D, float>(
李寅 已提交
40 41
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
42
  net.AddInputFromArray<D, float>(
李寅 已提交
43 44
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
45 46
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
      true);
47 48
  net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true);
  const std::vector<index_t> output_shape = {1, 1, 1, 1};
L
liuqi 已提交
49

李寅 已提交
50
  if (D == DeviceType::CPU) {
51 52
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
李寅 已提交
53
    OpDefBuilder("Conv2D", "Conv2dTest")
李寅 已提交
54 55 56 57 58 59 60 61
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
李寅 已提交
62 63
    // Run
    net.RunOp(D);
64 65
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
66
  } else if (D == DeviceType::GPU) {
L
liuqi 已提交
67
    OpDefBuilder("Conv2D", "Conv2dTest")
68 69 70 71 72
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .OutputShape(output_shape)
李寅 已提交
73 74 75 76
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
77
        .AddIntArg("wino_block_size", wino_blk_size)
李寅 已提交
78
        .Finalize(net.NewOperatorDef());
L
liuqi 已提交
79 80 81

    net.RunOp(D);
  } else {
李寅 已提交
82
    MACE_NOT_IMPLEMENTED;
L
liuqi 已提交
83 84
  }

85
  auto expected = net.CreateTensor<float>(output_shape, {18.1f});
86 87 88 89 90
  if (DataTypeToEnum<T>::value == DataType::DT_FLOAT) {
    ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
  } else {
    ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-3, 1e-3);
  }
L
liuqi 已提交
91 92
}

李寅 已提交
93
template <DeviceType D, typename T>
94
void TestNHWCSimple3x3SAME(int wino_blk_size = 0) {
L
liuqi 已提交
95 96 97
  OpsTestNet net;

  // Add input data
98
  net.AddInputFromArray<D, float>(
李寅 已提交
99 100
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
101
  net.AddInputFromArray<D, float>(
李寅 已提交
102 103
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
104 105
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
      true);
106 107
  net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true);
  const std::vector<index_t> output_shape = {1, 3, 3, 1};
L
liuqi 已提交
108

李寅 已提交
109
  if (D == DeviceType::CPU) {
110 111
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
李寅 已提交
112
    OpDefBuilder("Conv2D", "Conv2dTest")
李寅 已提交
113 114 115 116 117 118 119 120
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
李寅 已提交
121 122
    // Run
    net.RunOp(D);
123 124
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
125
  } else if (D == DeviceType::GPU) {
L
liuqi 已提交
126
    OpDefBuilder("Conv2D", "Conv2dTest")
127 128 129 130 131
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .OutputShape(output_shape)
李寅 已提交
132 133 134 135
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
136
        .AddIntArg("wino_block_size", wino_blk_size)
李寅 已提交
137
        .Finalize(net.NewOperatorDef());
L
liuqi 已提交
138 139 140
    // Run
    net.RunOp(D);
  } else {
李寅 已提交
141
    MACE_NOT_IMPLEMENTED;
L
liuqi 已提交
142 143
  }

144
  auto expected = net.CreateTensor<float>(
145
      output_shape,
李寅 已提交
146
      {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f});
L
liuqi 已提交
147

148 149 150 151 152
  if (DataTypeToEnum<T>::value == DataType::DT_FLOAT) {
    ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
  } else {
    ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-3, 1e-3);
  }
L
liuqi 已提交
153
}
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212

// 3x3 convolution with unequal strides {1, 2}, SAME padding, on a 3x3x2
// all-ones input; the width dimension is halved (output 1x3x2x1).
// wino_blk_size > 0 selects the Winograd path on GPU (ignored on CPU).
template <DeviceType D, typename T>
void TestNHWCSimple3x3NeqStride(int wino_blk_size = 0) {
  OpsTestNet net;
  // Add input data
  net.AddInputFromArray<D, float>(
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  net.AddInputFromArray<D, float>(
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
      true);
  net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true);
  const std::vector<index_t> output_shape = {1, 3, 2, 1};

  if (D == DeviceType::CPU) {
    // CPU kernels run in NCHW; convert in and out of that layout.
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {1, 2})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef())
    // Run
    net.RunOp(D);
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
  } else if (D == DeviceType::GPU) {
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .OutputShape(output_shape)
        .AddIntsArg("strides", {1, 2})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .AddIntArg("wino_block_size", wino_blk_size)
        .Finalize(net.NewOperatorDef());

    net.RunOp(D);
  } else {
    MACE_NOT_IMPLEMENTED;
  }

  auto expected = net.CreateTensor<float>(
      output_shape, {8.1f, 8.1f, 12.1f, 12.1f, 8.1f, 8.1f});
  // fp16 (half) needs a looser tolerance than fp32.
  if (DataTypeToEnum<T>::value == DataType::DT_FLOAT) {
    ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
  } else {
    ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-3, 1e-3);
  }
}
213
}  // namespace
L
liuqi 已提交
214 215

// Basic 3x3 conv correctness on CPU for both padding modes.
TEST_F(Conv2dOpTest, CPUSimple) {
  TestNHWCSimple3x3VALID<DeviceType::CPU, float>();
  TestNHWCSimple3x3SAME<DeviceType::CPU, float>();
}

L
liuqi 已提交
220
// Basic 3x3 conv correctness on the OpenCL (GPU) path, fp32.
TEST_F(Conv2dOpTest, OPENCLSimple) {
  TestNHWCSimple3x3VALID<DeviceType::GPU, float>();
  TestNHWCSimple3x3SAME<DeviceType::GPU, float>();
}

225 226 227 228 229
// Basic 3x3 conv on the OpenCL path in half precision; the helpers apply
// a looser tolerance for fp16.
TEST_F(Conv2dOpTest, OPENCLHalfSimple) {
  TestNHWCSimple3x3VALID<DeviceType::GPU, half>();
  TestNHWCSimple3x3SAME<DeviceType::GPU, half>();
}

230 231 232 233 234
// Winograd-accelerated 3x3 conv on the OpenCL path (block sizes 2 and 4).
TEST_F(Conv2dOpTest, OPENCLSimpleWinograd) {
  TestNHWCSimple3x3SAME<DeviceType::GPU, float>(4);
  TestNHWCSimple3x3VALID<DeviceType::GPU, float>(2);
  TestNHWCSimple3x3VALID<DeviceType::GPU, half>(2);
  // TODO(liutuo): the precision of the last value is not normal.
  //  TestNHWCSimple3x3SAME<DeviceType::GPU, half>(4);
}

// Unequal height/width strides on both device types.
TEST_F(Conv2dOpTest, NotEqualStrideSimple) {
  TestNHWCSimple3x3NeqStride<DeviceType::CPU, float>();
  TestNHWCSimple3x3NeqStride<DeviceType::GPU, float>();
}

243
namespace {
李寅 已提交
244
template <DeviceType D, typename T>
L
liuqi 已提交
245 246 247 248
void TestNHWCSimple3x3WithoutBias() {
  OpsTestNet net;

  // Add input data
249
  net.AddInputFromArray<D, T>(
李寅 已提交
250 251
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
252
  net.AddInputFromArray<D, T>(
李寅 已提交
253 254
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
255 256
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
      true);
L
liuqi 已提交
257

李寅 已提交
258
  if (D == DeviceType::CPU) {
259 260
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
李寅 已提交
261
    OpDefBuilder("Conv2D", "Conv2dTest")
李寅 已提交
262 263 264 265 266 267 268 269
        .Input("InputNCHW")
        .Input("Filter")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());
李寅 已提交
270 271 272

    // Run
    net.RunOp(D);
273 274
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
275
  } else if (D == DeviceType::GPU) {
L
liuqi 已提交
276
    OpDefBuilder("Conv2D", "Conv2dTest")
277 278 279
        .Input("Input")
        .Input("Filter")
        .Output("Output")
李寅 已提交
280 281 282 283 284
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());
L
liuqi 已提交
285 286 287
    // Run
    net.RunOp(D);
  } else {
李寅 已提交
288
    MACE_NOT_IMPLEMENTED;
L
liuqi 已提交
289 290 291
  }

  // Check
292
  auto expected = net.CreateTensor<float>({1, 1, 1, 1}, {18.0f});
L
liuqi 已提交
293

294
  ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 1e-5);
L
liuqi 已提交
295
}
296
}  // namespace
L
liuqi 已提交
297 298

// Bias-less conv on CPU.
TEST_F(Conv2dOpTest, CPUWithoutBias) {
  TestNHWCSimple3x3WithoutBias<DeviceType::CPU, float>();
}

302
// Bias-less conv on the OpenCL path.
TEST_F(Conv2dOpTest, OPENCLWithoutBias) {
  TestNHWCSimple3x3WithoutBias<DeviceType::GPU, float>();
}

306
namespace {
李寅 已提交
307
template <DeviceType D, typename T>
308
void TestNHWCCombined3x3() {
L
liuqi 已提交
309 310 311 312
  // Construct graph
  OpsTestNet net;

  // Add input data
313
  net.AddInputFromArray<D, T>(
李寅 已提交
314 315 316
      "Input", {1, 5, 5, 2}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
317
  net.AddInputFromArray<D, T>(
李寅 已提交
318 319 320
      "Filter", {2, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
321 322 323
       0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f},
      true);
  net.AddInputFromArray<D, T>("Bias", {2}, {0.1f, 0.2f}, true);
L
liuqi 已提交
324

李寅 已提交
325
  if (D == DeviceType::CPU) {
326 327
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
李寅 已提交
328
    OpDefBuilder("Conv2D", "Conv2DTest")
李寅 已提交
329 330 331 332 333 334 335 336 337
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {2, 2})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());
李寅 已提交
338 339
    // Run
    net.RunOp(D);
340 341
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
342
  } else if (D == DeviceType::GPU) {
L
liuqi 已提交
343
    OpDefBuilder("Conv2D", "Conv2DTest")
344 345 346 347
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
李寅 已提交
348 349 350 351 352
        .AddIntsArg("strides", {2, 2})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());
L
liuqi 已提交
353 354 355
    // Run
    net.RunOp(D);
  } else {
李寅 已提交
356
    MACE_NOT_IMPLEMENTED;
L
liuqi 已提交
357 358 359
  }

  // Check
360
  auto expected = net.CreateTensor<float>(
李寅 已提交
361 362
      {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f,
                     9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f});
363
  ExpectTensorNear<float, T>(*expected, *net.GetOutput("Output"), 1e-5);
364
}
365
}  // namespace
L
liuqi 已提交
366

367 368
// Stride-2 multi-channel conv on CPU.
TEST_F(Conv2dOpTest, CPUStride2) {
  TestNHWCCombined3x3<DeviceType::CPU, float>();
}

371
// Stride-2 multi-channel conv on the OpenCL path.
TEST_F(Conv2dOpTest, OPENCLStride2) {
  TestNHWCCombined3x3<DeviceType::GPU, float>();
}

375
namespace {
李寅 已提交
376
template <DeviceType D, typename T>
377
void TestFusedNHWCSimple3x3VALID(int wino_blk_size = 0) {
378 379 380 381
  OpsTestNet net;
  // Add input data
  net.AddInputFromArray<D, float>(
      "Input", {1, 3, 3, 2},
李寅 已提交
382
      {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1});
383
  net.AddInputFromArray<D, float>(
384 385
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
386 387
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
      true);
388 389
  net.AddInputFromArray<D, float>("Bias", {1}, {-0.1f}, true);
  const std::vector<index_t> output_shape = {1, 1, 1, 1};
390 391

  if (D == DeviceType::CPU) {
392 393
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
394 395
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputNCHW")
396
        .Input("Filter")
397 398 399 400 401 402 403 404 405 406
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .AddStringArg("activation", "RELU")
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
407 408
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
409 410
  } else if (D == DeviceType::GPU) {
    OpDefBuilder("Conv2D", "Conv2DTest")
411 412 413 414 415
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .OutputShape(output_shape)
416 417 418 419 420
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .AddStringArg("activation", "RELU")
421
        .AddIntArg("wino_block_size", wino_blk_size)
422 423 424 425 426 427 428
        .Finalize(net.NewOperatorDef());

    net.RunOp(D);
  } else {
    MACE_NOT_IMPLEMENTED;
  }

429
  auto expected = net.CreateTensor<float>(output_shape, {0.0f});
430 431
  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"));
}
李寅 已提交
432
template <DeviceType D, typename T>
433
void TestFusedNHWCSimple3x3WithoutBias(int wino_blk_size = 0) {
434 435 436 437 438
  OpsTestNet net;

  // Add input data
  net.AddInputFromArray<D, float>(
      "Input", {1, 3, 3, 2},
李寅 已提交
439
      {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1});
440
  net.AddInputFromArray<D, float>(
441 442
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
443 444
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
      true);
445
  const std::vector<index_t> output_shape = {1, 1, 1, 1};
446 447

  if (D == DeviceType::CPU) {
448 449
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
450 451
    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("InputNCHW")
452
        .Input("Filter")
453 454 455 456 457 458 459 460 461 462
        .Output("OutputNCHW")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .AddStringArg("activation", "RELU")
        .Finalize(net.NewOperatorDef());

    // Run
    net.RunOp(D);
463 464
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
465 466
  } else if (D == DeviceType::GPU) {
    OpDefBuilder("Conv2D", "Conv2DTest")
467 468 469 470
        .Input("Input")
        .Input("Filter")
        .Output("Output")
        .OutputShape(output_shape)
471 472 473 474 475
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .AddStringArg("activation", "RELU")
476
        .AddIntArg("wino_block_size", wino_blk_size)
477 478 479 480 481 482 483 484
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  } else {
    MACE_NOT_IMPLEMENTED;
  }

  // Check
485
  auto expected = net.CreateTensor<float>(output_shape, {0.0f});
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501

  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"));
}

}  // namespace

// Fused conv+RELU on CPU, with and without bias.
TEST_F(Conv2dOpTest, FusedCPUSimple) {
  TestFusedNHWCSimple3x3VALID<DeviceType::CPU, float>();
  TestFusedNHWCSimple3x3WithoutBias<DeviceType::CPU, float>();
}

// Fused conv+RELU on the OpenCL path, with and without bias.
TEST_F(Conv2dOpTest, FusedOPENCLSimple) {
  TestFusedNHWCSimple3x3VALID<DeviceType::GPU, float>();
  TestFusedNHWCSimple3x3WithoutBias<DeviceType::GPU, float>();
}

502 503 504 505 506 507 508
// Fused conv+RELU via the Winograd kernels (block sizes 2 and 4).
TEST_F(Conv2dOpTest, FusedOPENCLSimpleWinograd) {
  TestFusedNHWCSimple3x3VALID<DeviceType::GPU, float>(2);
  TestFusedNHWCSimple3x3WithoutBias<DeviceType::GPU, float>(2);
  TestFusedNHWCSimple3x3VALID<DeviceType::GPU, float>(4);
  TestFusedNHWCSimple3x3WithoutBias<DeviceType::GPU, float>(4);
}

509
namespace {
李寅 已提交
510
template <DeviceType D>
511
void TestConv1x1() {
512
  // Construct graph
513
  OpsTestNet net;
L
Liangliang He 已提交
514 515

  // Add input data
516
  net.AddInputFromArray<D, float>(
李寅 已提交
517 518 519 520 521 522 523 524
      "Input", {1, 3, 10, 5},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
525
  net.AddInputFromArray<D, float>(
李寅 已提交
526
      "Filter", {2, 5, 1, 1},
527 528
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, true);
  net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f}, true);
L
Liangliang He 已提交
529

李寅 已提交
530
  if (D == DeviceType::CPU) {
531 532
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
李寅 已提交
533
    OpDefBuilder("Conv2D", "Conv2DTest")
李寅 已提交
534 535 536 537 538 539 540 541
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
李寅 已提交
542 543
    // Run
    net.RunOp(D);
544 545
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
546
  } else if (D == DeviceType::GPU) {
547
    OpDefBuilder("Conv2D", "Conv2DTest")
548 549 550 551
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
李寅 已提交
552 553 554 555
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
556 557 558
    // Run
    net.RunOp(D);
  } else {
李寅 已提交
559
    MACE_NOT_IMPLEMENTED;
560
  }
L
Liangliang He 已提交
561 562

  // Check
563
  auto expected = net.CreateTensor<float>(
李寅 已提交
564 565 566 567 568 569 570
      {1, 3, 10, 2},
      {5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f});
L
Liangliang He 已提交
571

572
  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
L
Liangliang He 已提交
573
}
574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635

// 1x1 convolution with unequal strides {1, 2}: width is halved (10 -> 5)
// while height is preserved; per-pixel outputs are 5.1 and 10.2 for the
// two output channels.
template <DeviceType D>
void TestConv1x1NeqStride() {
  // Construct graph
  OpsTestNet net;

  // Add input data
  net.AddInputFromArray<D, float>(
      "Input", {1, 3, 10, 5},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  net.AddInputFromArray<D, float>(
      "Filter", {2, 5, 1, 1},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}, true);
  net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f}, true);

  if (D == DeviceType::CPU) {
    // CPU kernels run in NCHW; convert in and out of that layout.
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {1, 2})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
  } else if (D == DeviceType::GPU) {
    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {1, 2})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  } else {
    MACE_NOT_IMPLEMENTED;
  }

  // Check
  auto expected = net.CreateTensor<float>(
      {1, 3, 5, 2},
      {5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f});

  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
636
}  // namespace
L
Liangliang He 已提交
637

638
// Pointwise (1x1) conv on CPU.
TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); }
L
liuqi 已提交
639

640
// Pointwise (1x1) conv on the OpenCL path.
TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::GPU>(); }
641

642 643 644 645 646 647 648 649
// 1x1 conv with unequal strides on CPU.
TEST_F(Conv2dOpTest, CPUConv1x1NotEqualStride) {
  TestConv1x1NeqStride<DeviceType::CPU>();
}

// 1x1 conv with unequal strides on the OpenCL path.
TEST_F(Conv2dOpTest, OPENCLConv1x1NotEqualStride) {
  TestConv1x1NeqStride<DeviceType::GPU>();
}

650
namespace {
李寅 已提交
651
template <DeviceType D, typename T>
652 653 654
void TestComplexConvNxN(const std::vector<index_t> &shape,
                        const std::vector<int> strides,
                        const int wino_blk_size = 0) {
李寅 已提交
655
  testing::internal::LogToStderr();
L
Liangliang He 已提交
656
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
657
                  Padding type) {
658
    // generate random input
U
Unknown 已提交
659
    static unsigned int seed = time(NULL);
L
liutuo 已提交
660
    index_t batch = 3 + (rand_r(&seed) % 10);
L
liuqi 已提交
661 662
    index_t height = shape[0];
    index_t width = shape[1];
663 664
    index_t input_channels = shape[2];
    index_t output_channels = shape[3];
李寅 已提交
665

L
liuqi 已提交
666
    OpsTestNet net;
李寅 已提交
667 668 669 670

    // Add input data
    net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
    net.AddRandomInput<D, T>(
Y
yejianwu 已提交
671 672 673
        "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true,
        false);
    net.AddRandomInput<D, T>("Bias", {output_channels}, true, false);
674 675
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
李寅 已提交
676 677

    // Construct graph
L
liuqi 已提交
678
    OpDefBuilder("Conv2D", "Conv2dTest")
李寅 已提交
679 680 681 682 683 684 685
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {1, 1})
Y
yejianwu 已提交
686
        .AddStringArg("activation", "LEAKYRELU")
687
        .AddFloatArg("activation_coefficient", 0.1)
李寅 已提交
688 689
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());
L
liuqi 已提交
690

L
liuqi 已提交
691 692
    // run on cpu
    net.RunOp();
李寅 已提交
693

694 695
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
李寅 已提交
696

L
liuqi 已提交
697
    // Check
698 699
    auto expected = net.CreateTensor<float>();
    expected->Copy(*net.GetOutput("Output"));
L
liuqi 已提交
700

L
liuqi 已提交
701
    // run on gpu
李寅 已提交
702
    OpDefBuilder("Conv2D", "Conv2dTest")
703 704 705 706 707
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("OPENCLOutput")
        .OutputShape(expected->shape())
李寅 已提交
708 709 710
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {1, 1})
Y
yejianwu 已提交
711
        .AddStringArg("activation", "LEAKYRELU")
712
        .AddFloatArg("activation_coefficient", 0.1)
李寅 已提交
713
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
714
        .AddIntArg("wino_block_size", wino_blk_size)
李寅 已提交
715
        .Finalize(net.NewOperatorDef());
L
liuqi 已提交
716 717
    // Run on device
    net.RunOp(D);
718
    ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4,
L
liuqi 已提交
719
                            1e-3);
720 721
  };

B
Bin Li 已提交
722
  for (int kernel_size : {1, 3, 5, 7}) {
723 724
    func(kernel_size, kernel_size, strides[0], strides[1], VALID);
    func(kernel_size, kernel_size, strides[0], strides[1], SAME);
725
  }
726
}
727
}  // namespace
L
liuqi 已提交
728

L
liuqi 已提交
729
// Aligned shapes, strides 1 and 2, GPU vs CPU reference.
TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) {
  TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {1, 1});
  TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {2, 2});
}

734
// Aligned shapes through the Winograd kernels (block sizes 2 and 4).
TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNWinograd) {
  TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {1, 1}, 2);
  TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {1, 1}, 4);
}

L
liuqi 已提交
739
// Unaligned (prime-ish) shapes, strides 1 and 2.
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) {
  TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {1, 1});
  TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {2, 2});
}

744
// Unaligned shapes through the Winograd kernels (block sizes 4 and 2).
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNWinograd) {
  TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {1, 1}, 4);
  TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {1, 1}, 2);
}

749
// Larger strides (3 and 4) on unaligned and aligned shapes.
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS34) {
  TestComplexConvNxN<DeviceType::GPU, float>({31, 113, 13, 17}, {3, 3});
  TestComplexConvNxN<DeviceType::GPU, float>({32, 32, 13, 17}, {4, 4});
}

// Unequal height/width strides on the GPU path.
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNWithNotEqualStride) {
  TestComplexConvNxN<DeviceType::GPU, float>({31, 113, 13, 17}, {1, 2});
  TestComplexConvNxN<DeviceType::GPU, float>({32, 32, 13, 17}, {3, 4});
}
758

759
namespace {
李寅 已提交
760
template <DeviceType D>
761 762
void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
                               const std::vector<index_t> &filter_shape,
763 764
                               const std::vector<int> &dilations,
                               const int wino_blk_size = 0) {
765 766
  testing::internal::LogToStderr();

767
  auto func = [&](index_t batch, int stride_h, int stride_w, Padding padding) {
768
    // generate random input
L
liuqi 已提交
769 770 771 772
    index_t height = input_shape[0];
    index_t width = input_shape[1];
    index_t kernel_h = filter_shape[0];
    index_t kernel_w = filter_shape[1];
773 774
    index_t input_channels = filter_shape[2];
    index_t output_channels = filter_shape[3];
775 776 777 778
    // Construct graph
    OpsTestNet net;

    std::vector<float> float_input_data;
779
    GenerateRandomRealTypeData({batch, height, width, input_channels},
Y
yejianwu 已提交
780
                               &float_input_data);
781
    std::vector<float> float_filter_data;
782
    GenerateRandomRealTypeData(
李寅 已提交
783 784
        {kernel_h, kernel_w, output_channels, input_channels},
        &float_filter_data);
785
    std::vector<float> float_bias_data;
Y
yejianwu 已提交
786
    GenerateRandomRealTypeData({output_channels}, &float_bias_data);
787 788
    // Add input data
    net.AddInputFromArray<D, float>(
李寅 已提交
789
        "Input", {batch, height, width, input_channels}, float_input_data);
790
    net.AddInputFromArray<D, float>(
李寅 已提交
791
        "Filter", {output_channels, input_channels, kernel_h, kernel_w},
792
        float_filter_data, true);
793
    net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data,
794
                                    true);
795

796 797
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
李寅 已提交
798 799

    OpDefBuilder("Conv2D", "Conv2dTest")
李寅 已提交
800 801 802 803 804 805 806 807
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", padding)
        .AddIntsArg("dilations", {dilations[0], dilations[1]})
        .Finalize(net.NewOperatorDef());
李寅 已提交
808

809 810
    // run on cpu
    net.RunOp();
李寅 已提交
811

812 813
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
李寅 已提交
814

815
    // Check
816 817
    auto expected = net.CreateTensor<float>();
    expected->Copy(*net.GetOutput("Output"));
818 819 820

    // run on gpu
    OpDefBuilder("Conv2D", "Conv2dTest")
821 822 823 824 825
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("OPENCLOutput")
        .OutputShape(expected->shape())
李寅 已提交
826 827 828 829
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", padding)
        .AddIntsArg("dilations", {dilations[0], dilations[1]})
        .AddIntArg("T", static_cast<int>(DataType::DT_HALF))
830
        .AddIntArg("wino_block_size", wino_blk_size)
李寅 已提交
831
        .Finalize(net.NewOperatorDef());
832 833
    // Run on device
    net.RunOp(D);
834
    ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-2,
835
                            1e-2);
836 837
  };

838 839 840 841 842 843 844
  for (auto batch : {1, 5}) {
    func(batch, 1, 1, VALID);
    func(batch, 1, 1, SAME);
    if (dilations[0] == 1 && wino_blk_size == 0) {
      func(batch, 2, 2, VALID);
      func(batch, 2, 2, SAME);
    }
845 846
  }
}
847
}  // namespace
848

L
liuqi 已提交
849
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x1S12) {
  // 1x1 kernel, aligned channels.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {1, 1, 32, 64}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) {
  // 3x3 kernel, aligned channels.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64}, {1, 1});
}

857
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3Winograd) {
  // Winograd block size 2 only.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64}, {1, 1},
                                             2);
  // TODO(liutuo) : the precision error is large.
  //  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64},
  //                                             {1, 1}, 4);
}

865
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv5x5S12) {
  // 5x5 kernel, aligned and unaligned output channels.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {5, 5, 3, 64}, {1, 1});
  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {5, 5, 3, 63}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x7S1) {
  // Asymmetric 1x7 kernel.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({17, 17}, {1, 7, 192, 192},
                                             {1, 1});
  TestHalfComplexConvNxNS12<DeviceType::GPU>({17, 17}, {1, 7, 192, 191},
                                             {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv7x1S1) {
  // Asymmetric 7x1 kernel.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({17, 17}, {7, 1, 192, 192},
                                             {1, 1});
  TestHalfComplexConvNxNS12<DeviceType::GPU>({17, 17}, {7, 1, 160, 192},
                                             {1, 1});
  TestHalfComplexConvNxNS12<DeviceType::GPU>({17, 17}, {7, 1, 160, 191},
                                             {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv7x7S12) {
  // 7x7 kernel, aligned and unaligned output channels.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {7, 7, 3, 64}, {1, 1});
  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {7, 7, 3, 63}, {1, 1});
}

L
liuqi 已提交
891
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv15x1S12) {
  // Tall 15x1 kernel at several image sizes.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {15, 1, 256, 2}, {1, 1});
  TestHalfComplexConvNxNS12<DeviceType::GPU>({64, 64}, {15, 1, 64, 2}, {1, 1});
  TestHalfComplexConvNxNS12<DeviceType::GPU>({256, 256}, {15, 1, 32, 2},
                                             {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x15S12) {
  // Wide 1x15 kernel at two image sizes.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {1, 15, 256, 2}, {1, 1});
  TestHalfComplexConvNxNS12<DeviceType::GPU>({256, 256}, {1, 15, 32, 2},
                                             {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv1x1S12) {
  // 1x1 kernel on odd shapes.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {1, 1, 5, 7}, {1, 1});
}

L
liuqi 已提交
908
TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) {
  // 3x3 kernel on odd shapes.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7}, {1, 1});
}

912 913
TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3Winograd) {
  // TODO(liutuo) : the precision error is large.
  //  TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7},
  //                                             {1, 1}, 4);
  TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7}, {1, 1},
                                             2);
}

920
TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) {
  // 5x5 kernel with dilation 2.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({64, 64}, {5, 5, 16, 16}, {2, 2});
}

TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation2) {
  // 7x7 kernel with dilation 2.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({64, 64}, {7, 7, 16, 16}, {2, 2});
}

TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation4) {
  // 7x7 kernel with dilation 4 on odd shapes.
  TestHalfComplexConvNxNS12<DeviceType::GPU>({63, 67}, {7, 7, 16, 16}, {4, 4});
}

932
namespace {
李寅 已提交
933
template <DeviceType D, typename T>
934 935
void TestDilationConvNxN(const std::vector<index_t> &shape,
                         const int dilation_rate) {
936 937 938 939 940 941 942 943 944 945 946
  testing::internal::LogToStderr();
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
                  Padding type) {
    srand(time(NULL));

    // generate random input
    index_t batch = 1;
    index_t height = shape[0];
    index_t width = shape[1];
    index_t input_channels = shape[2];
    index_t output_channels = shape[3];
李寅 已提交
947

948
    OpsTestNet net;
李寅 已提交
949 950 951 952

    // Add input data
    net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
    net.AddRandomInput<D, T>(
953 954
        "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true);
    net.AddRandomInput<D, T>("Bias", {output_channels}, true);
李寅 已提交
955

956 957
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
李寅 已提交
958 959

    // Construct graph
960
    OpDefBuilder("Conv2D", "Conv2dTest")
李寅 已提交
961 962 963 964 965 966 967 968 969
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {dilation_rate, dilation_rate})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());
970 971 972

    // run on cpu
    net.RunOp();
973 974
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
李寅 已提交
975

976
    // Check
977 978
    auto expected = net.CreateTensor<float>();
    expected->Copy(*net.GetOutput("Output"));
979 980 981

    // run on gpu
    OpDefBuilder("Conv2D", "Conv2dTest")
982 983 984 985
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("OPENCLOutput")
李寅 已提交
986 987 988 989 990
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {dilation_rate, dilation_rate})
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Finalize(net.NewOperatorDef());
991 992
    // Run on device
    net.RunOp(D);
993
    ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4,
李寅 已提交
994
                            1e-4);
995 996 997 998 999 1000 1001 1002
  };

  for (int kernel_size : {3}) {
    for (int stride : {1}) {
      func(kernel_size, kernel_size, stride, stride, VALID);
      func(kernel_size, kernel_size, stride, stride, SAME);
    }
  }
L
liuqi 已提交
1003
}
1004
}  // namespace
1005 1006

TEST_F(Conv2dOpTest, OPENCLAlignedDilation2) {
  // Aligned shape, dilation 2.
  TestDilationConvNxN<DeviceType::GPU, float>({32, 32, 32, 64}, 2);
}

TEST_F(Conv2dOpTest, OPENCLAligned2Dilation4) {
  // Aligned shape, dilation 4.
  TestDilationConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, 4);
}

TEST_F(Conv2dOpTest, OPENCLUnalignedDilation4) {
  // Unaligned shape, dilation 4.
  TestDilationConvNxN<DeviceType::GPU, float>({107, 113, 5, 7}, 4);
}

namespace {
李寅 已提交
1019
template <DeviceType D>
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041
void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
                               const std::vector<index_t> &filter_shape,
                               const std::vector<int> &dilations) {
  testing::internal::LogToStderr();
  auto func = [&](int stride_h, int stride_w, Padding type) {
    srand(time(NULL));

    // generate random input
    index_t batch = 1;
    index_t height = image_shape[0];
    index_t width = image_shape[1];
    index_t kernel_h = filter_shape[0];
    index_t kernel_w = filter_shape[1];
    index_t output_channels = filter_shape[2];
    index_t input_channels = filter_shape[3];

    OpsTestNet net;

    // Add input data
    net.AddRandomInput<D, float>("Input",
                                 {batch, height, width, input_channels});
    net.AddRandomInput<D, float>(
1042 1043
        "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true);
    net.AddRandomInput<D, float>("Bias", {output_channels}, true);
1044

1045 1046
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
1047 1048 1049
    // Construct graph
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputNCHW")
1050
        .Input("Filter")
1051 1052 1053 1054 1055 1056 1057 1058 1059 1060
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", dilations)
        .Finalize(net.NewOperatorDef());

    // run on cpu
    net.RunOp();

1061 1062
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
1063
    // Check
1064 1065
    auto expected = net.CreateTensor<float>();
    expected->Copy(*net.GetOutput("Output"));
1066 1067 1068

    // run on gpu
    OpDefBuilder("Conv2D", "Conv2dTest")
1069 1070 1071 1072
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("OPENCLOutput")
1073 1074 1075 1076 1077 1078 1079 1080
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", dilations)
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<half>::value))
        .Finalize(net.NewOperatorDef());
    // Run on device
    net.RunOp(D);

1081
    ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-2,
李寅 已提交
1082
                            1e-1);
1083 1084 1085 1086 1087 1088 1089 1090
  };

  func(1, 1, VALID);
  func(1, 1, SAME);
}
}  // namespace

TEST_F(Conv2dOpTest, OPENCLHalf7X7AtrousConvD2) {
  // 7x7 atrous convolution, dilation 2.
  TestGeneralHalfAtrousConv<DeviceType::GPU>({32, 32}, {7, 7, 16, 3}, {2, 2});
}

TEST_F(Conv2dOpTest, OPENCLHalf15X15AtrousConvD4) {
  // NOTE(review): test name says D4 but the dilation argument is {2, 2} —
  // confirm whether {4, 4} was intended (behavior kept as-is).
  TestGeneralHalfAtrousConv<DeviceType::GPU>({63, 71}, {15, 15, 16, 16},
                                             {2, 2});
}

1099
namespace {
李寅 已提交
1100
template <DeviceType D, typename T>
1101
void TestArbitraryPadConvNxN(const std::vector<index_t> &shape,
1102 1103
                             const std::vector<int> &paddings,
                             const int wino_blk_size = 0) {
1104 1105 1106 1107 1108 1109 1110 1111 1112 1113
  testing::internal::LogToStderr();
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w) {
    srand(time(NULL));

    // generate random input
    index_t batch = 1;
    index_t height = shape[0];
    index_t width = shape[1];
    index_t input_channels = shape[2];
    index_t output_channels = shape[3];
李寅 已提交
1114

1115 1116 1117
    OpsTestNet net;

    // Add input data
1118 1119 1120 1121 1122
    net.AddRandomInput<D, float>("Input",
                                 {batch, height, width, input_channels});
    net.AddRandomInput<D, float>(
        "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true);
    net.AddRandomInput<D, float>("Bias", {output_channels}, true);
1123

1124 1125
    net.TransformDataFormat<DeviceType::CPU, float>(
        "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
李寅 已提交
1126 1127
    // Construct graph
    OpDefBuilder("Conv2D", "Conv2dTest")
李寅 已提交
1128 1129 1130 1131 1132 1133 1134
        .Input("InputNCHW")
        .Input("Filter")
        .Input("Bias")
        .Output("OutputNCHW")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntsArg("padding_values", paddings)
        .Finalize(net.NewOperatorDef());
李寅 已提交
1135

1136 1137
    // run on cpu
    net.RunOp();
李寅 已提交
1138

1139 1140
    net.TransformDataFormat<DeviceType::CPU, float>(
        "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
李寅 已提交
1141

1142
    // Check
1143 1144
    auto expected = net.CreateTensor<float>();
    expected->Copy(*net.GetOutput("Output"));
1145 1146 1147

    // run on gpu
    OpDefBuilder("Conv2D", "Conv2dTest")
1148 1149 1150 1151 1152
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("OPENCLOutput")
        .OutputShape(expected->shape())
李寅 已提交
1153 1154 1155
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntsArg("padding_values", paddings)
        .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
1156
        .AddIntArg("wino_block_size", wino_blk_size)
李寅 已提交
1157
        .Finalize(net.NewOperatorDef());
1158 1159
    // Run on device
    net.RunOp(D);
1160 1161 1162 1163 1164 1165 1166
    if (DataTypeToEnum<T>::value == DT_HALF) {
      ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-1,
                              1e-2);
    } else {
      ExpectTensorNear<float>(*expected, *net.GetOutput("OPENCLOutput"), 1e-4,
                              1e-4);
    }
1167 1168
  };

1169 1170 1171 1172 1173 1174 1175
  if (wino_blk_size != 0) {
    func(3, 3, 1, 1);
  } else {
    for (int kernel_size : {3, 5, 7}) {
      for (int stride : {2, 3}) {
        func(kernel_size, kernel_size, stride, stride);
      }
1176 1177 1178
    }
  }
}
1179
}  // namespace
1180 1181

TEST_F(Conv2dOpTest, OPENCLAlignedPad1) {
  // Explicit padding of 1 on aligned shapes.
  TestArbitraryPadConvNxN<DeviceType::GPU, float>({32, 32, 32, 64}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLAlignedPad2) {
  // Explicit padding of 2 on aligned shapes.
  TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, {2, 2});
}

1189
TEST_F(Conv2dOpTest, OPENCLAlignedPad2Winograd) {
  // Explicit padding with Winograd block sizes 2 and 4.
  TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, {2, 2},
                                                  2);
  TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, {2, 2},
                                                  4);
}

1196
TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) {
  // Explicit padding of 4 on odd shapes, float and half.
  TestArbitraryPadConvNxN<DeviceType::GPU, float>({107, 113, 5, 7}, {4, 4});
  TestArbitraryPadConvNxN<DeviceType::GPU, half>({107, 113, 5, 7}, {4, 4});
}

TEST_F(Conv2dOpTest, OPENCLUnalignedPad4Winograd) {
  // Explicit padding with Winograd on odd shapes, float and half.
  TestArbitraryPadConvNxN<DeviceType::GPU, float>({107, 113, 5, 7}, {1, 1}, 2);
  TestArbitraryPadConvNxN<DeviceType::GPU, half>({107, 113, 5, 7}, {1, 1}, 2);
  TestArbitraryPadConvNxN<DeviceType::GPU, float>({107, 113, 5, 7}, {4, 4}, 4);
  // TODO(liutuo) : the precision error is large.
  TestArbitraryPadConvNxN<DeviceType::GPU, half>({107, 113, 5, 7}, {4, 4}, 4);
}
L
liutuo 已提交
1208

B
Bin Li 已提交
1209 1210 1211 1212 1213 1214 1215
namespace {

void TestQuantSimple3x3() {
  OpsTestNet net;

  // Add input data
  net.AddInputFromArray<DeviceType::CPU, uint8_t>(
1216 1217 1218
      "Filter", {1, 3, 3, 2}, {102, 150, 123, 135, 1, 216, 137, 47, 53, 75, 145,
                               130, 171, 62, 255, 122, 72, 211},
      true, 0.0226, 127);
B
Bin Li 已提交
1219
  net.AddInputFromArray<DeviceType::CPU, uint8_t>(
1220 1221 1222 1223 1224
      "Input", {1, 3, 3, 2}, {1, 75, 117, 161, 127, 119, 94, 151, 203, 151, 84,
                              61, 55, 142, 113, 139, 3, 255},
      false, 0.0204, 93);
  net.AddInputFromArray<DeviceType::CPU, int32_t>("Bias", {1}, {2}, true,
                                                  0.00046104, 0);
B
Bin Li 已提交
1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243

  OpDefBuilder("Conv2D", "Conv2dTest")
      .Input("Input")
      .Input("Filter")
      .Input("Bias")
      .Output("Output")
      .AddIntsArg("strides", {1, 1})
      .AddIntArg("padding", Padding::VALID)
      .AddIntsArg("dilations", {1, 1})
      .AddIntArg("T", static_cast<int>(DT_UINT8))
      .Finalize(net.NewOperatorDef());

  net.Setup(DeviceType::CPU);
  Tensor *output = net.GetTensor("Output");
  output->SetScale(0.000711);
  output->SetZeroPoint(1);
  // Run
  net.Run();
  // Check
1244
  auto expected = net.CreateTensor<uint8_t>({1, 1, 1, 1}, {230});
B
Bin Li 已提交
1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257
  ExpectTensorNear<uint8_t>(*expected, *output);
}

void TestQuant(const index_t batch,
               const index_t out_channels,
               const index_t in_channels,
               const index_t in_height,
               const index_t in_width,
               const index_t k_height,
               const index_t k_width,
               enum Padding padding_type,
               const std::vector<int> &strides) {
  OpsTestNet net;
1258 1259 1260 1261
  net.AddRandomInput<CPU, float>("Input",
                                 {batch, in_height, in_width, in_channels});
  net.AddRandomInput<CPU, float>(
      "Filter", {out_channels, k_height, k_width, in_channels}, true);
1262
  net.AddRandomInput<CPU, float>("Bias", {out_channels}, true);
1263 1264
  net.TransformDataFormat<DeviceType::CPU, float>(
      "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
1265 1266
  net.TransformFilterDataFormat<DeviceType::CPU, float>(
      "Filter", DataFormat::OHWI, "FilterOIHW", DataFormat::OIHW);
B
Bin Li 已提交
1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278

  OpDefBuilder("Conv2D", "Conv2dTest")
      .Input("InputNCHW")
      .Input("FilterOIHW")
      .Input("Bias")
      .Output("OutputNCHW")
      .AddIntsArg("strides", strides)
      .AddIntArg("padding", padding_type)
      .AddIntsArg("dilations", {1, 1})
      .AddIntArg("T", static_cast<int>(DT_FLOAT))
      .Finalize(net.NewOperatorDef());
  net.RunOp(CPU);
1279 1280
  net.TransformDataFormat<DeviceType::CPU, float>(
      "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
B
Bin Li 已提交
1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312

  OpDefBuilder("Quantize", "QuantizeFilter")
      .Input("Filter")
      .Output("QuantizedFilter")
      .OutputType({DT_UINT8})
      .AddIntArg("T", DT_UINT8)
      .AddIntArg("non_zero", true)
      .Finalize(net.NewOperatorDef());
  net.RunOp();

  OpDefBuilder("Quantize", "QuantizeInput")
      .Input("Input")
      .Output("QuantizedInput")
      .OutputType({DT_UINT8})
      .AddIntArg("T", DT_UINT8)
      .AddIntArg("non_zero", true)
      .Finalize(net.NewOperatorDef());
  net.RunOp();

  OpDefBuilder("Quantize", "QuantizeOutput")
      .Input("Output")
      .Output("ExpectedQuantizedOutput")
      .OutputType({DT_UINT8})
      .AddIntArg("T", DT_UINT8)
      .AddIntArg("non_zero", true)
      .Finalize(net.NewOperatorDef());
  net.RunOp();

  Tensor *q_filter = net.GetTensor("QuantizedFilter");
  Tensor *q_input = net.GetTensor("QuantizedInput");
  Tensor *bias = net.GetTensor("Bias");
  auto bias_data = bias->data<float>();
B
Bin Li 已提交
1313
  float bias_scale = q_input->scale() * q_filter->scale();
B
Bin Li 已提交
1314
  std::vector<int32_t> q_bias(bias->size());
1315 1316 1317 1318
  QuantizeUtil<float, int32_t> quantize_util(
      OpTestContext::Get()->thread_pool());
  quantize_util.QuantizeWithScaleAndZeropoint(bias_data, bias->size(),
                                              bias_scale, 0, q_bias.data());
B
Bin Li 已提交
1319 1320 1321
  net.AddInputFromArray<DeviceType::CPU, int32_t>(
      "QuantizedBias", {out_channels}, q_bias, true, bias_scale, 0);

B
Bin Li 已提交
1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369
  OpDefBuilder("Conv2D", "QuantizeConv2dTest")
      .Input("QuantizedInput")
      .Input("QuantizedFilter")
      .Input("QuantizedBias")
      .Output("QuantizedOutput")
      .AddIntsArg("strides", strides)
      .AddIntArg("padding", padding_type)
      .AddIntsArg("dilations", {1, 1})
      .AddIntArg("T", static_cast<int>(DT_UINT8))
      .Finalize(net.NewOperatorDef());
  net.Setup(DeviceType::CPU);
  Tensor *eq_output = net.GetTensor("ExpectedQuantizedOutput");
  Tensor *q_output = net.GetTensor("QuantizedOutput");
  q_output->SetScale(eq_output->scale());
  q_output->SetZeroPoint(eq_output->zero_point());
  net.Run();

  OpDefBuilder("Dequantize", "DeQuantizeTest")
      .Input("QuantizedOutput")
      .Output("DequantizedOutput")
      .OutputType({DT_FLOAT})
      .AddIntArg("T", DT_UINT8)
      .Finalize(net.NewOperatorDef());
  net.RunOp();

  // Check
  ExpectTensorSimilar<float>(*net.GetOutput("Output"),
                             *net.GetTensor("DequantizedOutput"), 0.01);
}
}  // namespace

TEST_F(Conv2dOpTest, Quant) {
  // Fixed-value sanity check plus a sweep over kernel/stride/padding combos.
  TestQuantSimple3x3();
  TestQuant(1, 128, 64, 32, 32, 1, 1, VALID, {1, 1});
  TestQuant(1, 128, 64, 32, 32, 3, 3, VALID, {1, 1});
  TestQuant(1, 128, 64, 32, 32, 3, 3, SAME, {1, 1});
  TestQuant(1, 128, 64, 32, 32, 3, 3, FULL, {1, 1});
  TestQuant(1, 128, 64, 32, 32, 3, 3, SAME, {2, 2});
  TestQuant(1, 129, 63, 33, 31, 3, 3, SAME, {1, 1});
  TestQuant(9, 128, 64, 32, 32, 3, 3, SAME, {1, 1});
  TestQuant(1, 128, 64, 32, 32, 1, 5, SAME, {1, 1});
  TestQuant(1, 128, 64, 32, 32, 5, 5, SAME, {1, 1});
  TestQuant(1, 128, 64, 32, 32, 5, 1, SAME, {1, 1});
  TestQuant(1, 128, 64, 32, 32, 7, 7, SAME, {1, 1});
  TestQuant(1, 128, 64, 32, 32, 7, 7, SAME, {2, 2});
  TestQuant(1, 128, 64, 32, 32, 7, 7, SAME, {3, 3});
}

B
Bin Li 已提交
1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437
#ifdef MACE_ENABLE_BFLOAT16
namespace {
// Runs the same convolution in float and in BFloat16 on the CPU and checks
// the BFloat16 result against the float reference.
void TestBFloat16(const index_t batch,
                  const index_t out_channels,
                  const index_t in_channels,
                  const index_t in_height,
                  const index_t in_width,
                  const index_t k_height,
                  const index_t k_width,
                  enum Padding padding_type,
                  const std::vector<int> &strides) {
  OpsTestNet net;
  // Float source tensors; bf16 copies are produced via Cast.
  net.AddRandomInput<CPU, float>("Input",
                                 {batch, in_channels, in_height, in_width});
  net.AddRandomInput<CPU, float>(
      "Filter", {out_channels, in_channels, k_height, k_width}, true);
  net.AddRandomInput<CPU, float>("Bias", {out_channels}, true);
  net.Cast<CPU, float, BFloat16>("Input", "BF16Input");
  net.Cast<CPU, float, BFloat16>("Filter", "BF16Filter");
  net.Cast<CPU, float, BFloat16>("Bias", "BF16Bias");

  // Float reference run.
  OpDefBuilder("Conv2D", "Conv2dTest")
      .Input("Input")
      .Input("Filter")
      .Input("Bias")
      .Output("Output")
      .AddIntsArg("strides", strides)
      .AddIntArg("padding", padding_type)
      .AddIntsArg("dilations", {1, 1})
      .AddIntArg("T", static_cast<int>(DT_FLOAT))
      .Finalize(net.NewOperatorDef());
  net.RunOp(CPU);

  // BFloat16 run on the same data.
  OpDefBuilder("Conv2D", "BF16Conv2dTest")
      .Input("BF16Input")
      .Input("BF16Filter")
      .Input("BF16Bias")
      .Output("BF16Output")
      .AddIntsArg("strides", strides)
      .AddIntArg("padding", padding_type)
      .AddIntsArg("dilations", {1, 1})
      .AddIntArg("T", static_cast<int>(DT_BFLOAT16))
      .Finalize(net.NewOperatorDef());
  net.RunOp(CPU);

  net.Cast<CPU, BFloat16, float>("BF16Output", "CastOutput");

  ExpectTensorSimilar<float>(*net.GetOutput("Output"),
                             *net.GetTensor("CastOutput"), 1e-4);
}
}  // namespace

TEST_F(Conv2dOpTest, BFloat16) {
  // Sweep over kernel/stride/padding combos in BFloat16.
  TestBFloat16(1, 128, 64, 32, 32, 1, 1, VALID, {1, 1});
  TestBFloat16(1, 128, 64, 32, 32, 3, 3, VALID, {1, 1});
  TestBFloat16(1, 128, 64, 32, 32, 3, 3, SAME, {1, 1});
  TestBFloat16(1, 128, 64, 32, 32, 3, 3, FULL, {1, 1});
  TestBFloat16(1, 128, 64, 32, 32, 3, 3, SAME, {2, 2});
  TestBFloat16(1, 129, 63, 33, 31, 3, 3, SAME, {1, 1});
  TestBFloat16(9, 128, 64, 32, 32, 3, 3, SAME, {1, 1});
  TestBFloat16(1, 128, 64, 32, 32, 1, 5, SAME, {1, 1});
  TestBFloat16(1, 128, 64, 32, 32, 5, 5, SAME, {1, 1});
  TestBFloat16(1, 128, 64, 32, 32, 5, 1, SAME, {1, 1});
  TestBFloat16(1, 128, 64, 32, 32, 7, 7, SAME, {1, 1});
  TestBFloat16(1, 128, 64, 32, 32, 7, 7, SAME, {2, 2});
  TestBFloat16(1, 128, 64, 32, 32, 7, 7, SAME, {3, 3});
}
#endif  // MACE_ENABLE_BFLOAT16
L
lichao18 已提交
1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505
#ifdef MACE_ENABLE_FP16
namespace {
void TestFloat16(const index_t batch,
                 const index_t out_channels,
                 const index_t in_channels,
                 const index_t in_height,
                 const index_t in_width,
                 const index_t k_height,
                 const index_t k_width,
                 enum Padding padding_type,
                 const std::vector<int> &strides) {
  OpsTestNet net;
  net.AddRandomInput<CPU, float16_t>(
      "Input", {batch, in_channels, in_height, in_width});
  net.AddRandomInput<CPU, float16_t>(
      "Filter", {out_channels, in_channels, k_height, k_width}, true);
  net.AddRandomInput<CPU, float16_t>("Bias", {out_channels}, true);
  net.Cast<CPU, float, float16_t>("Input", "FP16Input");
  net.Cast<CPU, float, float16_t>("Filter", "FP16Filter");
  net.Cast<CPU, float, float16_t>("Bias", "FP16Bias");

  OpDefBuilder("Conv2D", "Conv2dTest")
      .Input("Input")
      .Input("Filter")
      .Input("Bias")
      .Output("Output")
      .AddIntsArg("strides", strides)
      .AddIntArg("padding", padding_type)
      .AddIntsArg("dilations", {1, 1})
      .AddIntArg("T", static_cast<int>(DT_FLOAT))
      .Finalize(net.NewOperatorDef());
  net.RunOp(CPU);

  OpDefBuilder("Conv2D", "FP16Conv2dTest")
      .Input("FP16Input")
      .Input("FP16Filter")
      .Input("FP16Bias")
      .Output("FP16Output")
      .AddIntsArg("strides", strides)
      .AddIntArg("padding", padding_type)
      .AddIntsArg("dilations", {1, 1})
      .AddIntArg("T", static_cast<int>(DT_FLOAT16))
      .Finalize(net.NewOperatorDef());
  net.RunOp(CPU);

  net.Cast<CPU, float16_t, float>("FP16Output", "CastOutput");

  ExpectTensorSimilar<float>(*net.GetOutput("Output"),
                             *net.GetTensor("CastOutput"), 1e-4);
}
}  // namespace

TEST_F(Conv2dOpTest, float16_t) {
  // Sweep over kernel/stride/padding combos in float16.
  TestFloat16(1, 128, 64, 32, 32, 1, 1, VALID, {1, 1});
  TestFloat16(1, 128, 64, 32, 32, 3, 3, VALID, {1, 1});
  TestFloat16(1, 128, 64, 32, 32, 3, 3, SAME, {1, 1});
  TestFloat16(1, 128, 64, 32, 32, 3, 3, FULL, {1, 1});
  TestFloat16(1, 128, 64, 32, 32, 3, 3, SAME, {2, 2});
  TestFloat16(1, 129, 63, 33, 31, 3, 3, SAME, {1, 1});
  TestFloat16(9, 128, 64, 32, 32, 3, 3, SAME, {1, 1});
  TestFloat16(1, 128, 64, 32, 32, 1, 5, SAME, {1, 1});
  TestFloat16(1, 128, 64, 32, 32, 5, 5, SAME, {1, 1});
  TestFloat16(1, 128, 64, 32, 32, 5, 1, SAME, {1, 1});
  TestFloat16(1, 128, 64, 32, 32, 7, 7, SAME, {1, 1});
  TestFloat16(1, 128, 64, 32, 32, 7, 7, SAME, {2, 2});
  TestFloat16(1, 128, 64, 32, 32, 7, 7, SAME, {3, 3});
}
#endif  // MACE_ENABLE_FP16
L
liutuo 已提交
1506 1507 1508
}  // namespace test
}  // namespace ops
}  // namespace mace