Commit 4fdb724c authored by liuqi

Minor fix.

Parent f8dfff0d
@@ -54,8 +54,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
     kernel_.setArg(idx++, static_cast<uint32_t>(paddings[1] / 2));
   }
-  const uint32_t gws[2] = {static_cast<size_t>(out_width),
-                           static_cast<size_t>(RoundUpDiv4(input_tensor->dim(3)))};
+  const uint32_t gws[2] = {static_cast<uint32_t>(out_width),
+                           static_cast<uint32_t>(RoundUpDiv4(input_tensor->dim(3)))};
   const std::vector<uint32_t> lws = {128, 8, 1};
   std::stringstream ss;
   ss << "winograd_transform_kernel_"
@@ -126,8 +126,8 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
     kernel_.setArg(idx++, prelu_alpha_);
   }
-  const uint32_t gws[2] = {static_cast<size_t>(input_tensor->dim(2)),
-                           static_cast<size_t>(RoundUpDiv4(input_tensor->dim(1)))};
+  const uint32_t gws[2] = {static_cast<uint32_t>(input_tensor->dim(2)),
+                           static_cast<uint32_t>(RoundUpDiv4(input_tensor->dim(1)))};
   const std::vector<uint32_t> lws = {128, 8, 1};
   std::stringstream ss;
...
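Note on the two hunks above: the casts that build gws now match the array's declared uint32_t element type instead of producing size_t values that would be narrowed into a uint32_t array. RoundUpDiv4 itself is not part of this diff; the following is only a minimal sketch of what such a helper is assumed to do (divide an element count by four and round up, as used when channels are packed four-wide):

#include <cstdint>

// Assumed behaviour of the RoundUpDiv4 helper referenced above:
// ceil(value / 4), e.g. RoundUpDiv4(3) == 1, RoundUpDiv4(32) == 8.
inline uint32_t RoundUpDiv4(int64_t value) {
  return static_cast<uint32_t>((value + 3) / 4);
}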
@@ -20,6 +20,7 @@ struct ReshapeFunctor {
                   Tensor *output,
                   StatsFuture *future) {
     output->Resize(out_shape);
+    // TODO copy on write to avoid this copy.
     output->CopyBytes(input->raw_data(), input->size() * sizeof(T));
   }
 };
...
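The TODO added above points at a copy-on-write reshape: because reshape only changes shape metadata, the output could share the input's buffer and defer the byte copy until one of the tensors is actually mutated. The following is only an illustrative sketch of that idea, not MACE's Tensor API; all names here are hypothetical:

#include <cstdint>
#include <memory>
#include <vector>

// Hypothetical copy-on-write storage: the reshaped view shares the buffer
// and clones it only on the first mutable access.
struct CowBuffer {
  std::shared_ptr<std::vector<uint8_t>> data;

  uint8_t *mutable_data() {
    if (data.use_count() > 1) {  // still shared with the source tensor
      data = std::make_shared<std::vector<uint8_t>>(*data);  // copy happens here
    }
    return data->data();
  }
  const uint8_t *raw_data() const { return data->data(); }
};

struct CowTensorView {
  CowBuffer buffer;            // shared with the source tensor
  std::vector<int64_t> shape;  // reshape only rewrites this metadata
};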
@@ -15,8 +15,8 @@ static void EltwiseBenchmark(int iters, kernels::EltwiseType type, int n, int h,
   OpsTestNet net;
   // Add input data
-  net.AddRandomInput<D, float>("Input0", {n, h, w, c});
-  net.AddRandomInput<D, float>("Input1", {n, h, w, c});
+  net.AddRandomInput<D, T>("Input0", {n, h, w, c});
+  net.AddRandomInput<D, T>("Input1", {n, h, w, c});
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, half>(net, "Input0", "InputImg0", kernels::BufferType::IN_OUT_CHANNEL);
@@ -26,7 +26,7 @@ static void EltwiseBenchmark(int iters, kernels::EltwiseType type, int n, int h,
        .Input("InputImg1")
        .AddIntArg("type", static_cast<int>(type))
        .AddFloatsArg("coeff", {1.2, 2.1})
-       .AddIntArg("T", static_cast<int>(DT_HALF))
+       .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Output("OutputImg")
        .Finalize(net.NewOperatorDef());
   } else {
@@ -63,16 +63,17 @@ static void EltwiseBenchmark(int iters, kernels::EltwiseType type, int n, int h,
   }                                                                       \
   BENCHMARK(BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
-#define BM_ELTWISE(ELT_TYPE, N, H, W, C, TYPE)              \
-  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, TYPE, CPU);        \
-  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, TYPE, OPENCL);
+#define BM_ELTWISE(ELT_TYPE, N, H, W, C)                    \
+  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU);       \
+  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, OPENCL);    \
+  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, OPENCL);
-BM_ELTWISE(0, 1, 256, 256, 32, float);
-BM_ELTWISE(0, 1, 128, 128, 32, float);
-BM_ELTWISE(1, 1, 128, 128, 32, float);
-BM_ELTWISE(2, 1, 128, 128, 32, float);
-BM_ELTWISE(0, 1, 240, 240, 256, float);
-BM_ELTWISE(1, 1, 240, 240, 256, float);
-BM_ELTWISE(2, 1, 240, 240, 256, float);
+BM_ELTWISE(0, 1, 256, 256, 32);
+BM_ELTWISE(0, 1, 128, 128, 32);
+BM_ELTWISE(1, 1, 128, 128, 32);
+BM_ELTWISE(2, 1, 128, 128, 32);
+BM_ELTWISE(0, 1, 240, 240, 256);
+BM_ELTWISE(1, 1, 240, 240, 256);
+BM_ELTWISE(2, 1, 240, 240, 256);
 }  // namespace mace
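With the macro change above, each shape now registers a float CPU run plus float and half OPENCL runs, and the op's "T" argument is derived from the benchmark's type parameter via DataTypeToEnum<T>::value rather than being hard-coded to DT_HALF. For readers unfamiliar with that trait, this is a minimal sketch of the pattern only; the real definition lives elsewhere in the tree and may differ, and the types and enumerators below are placeholders:

// Sketch of a compile-time type-to-enum trait in the DataTypeToEnum style.
enum DataType { DT_FLOAT, DT_HALF };      // enumerator values illustrative only

struct half { unsigned short storage; };  // placeholder for the project's half type

template <typename T>
struct DataTypeToEnum;                    // left unspecialized for unsupported types

template <>
struct DataTypeToEnum<float> {
  static constexpr DataType value = DT_FLOAT;
};

template <>
struct DataTypeToEnum<half> {
  static constexpr DataType value = DT_HALF;
};

// Usage, mirroring the benchmark above:
//   .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))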