diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index a842ba719174505e115fd26bc8bd8cd7f2301898..f74eb191d885647a42bfef5ec81e44c01f09efed 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -54,8 +54,8 @@ void WinogradTransformFunctor::operator()(const Tensor *i kernel_.setArg(idx++, static_cast(paddings[1] / 2)); } - const uint32_t gws[2] = {static_cast(out_width), - static_cast(RoundUpDiv4(input_tensor->dim(3)))}; + const uint32_t gws[2] = {static_cast(out_width), + static_cast(RoundUpDiv4(input_tensor->dim(3)))}; const std::vector lws = {128, 8, 1}; std::stringstream ss; ss << "winograd_transform_kernel_" @@ -126,8 +126,8 @@ void WinogradInverseTransformFunctor::operator()(const Te kernel_.setArg(idx++, prelu_alpha_); } - const uint32_t gws[2] = {static_cast(input_tensor->dim(2)), - static_cast(RoundUpDiv4(input_tensor->dim(1)))}; + const uint32_t gws[2] = {static_cast(input_tensor->dim(2)), + static_cast(RoundUpDiv4(input_tensor->dim(1)))}; const std::vector lws = {128, 8, 1}; std::stringstream ss; diff --git a/mace/kernels/reshape.h b/mace/kernels/reshape.h index a185567f769214dcd5398d48845de5ea5d4a6346..4d37a19974d683c3f916e2ccc170dfac11ed94c7 100644 --- a/mace/kernels/reshape.h +++ b/mace/kernels/reshape.h @@ -20,6 +20,7 @@ struct ReshapeFunctor { Tensor *output, StatsFuture *future) { output->Resize(out_shape); + // TODO copy on write to avoid this copy. output->CopyBytes(input->raw_data(), input->size() * sizeof(T)); } }; diff --git a/mace/ops/eltwise_benchmark.cc b/mace/ops/eltwise_benchmark.cc index 8cd222333e62576719ae688fe347dbd9fc45c072..80a3f072b0c785bf26342408b6a91f9b98b63831 100644 --- a/mace/ops/eltwise_benchmark.cc +++ b/mace/ops/eltwise_benchmark.cc @@ -15,8 +15,8 @@ static void EltwiseBenchmark(int iters, kernels::EltwiseType type, int n, int h, OpsTestNet net; // Add input data - net.AddRandomInput("Input0", {n, h, w, c}); - net.AddRandomInput("Input1", {n, h, w, c}); + net.AddRandomInput("Input0", {n, h, w, c}); + net.AddRandomInput("Input1", {n, h, w, c}); if (D == DeviceType::OPENCL) { BufferToImage(net, "Input0", "InputImg0", kernels::BufferType::IN_OUT_CHANNEL); @@ -26,7 +26,7 @@ static void EltwiseBenchmark(int iters, kernels::EltwiseType type, int n, int h, .Input("InputImg1") .AddIntArg("type", static_cast(type)) .AddFloatsArg("coeff", {1.2, 2.1}) - .AddIntArg("T", static_cast(DT_HALF)) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Output("OutputImg") .Finalize(net.NewOperatorDef()); } else { @@ -63,16 +63,17 @@ static void EltwiseBenchmark(int iters, kernels::EltwiseType type, int n, int h, } \ BENCHMARK(BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) -#define BM_ELTWISE(ELT_TYPE, N, H, W, C, TYPE) \ - BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, TYPE, CPU); \ - BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, TYPE, OPENCL); +#define BM_ELTWISE(ELT_TYPE, N, H, W, C, ) \ + BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU); \ + BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, OPENCL); \ + BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, OPENCL); -BM_ELTWISE(0, 1, 256, 256, 32, float); -BM_ELTWISE(0, 1, 128, 128, 32, float); -BM_ELTWISE(1, 1, 128, 128, 32, float); -BM_ELTWISE(2, 1, 128, 128, 32, float); -BM_ELTWISE(0, 1, 240, 240, 256, float); -BM_ELTWISE(1, 1, 240, 240, 256, float); -BM_ELTWISE(2, 1, 240, 240, 256, float); +BM_ELTWISE(0, 1, 256, 256, 32); +BM_ELTWISE(0, 1, 128, 128, 32); +BM_ELTWISE(1, 1, 128, 128, 32); +BM_ELTWISE(2, 1, 128, 128, 32); +BM_ELTWISE(0, 1, 240, 240, 256); +BM_ELTWISE(1, 1, 240, 240, 256); +BM_ELTWISE(2, 1, 240, 240, 256); } // namespace mace diff --git a/mace/ops/reshape.cc b/mace/ops/reshape.cc index c6ec12d646c59c1796c3d8a6f94eb9a77ed00508..d72052713c13d76bee21d9a44291e2197b71ffd5 100644 --- a/mace/ops/reshape.cc +++ b/mace/ops/reshape.cc @@ -8,9 +8,9 @@ namespace mace { void Register_Reshape(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("Reshape") - .Device(DeviceType::CPU) - .TypeConstraint("T") - .Build(), + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), ReshapeOp); }