diff --git a/mace/core/memory_optimizer.cc b/mace/core/memory_optimizer.cc index 004fb1a927ae9a15ad733ebcf61918c4983f99e0..0136e3f130e679a13f3e50ccc86c6c4bfda1e4f6 100644 --- a/mace/core/memory_optimizer.cc +++ b/mace/core/memory_optimizer.cc @@ -79,12 +79,10 @@ MemoryBlock MemoryOptimizer::CreateMemoryBlock( *op_def, "buffer_type", OpenCLBufferType::IN_OUT_CHANNEL)); } std::vector image_shape; - if (shape.size() == 1) { - shape = {shape[0], 1, 1, 1}; - } else if (shape.size() == 2) { + if (shape.size() == 2) { shape = {shape[0], 1, 1, shape[1]}; } else { - MACE_CHECK(shape.size() == 4) << "GPU only support 1D/2D/4D input"; + MACE_CHECK(shape.size() == 4) << "GPU only support 2D/4D input"; } OpenCLUtil::CalImage2DShape(shape, buffer_type, &image_shape); block.set_x(image_shape[0]); diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index 6b2ac58a23e3ebbcb59e72300b682cd809263cca..8d4248570f7453002cc68024cd4017208da7e284 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -29,22 +29,21 @@ class ConcatOpBase : public Operation { public: explicit ConcatOpBase(OpConstructContext *context) : Operation(context), - axis_(Operation::GetOptionalArg("axis", 3)), - checked_(false) {} + axis_(Operation::GetOptionalArg("axis", 3)) {} protected: - void Validate() { + int FormatAxis() { const int32_t input_dims = this->Input(0)->dim_size(); axis_ = axis_ < 0 ? axis_ + input_dims : axis_; MACE_CHECK((0 <= axis_ && axis_ < input_dims), "Expected concatenating axis in the range [", -input_dims, ", ", input_dims, "], but got ", axis_); + return axis_; } protected: int axis_; - bool checked_; }; template @@ -54,20 +53,17 @@ template class ConcatOp : public ConcatOpBase { public: explicit ConcatOp(OpConstructContext *context) - : ConcatOpBase(context) {} + : ConcatOpBase(context), + has_data_format_(Operation::GetOptionalArg( + "has_data_format", 0) == 1) {} MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); - if (!checked_) { - Validate(); - auto has_df = Operation::GetOptionalArg( - "has_data_format", 0); - if (has_df && this->Input(0)->dim_size() == 4) { - if (axis_ == 3) axis_ = 1; - else if (axis_ == 2) axis_ = 3; - else if (axis_ == 1) axis_ = 2; - } - checked_ = true; + int axis = FormatAxis(); + if (has_data_format_ && this->Input(0)->dim_size() == 4) { + if (axis == 3) axis = 1; + else if (axis == 2) axis = 3; + else if (axis == 1) axis = 2; } const std::vector &inputs = this->Inputs(); Tensor *output = this->Output(0); @@ -76,7 +72,7 @@ class ConcatOp : public ConcatOpBase { std::vector output_shape(input0->shape()); index_t inner_size = 1; - for (int i = 0; i < axis_; ++i) { + for (int i = 0; i < axis; ++i) { inner_size *= output_shape[i]; } std::vector outer_sizes(inputs_count, 0); @@ -86,14 +82,14 @@ class ConcatOp : public ConcatOpBase { MACE_CHECK(input->dim_size() == input0->dim_size(), "Ranks of all input tensors must be same."); for (int j = 0; j < input->dim_size(); ++j) { - if (j == axis_) { + if (j == axis) { continue; } MACE_CHECK(input->dim(j) == input0->dim(j), "Dimensions of inputs should equal except axis."); } outer_sizes[i] = input->size() / inner_size; - output_shape[axis_] += input->dim(axis_); + output_shape[axis] += input->dim(axis); } MACE_RETURN_IF_ERROR(output->Resize(output_shape)); @@ -119,6 +115,9 @@ class ConcatOp : public ConcatOpBase { return MaceStatus::MACE_SUCCESS; } + + private: + bool has_data_format_; }; #ifdef MACE_ENABLE_QUANTIZE @@ -130,7 +129,7 @@ class ConcatOp : public ConcatOpBase { MaceStatus Run(OpContext *context) override { MACE_UNUSED(context); - Validate(); + int axis = FormatAxis(); const std::vector &inputs = this->Inputs(); Tensor *output = this->Output(0); MACE_CHECK(output->scale() != 0); @@ -139,7 +138,7 @@ class ConcatOp : public ConcatOpBase { std::vector output_shape(input0->shape()); index_t inner_size = 1; - for (int i = 0; i < axis_; ++i) { + for (int i = 0; i < axis; ++i) { inner_size *= output_shape[i]; } std::vector outer_sizes(inputs_count, 0); @@ -149,14 +148,14 @@ class ConcatOp : public ConcatOpBase { MACE_CHECK(input->dim_size() == input0->dim_size(), "Ranks of all input tensors must be same."); for (int j = 0; j < input->dim_size(); ++j) { - if (j == axis_) { + if (j == axis) { continue; } MACE_CHECK(input->dim(j) == input0->dim(j), "Dimensions of inputs should equal except axis."); } outer_sizes[i] = input->size() / inner_size; - output_shape[axis_] += input->dim(axis_); + output_shape[axis] += input->dim(axis); } MACE_RETURN_IF_ERROR(output->Resize(output_shape)); @@ -200,15 +199,14 @@ class ConcatOp : public ConcatOpBase { explicit ConcatOp(OpConstructContext *context) : ConcatOpBase(context) { if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_ = make_unique>(axis_); + kernel_ = make_unique>(); } else { MACE_NOT_IMPLEMENTED; } } MaceStatus Run(OpContext *context) override { - Validate(); Tensor *output = this->Output(0); - return kernel_->Compute(context, inputs_, output); + return kernel_->Compute(context, inputs_, FormatAxis(), output); } private: diff --git a/mace/ops/one_hot.cc b/mace/ops/one_hot.cc index 2077c6861ebffbecbf0f84572221ce0370db1a0c..1d243f202f1fa5ad65c4abd58892df2a31dd9155 100644 --- a/mace/ops/one_hot.cc +++ b/mace/ops/one_hot.cc @@ -16,9 +16,6 @@ #include #include "mace/core/operator.h" -#ifdef MACE_ENABLE_OPENCL -#include "mace/ops/opencl/image/one_hot.h" -#endif // MACE_ENABLE_OPENCL namespace mace { namespace ops { @@ -148,52 +145,9 @@ class OneHotOp : public OneHotOpBase { } }; -#ifdef MACE_ENABLE_OPENCL -template -class OneHotOp : public OneHotOpBase { - public: - explicit OneHotOp(OpConstructContext *context) : OneHotOpBase(context) { - if (context->device()->gpu_runtime()->UseImageMemory()) { - kernel_.reset(new opencl::image::OneHotKernel( - depth_, on_value_, off_value_, axis_)); - } else { - MACE_NOT_IMPLEMENTED; - } - } - MaceStatus Run(OpContext *context) override { - const Tensor *input = this->Input(0); - Tensor *output = this->Output(0); - - return kernel_->Compute(context, input, output); - } - - private: - std::unique_ptr kernel_; -}; -#endif // MACE_ENABLE_OPENCL void RegisterOneHot(OpRegistryBase *op_registry) { MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::CPU, float); - -#ifdef MACE_ENABLE_OPENCL - MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::GPU, float); - MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::GPU, half); - - MACE_REGISTER_OP_CONDITION( - op_registry, - OpConditionBuilder("OneHot") - .SetDevicePlacerFunc( - [](OpConstructContext *context) -> std::set { - auto op = context->operator_def(); - if (op->output_shape_size() != op->output_size()) { - return { DeviceType::CPU, DeviceType::GPU }; - } - if (op->output_shape(0).dims_size() != 2) { - return { DeviceType::CPU }; - } - return { DeviceType::CPU, DeviceType::GPU }; - })); -#endif // MACE_ENABLE_OPENCL } } // namespace ops diff --git a/mace/ops/one_hot_benchmark.cc b/mace/ops/one_hot_benchmark.cc index a536f61a3df2a71998e62dc70e2781ab4434e6f9..ecb5ca8a165315c2729dae9af9f6f80d80d4f812 100644 --- a/mace/ops/one_hot_benchmark.cc +++ b/mace/ops/one_hot_benchmark.cc @@ -61,9 +61,7 @@ void OneHot(int iters, int batch, int depth, int axis) { MACE_BENCHMARK(MACE_BM_ONE_HOT_##N##_##DEPTH##_##AXIS##_##TYPE##_##DEVICE) #define MACE_BM_ONE_HOT(N, DEPTH, AXIS) \ - MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, CPU); \ - MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, GPU); \ - MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, half, GPU); + MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, CPU); MACE_BM_ONE_HOT(512, 16, 0); MACE_BM_ONE_HOT(512, 16, 1); diff --git a/mace/ops/one_hot_test.cc b/mace/ops/one_hot_test.cc index 07a9821c85d00b033d9e98336c7f68f51a4caf65..59594ceb82a552127be807a7a399c46fc13f5abc 100644 --- a/mace/ops/one_hot_test.cc +++ b/mace/ops/one_hot_test.cc @@ -45,7 +45,6 @@ void TestOneHot(const std::vector &input_shape, .AddFloatArg("on_value", on_value) .AddFloatArg("off_value", off_value) .AddIntArg("axis", axis) - .AddIntArg("data_format", DataFormat::NHWC) .Finalize(net.NewOperatorDef()); // Run @@ -77,10 +76,6 @@ TEST_F(OneHotTest, Dim1) { TestOneHot(input_shape, input_data, expected_shape, expected_data, 5, -1); - TestOneHot(input_shape, input_data, expected_shape, - expected_data, 5, -1); - TestOneHot(input_shape, input_data, expected_shape, - expected_data, 5, -1); expected_shape = {5, 10}; expected_data = { @@ -93,10 +88,6 @@ TEST_F(OneHotTest, Dim1) { TestOneHot(input_shape, input_data, expected_shape, expected_data, 5, 0); - TestOneHot(input_shape, input_data, expected_shape, - expected_data, 5, 0); - TestOneHot(input_shape, input_data, expected_shape, - expected_data, 5, 0); } TEST_F(OneHotTest, OnOffValue) { @@ -111,10 +102,6 @@ TEST_F(OneHotTest, OnOffValue) { TestOneHot(input_shape, input_data, expected_shape, expected_data, 6, -1, 7, 8); - TestOneHot(input_shape, input_data, expected_shape, - expected_data, 6, -1, 7, 8); - TestOneHot(input_shape, input_data, expected_shape, - expected_data, 6, -1, 7, 8); } TEST_F(OneHotTest, Dim2) { diff --git a/mace/ops/opencl/cl/one_hot.cl b/mace/ops/opencl/cl/one_hot.cl deleted file mode 100644 index 55645e73f6567c2bb9ecc21347f1da19975b6f7b..0000000000000000000000000000000000000000 --- a/mace/ops/opencl/cl/one_hot.cl +++ /dev/null @@ -1,78 +0,0 @@ -#include - -__kernel void one_hot(OUT_OF_RANGE_PARAMS - GLOBAL_WORK_GROUP_SIZE_DIM2 - __read_only image2d_t input, - __write_only image2d_t output, -#ifdef AXIS_0 - __private const int in_size, -#endif - __private const float on_value, - __private const float off_value) { - - const int channel_idx = get_global_id(0); - const int batch_idx = get_global_id(1); - -#ifndef NON_UNIFORM_WORK_GROUP - if (channel_idx >= global_size_dim0 || batch_idx >= global_size_dim1) { - return; - } -#endif - - DATA_TYPE4 out = off_value; - -#ifdef AXIS_0 - int in_idx = channel_idx * 4; - DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx)); - - if (in.s0 == batch_idx) { - out.s0 = on_value; - } - - if (++in_idx < in_size) { - in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx)); - - if (in.s0 == batch_idx) { - out.s1 = on_value; - } - - if (++in_idx < in_size) { - in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx)); - - if (in.s0 == batch_idx) { - out.s2 = on_value; - } - - if (++in_idx < in_size) { - in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx)); - - if (in.s0 == batch_idx) { - out.s3 = on_value; - } - } - } - } -#else - DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(0, batch_idx)); - int i = in.s0; - - if (i / 4 == channel_idx) { - switch (i % 4) { - case 0: - out.s0 = on_value; - break; - case 1: - out.s1 = on_value; - break; - case 2: - out.s2 = on_value; - break; - case 3: - out.s3 = on_value; - break; - } - } -#endif - - WRITE_IMAGET(output, (int2)(channel_idx, batch_idx), out); -} diff --git a/mace/ops/opencl/concat.h b/mace/ops/opencl/concat.h index abeec7c62e25299ac4de95e0b0dadc61bdb35900..5570a29430663ad7cf6b144044d4f5f022d97583 100644 --- a/mace/ops/opencl/concat.h +++ b/mace/ops/opencl/concat.h @@ -31,6 +31,7 @@ class OpenCLConcatKernel { virtual MaceStatus Compute( OpContext *context, const std::vector &input_list, + const int32_t axis, Tensor *output) = 0; MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConcatKernel); }; diff --git a/mace/ops/opencl/helper.cc b/mace/ops/opencl/helper.cc index 46d4fd5b288d8463bfb44a5a879d9a93a5aebc70..912a8d8d87e549290cf5d174187d288c2462fcb1 100644 --- a/mace/ops/opencl/helper.cc +++ b/mace/ops/opencl/helper.cc @@ -34,8 +34,6 @@ std::vector FormatBufferShape( return buffer_shape; } else if (buffer_shape_size == 2) { // NC return {buffer_shape[0], 1, 1, buffer_shape[1]}; - } else if (buffer_shape_size == 1) { // N - return {buffer_shape[0], 1, 1, 1}; } else { LOG(FATAL) << "GPU only support 2D or 4D input and output"; } diff --git a/mace/ops/opencl/image/concat.h b/mace/ops/opencl/image/concat.h index e1d9b1e70d79021df775aec320bb2a0096931155..3a7af7ed15b35b6f35fb4e844f76c61b1f1a6985 100644 --- a/mace/ops/opencl/image/concat.h +++ b/mace/ops/opencl/image/concat.h @@ -48,14 +48,14 @@ MaceStatus ConcatN(OpContext *context, template class ConcatKernel : public OpenCLConcatKernel { public: - explicit ConcatKernel(const int32_t axis) : axis_(axis) {} + ConcatKernel() {} MaceStatus Compute( OpContext *context, const std::vector &input_list, + const int32_t axis, Tensor *output) override; private: - int32_t axis_; cl::Kernel kernel_; uint32_t kwg_size_; std::vector input_shape_; @@ -65,6 +65,7 @@ template MaceStatus ConcatKernel::Compute( OpContext *context, const std::vector &input_list, + const int32_t axis, Tensor *output) { const int inputs_count = input_list.size(); @@ -76,13 +77,13 @@ MaceStatus ConcatKernel::Compute( MACE_CHECK(input->dim_size() == input0->dim_size(), "Ranks of all input tensors must be same."); for (int j = 0; j < input->dim_size(); ++j) { - if (j == axis_) { + if (j == axis) { continue; } MACE_CHECK(input->dim(j) == input0->dim(j), "Dimensions of inputs should equal except axis."); } - output_shape[axis_] += input->dim(axis_); + output_shape[axis] += input->dim(axis); } std::vector image_shape; OpenCLUtil::CalImage2DShape(output_shape, diff --git a/mace/ops/opencl/image/one_hot.h b/mace/ops/opencl/image/one_hot.h deleted file mode 100644 index 5ba7ba797eb0aab3b8aa4c48664d3dbd55afc0d7..0000000000000000000000000000000000000000 --- a/mace/ops/opencl/image/one_hot.h +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#ifndef MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_ -#define MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_ - -#include "mace/ops/opencl/one_hot.h" - -#include -#include -#include -#include - -#include "mace/core/op_context.h" -#include "mace/core/tensor.h" -#include "mace/ops/opencl/helper.h" - - -namespace mace { -namespace ops { -namespace opencl { -namespace image { - -template -class OneHotKernel : public OpenCLOneHotKernel { - public: - OneHotKernel(const int depth, const float on_value, - const float off_value, const int axis) - : depth_(depth), on_value_(on_value), - off_value_(off_value), axis_(axis) {} - - MaceStatus Compute( - OpContext *context, - const Tensor *input, - Tensor *output) override; - - private: - int depth_; - float on_value_; - float off_value_; - int axis_; - cl::Kernel kernel_; - uint32_t kwg_size_; - std::vector input_shape_; -}; - -template -MaceStatus OneHotKernel::Compute( - OpContext *context, - const Tensor *input, - Tensor *output) { - - auto input_shape = input->shape(); - index_t axis = axis_ == -1 ? input->dim_size() : axis_; - - MACE_CHECK(input->dim_size() == 1, "OneHot GPU only supports 1D input"); - MACE_CHECK(axis >= 0 && axis <= input->dim_size()); - - std::vector output_shape = - axis == 0 ? std::vector{depth_, input_shape[0]} : - std::vector{input_shape[0], depth_}; - std::vector output_image_shape{ - static_cast(RoundUpDiv4(output_shape[1])), - static_cast(output_shape[0])}; - MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - - auto runtime = context->device()->gpu_runtime()->opencl_runtime(); - MACE_OUT_OF_RANGE_DEFINITION; - - if (kernel_.get() == nullptr) { - std::set built_options; - MACE_OUT_OF_RANGE_CONFIG; - MACE_NON_UNIFORM_WG_CONFIG; - std::string kernel_name = MACE_OBFUSCATE_SYMBOL("one_hot"); - built_options.emplace("-Done_hot=" + kernel_name); - auto dt = DataTypeToEnum::value; - built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt)); - built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(dt)); - if (axis == 0) { - built_options.emplace("-DAXIS_0"); - } - MACE_RETURN_IF_ERROR(runtime->BuildKernel("one_hot", kernel_name, - built_options, &kernel_)); - - kwg_size_ = - static_cast(runtime->GetKernelMaxWorkGroupSize(kernel_)); - } - - const uint32_t gws[2] = { - static_cast(output_image_shape[0]), - static_cast(output_image_shape[1]) - }; - MACE_OUT_OF_RANGE_INIT(kernel_); - - if (!IsVecEqual(input_shape_, input->shape())) { - int idx = 0; - MACE_OUT_OF_RANGE_SET_ARGS(kernel_); - MACE_SET_2D_GWS_ARGS(kernel_, gws); - kernel_.setArg(idx++, *(input->opencl_image())); - kernel_.setArg(idx++, *(output->opencl_image())); - if (axis == 0) { - kernel_.setArg(idx++, static_cast(input_shape[0])); - } - kernel_.setArg(idx++, on_value_); - kernel_.setArg(idx++, off_value_); - - input_shape_ = input->shape(); - } - - const std::vector lws = {kwg_size_ / 64, 64, 0}; - std::string tuning_key = Concat("one_hot", output->dim(0), output->dim(1)); - MACE_RETURN_IF_ERROR(TuningOrRun2DKernel(runtime, kernel_, tuning_key, - gws, lws, context->future())); - - MACE_OUT_OF_RANGE_VALIDATION; - return MaceStatus::MACE_SUCCESS; -} - -} // namespace image -} // namespace opencl -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_ diff --git a/mace/ops/opencl/one_hot.h b/mace/ops/opencl/one_hot.h deleted file mode 100644 index 6197258d948145901da52c4c649342edfa8b8a41..0000000000000000000000000000000000000000 --- a/mace/ops/opencl/one_hot.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_OPS_OPENCL_ONE_HOT_H_ -#define MACE_OPS_OPENCL_ONE_HOT_H_ - -#include "mace/public/mace.h" -#include "mace/utils/utils.h" -namespace mace { - -class OpContext; -class Tensor; - -namespace ops { -class OpenCLOneHotKernel { - public: - virtual MaceStatus Compute( - OpContext *context, - const Tensor *input, - Tensor *output) = 0; - MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLOneHotKernel); -}; - -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_OPENCL_ONE_HOT_H_ diff --git a/repository/opencl-kernel/opencl_kernel_configure.bzl b/repository/opencl-kernel/opencl_kernel_configure.bzl index 572219b161bf496b68c0949da53c6820554f13c9..63191cda20032c191992ea3624c13c121c585121 100644 --- a/repository/opencl-kernel/opencl_kernel_configure.bzl +++ b/repository/opencl-kernel/opencl_kernel_configure.bzl @@ -50,7 +50,6 @@ def _opencl_encrypt_kernel_impl(repository_ctx): unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/one_hot.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl"))