diff --git a/docs/user_guide/op_lists.rst b/docs/user_guide/op_lists.rst index 42426a76173af7e1fcc1db6e726a01238cbb3fd5..6c0b4246cd801a7a2c069c625cf8c980bdd7a1a6 100644 --- a/docs/user_guide/op_lists.rst +++ b/docs/user_guide/op_lists.rst @@ -24,6 +24,7 @@ Operator lists "EMBEDDING_LOOKUP","Y","" "EXPANDDIMS","Y","Only CPU and TensorFlow is supported." "FILL","Y","Only CPU and TensorFlow is supported." + "FLATTEN","Y","Only Caffe is supported." "FULLY_CONNECTED","Y","" "GROUP_CONV_2D","","Caffe model with group count = channel count is supported." "IDENTITY","Y","Only TensorFlow model is supported." @@ -35,13 +36,16 @@ Operator lists "PAD","Y","" "PSROI_ALIGN","Y","" "PRELU","Y","Only Caffe model is supported" + "PRIOR_BOX","Y","Only Caffe model is supported" "REDUCE_MEAN","Y","Only TensorFlow model is supported. For GPU only H + W axis reduce is supported." "RELU","Y","" "RELU1","Y","" "RELU6","Y","" "RELUX","Y","" "RESHAPE","Y","Limited support: GPU only supports softmax-like usage, CPU only supports the usage which not change the storage format." - "RESIZE_BILINEAR","Y","" + "RESIZE_BICUBIC","Y","Only Tensorflow is supported" + "RESIZE_BILINEAR","Y","Only Tensorflow is supported" + "RESIZE_NEAREST_NEIGHBOR","Y","Only Tensorflow is supported" "REVERSE","Y","Only CPU and Tensorflow is supported" "RNN","","" "RPN_PROPOSAL_LAYER","Y","" diff --git a/mace/ops/opencl/cl/resize_nearest_neighbor.cl b/mace/ops/opencl/cl/resize_nearest_neighbor.cl new file mode 100644 index 0000000000000000000000000000000000000000..1747686808c9d4780d4f7d801ceb4268469233f8 --- /dev/null +++ b/mace/ops/opencl/cl/resize_nearest_neighbor.cl @@ -0,0 +1,46 @@ +#include + +__kernel void resize_nearest_neighbor_nocache( + OUT_OF_RANGE_PARAMS + GLOBAL_WORK_GROUP_SIZE_DIM3 + __read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ + __write_only image2d_t output, + __private const float height_scale, + __private const float width_scale, + __private const int in_height, + __private const int in_width, + __private const int out_height, + __private const int align_corner) { + const int ch_blk = get_global_id(0); + const int w = get_global_id(1); + const int hb = get_global_id(2); + +#ifndef NON_UNIFORM_WORK_GROUP + if (ch_blk >= global_size_dim0 || w >= global_size_dim1 + || hb >= global_size_dim2) { + return; + } +#endif + const int ch_blks = global_size_dim0; + const int out_width = global_size_dim1; + + const int b = hb / out_height; + const int h = hb - mul24(b, out_height); + + const int h_in = min((align_corner) ? (int) round(h * height_scale) : + (int) floor(h * height_scale), in_height - 1); + const int w_in = min((align_corner) ? (int) round(w * width_scale) : + (int) floor(w * width_scale), in_width - 1); + + const int in_w_offset = mul24(ch_blk, in_width); + const int in_h_offset = mul24(b, in_height); + + const int out_w_offset = mul24(ch_blk, out_width); + const int out_h_offset = mul24(b, out_height); + + DATA_TYPE4 out = READ_IMAGET(input, SAMPLER, (int2)(in_w_offset + w_in, + in_h_offset + h_in)); + + WRITE_IMAGET(output, (int2)(out_w_offset + w, out_h_offset + h), out); +} + diff --git a/mace/ops/opencl/image/resize_nearest_neighbor.h b/mace/ops/opencl/image/resize_nearest_neighbor.h new file mode 100644 index 0000000000000000000000000000000000000000..7527f00832b9e79391956bc210ff6b9cbe3da5ee --- /dev/null +++ b/mace/ops/opencl/image/resize_nearest_neighbor.h @@ -0,0 +1,179 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef MACE_OPS_OPENCL_IMAGE_RESIZE_NEAREST_NEIGHBOR_H_ +#define MACE_OPS_OPENCL_IMAGE_RESIZE_NEAREST_NEIGHBOR_H_ + +#include "mace/ops/opencl/resize_nearest_neighbor.h" + +#include +#include +#include +#include +#include + +#include "mace/core/op_context.h" +#include "mace/core/tensor.h" +#include "mace/ops/opencl/helper.h" +#include "mace/ops/resize_nearest_neighbor.h" + +namespace mace { +namespace ops { +namespace opencl { +namespace image { +namespace resize_nearest_neighbor { +inline std::vector LocalWS(OpenCLRuntime *runtime, + const uint32_t *gws, + const uint32_t kwg_size) { + std::vector lws(4, 0); + if (kwg_size == 0) { + lws[0] = lws[1] = lws[2] = 1; + } else { + uint64_t + cache_size = runtime->device_global_mem_cache_size(); + uint32_t base = std::max(cache_size / kBaseGPUMemCacheSize, 1); + lws[1] = std::min(gws[1], kwg_size); + if (lws[1] >= base) { + lws[0] = std::min(gws[0], base); + } else { + lws[0] = gws[0] / 8; + if (lws[0] == 0) { + lws[0] = gws[0]; + } + } + lws[0] = std::min(lws[0], kwg_size / lws[1]); + const uint32_t lws_size = lws[0] * lws[1]; + lws[2] = gws[2] / 8; + if (lws[2] == 0) { + lws[2] = gws[2]; + } + lws[2] = std::max(std::min(lws[2], kwg_size / lws_size), + 1); + } + return lws; +} + +} // namespace resize_nearest_neighbor + +template +class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel { + public: + explicit ResizeNearestNeighborKernel(bool align_corners) + : align_corners_(align_corners) {} + + MaceStatus Compute( + OpContext *context, + const Tensor *input, + const Tensor *size, + Tensor *output) override; + + private: + bool align_corners_; + cl::Kernel kernel_; + uint32_t kwg_size_; + std::vector input_shape_; +}; + +template +MaceStatus ResizeNearestNeighborKernel::Compute( + OpContext *context, + const Tensor *input, + const Tensor *size, + Tensor *output) { + const index_t batch = input->dim(0); + const index_t in_height = input->dim(1); + const index_t in_width = input->dim(2); + const index_t channels = input->dim(3); + Tensor::MappingGuard input_mapper(input); + Tensor::MappingGuard size_mapper(size); + Tensor::MappingGuard output_mapper(output); + const index_t out_height = size->data()[0]; + const index_t out_width = size->data()[1]; + const index_t channel_blocks = RoundUpDiv4(channels); + + const uint32_t gws[3] = {static_cast(channel_blocks), + static_cast(out_width), + static_cast(out_height * batch)}; + + auto runtime = context->device()->gpu_runtime()->opencl_runtime(); + MACE_OUT_OF_RANGE_DEFINITION; + + if (kernel_.get() == nullptr) { + std::set built_options; + MACE_OUT_OF_RANGE_CONFIG; + MACE_NON_UNIFORM_WG_CONFIG; + std::string kernel_name = MACE_OBFUSCATE_SYMBOL( + "resize_nearest_neighbor_nocache"); + built_options.emplace("-Dresize_nearest_neighbor_nocache=" + kernel_name); + auto dt = DataTypeToEnum::value; + built_options.emplace("-DDATA_TYPE=" + DtToUpCompatibleCLDt(dt)); + built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpCompatibleCLCMDDt(dt)); + MACE_RETURN_IF_ERROR( + runtime->BuildKernel("resize_nearest_neighbor", + kernel_name, + built_options, + &kernel_)); + + kwg_size_ = + static_cast(runtime->GetKernelMaxWorkGroupSize(kernel_)); + } + MACE_OUT_OF_RANGE_INIT(kernel_); + if (!IsVecEqual(input_shape_, input->shape())) { + MACE_CHECK(out_height > 0 && out_width > 0); + std::vector output_shape{batch, out_height, out_width, channels}; + + std::vector output_image_shape; + OpenCLUtil::CalImage2DShape(output_shape, OpenCLBufferType::IN_OUT_CHANNEL, + &output_image_shape); + MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); + + float height_scale = + mace::ops::resize_nearest_neighbor::CalculateResizeScale( + in_height, out_height, align_corners_); + float width_scale = + mace::ops::resize_nearest_neighbor::CalculateResizeScale( + in_width, out_width, align_corners_); + + uint32_t idx = 0; + MACE_OUT_OF_RANGE_SET_ARGS(kernel_); + MACE_SET_3D_GWS_ARGS(kernel_, gws); + kernel_.setArg(idx++, *(input->opencl_image())); + kernel_.setArg(idx++, *(output->opencl_image())); + kernel_.setArg(idx++, height_scale); + kernel_.setArg(idx++, width_scale); + kernel_.setArg(idx++, static_cast(in_height)); + kernel_.setArg(idx++, static_cast(in_width)); + kernel_.setArg(idx++, static_cast(out_height)); + kernel_.setArg(idx++, static_cast(align_corners_)); + + input_shape_ = input->shape(); + } + + const std::vector + lws = resize_nearest_neighbor::LocalWS(runtime, gws, kwg_size_); + std::string tuning_key = + Concat("resize_nearest_neighbor_opencl_kernel", output->dim(0), + output->dim(1), output->dim(2), output->dim(3)); + MACE_RETURN_IF_ERROR(TuningOrRun3DKernel(runtime, kernel_, tuning_key, + gws, lws, context->future())); + + MACE_OUT_OF_RANGE_VALIDATION; + return MaceStatus::MACE_SUCCESS; +} + +} // namespace image +} // namespace opencl +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_OPENCL_IMAGE_RESIZE_NEAREST_NEIGHBOR_H_ diff --git a/mace/ops/opencl/resize_nearest_neighbor.h b/mace/ops/opencl/resize_nearest_neighbor.h new file mode 100644 index 0000000000000000000000000000000000000000..fda220aee9704228d435a304001a5f679f2d28e3 --- /dev/null +++ b/mace/ops/opencl/resize_nearest_neighbor.h @@ -0,0 +1,40 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_ +#define MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_ + +#include "mace/core/types.h" +#include "mace/public/mace.h" +#include "mace/utils/utils.h" + +namespace mace { + +class OpContext; +class Tensor; + +namespace ops { +class OpenCLResizeNearestNeighborKernel { + public: + virtual MaceStatus Compute( + OpContext *context, + const Tensor *input, + const Tensor *size, + Tensor *output) = 0; + MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLResizeNearestNeighborKernel); +}; +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_ diff --git a/mace/ops/ops_registry.cc b/mace/ops/ops_registry.cc index 2fdd6a08dfada3f0546d08c797422fad15e2b93f..5780483aa22c6874230dbf61ffa6b7b30f96d769 100644 --- a/mace/ops/ops_registry.cc +++ b/mace/ops/ops_registry.cc @@ -49,6 +49,7 @@ extern void RegisterPriorBox(OpRegistryBase *op_registry); extern void RegisterReshape(OpRegistryBase *op_registry); extern void RegisterResizeBicubic(OpRegistryBase *op_registry); extern void RegisterResizeBilinear(OpRegistryBase *op_registry); +extern void RegisterResizeNearestNeighbor(OpRegistryBase *op_registry); extern void RegisterReverse(OpRegistryBase *op_registry); extern void RegisterScalarMath(OpRegistryBase *op_registry); extern void RegisterShape(OpRegistryBase *op_registry); @@ -108,6 +109,7 @@ OpRegistry::OpRegistry() : OpRegistryBase() { ops::RegisterReshape(this); ops::RegisterResizeBicubic(this); ops::RegisterResizeBilinear(this); + ops::RegisterResizeNearestNeighbor(this); ops::RegisterReverse(this); ops::RegisterScalarMath(this); ops::RegisterShape(this); diff --git a/mace/ops/resize_bicubic_benchmark.cc b/mace/ops/resize_bicubic_benchmark.cc index 4a4fbc268cf47d175b517adb14c05d017908cc67..f8f9eb74cbaea11cc8888cedf45ee166853d2579 100644 --- a/mace/ops/resize_bicubic_benchmark.cc +++ b/mace/ops/resize_bicubic_benchmark.cc @@ -25,12 +25,12 @@ namespace test { namespace { template void ResizeBicubicBenchmark(int iters, - int batch, - int channels, - int input_height, - int input_width, - int output_height, - int output_width) { + int batch, + int channels, + int input_height, + int input_width, + int output_height, + int output_width) { mace::testing::StopTiming(); OpsTestNet net; diff --git a/mace/ops/resize_nearest_neighbor.cc b/mace/ops/resize_nearest_neighbor.cc new file mode 100644 index 0000000000000000000000000000000000000000..7cbec0bcc1f8cfdcbe22db73cb3468e416044e4e --- /dev/null +++ b/mace/ops/resize_nearest_neighbor.cc @@ -0,0 +1,181 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/ops/resize_nearest_neighbor.h" + +#include +#include +#include + +#include "mace/core/operator.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/ops/opencl/image/resize_nearest_neighbor.h" +#endif // MACE_ENABLE_OPENCL + +namespace mace { +namespace ops { +template +inline void ResizeImageNCHW(const T *images, + const index_t batch_size, + const index_t in_height, + const index_t in_width, + const index_t out_height, + const index_t out_width, + const index_t channels, + const float height_scale, + const float width_scale, + bool align_corners, + T *output) { +#pragma omp parallel for collapse(2) schedule(runtime) + for (index_t b = 0; b < batch_size; ++b) { + for (index_t c = 0; c < channels; ++c) { + const T + *channel_input_ptr = + images + (b * channels + c) * in_height * in_width; + T *channel_output_ptr = + output + (b * channels + c) * out_height * out_width; + for (index_t y = 0; y < out_height; ++y) { + const index_t in_y = std::min( + (align_corners) ? static_cast(roundf(y * height_scale)) + : static_cast(floorf(y * height_scale)), + in_height - 1); + for (int x = 0; x < out_width; ++x) { + const index_t in_x = std::min( + (align_corners) ? static_cast(roundf(x * width_scale)) + : static_cast(floorf(x * width_scale)), + in_width - 1); + channel_output_ptr[y * out_width + x] = + channel_input_ptr[in_y * in_width + in_x]; + } + } + } + } +} + +template +class ResizeNearestNeighborOp; + +template +class ResizeNearestNeighborOp : public Operation { + public: + explicit ResizeNearestNeighborOp(OpConstructContext *context) + : Operation(context), + align_corners_(Operation::GetOptionalArg("align_corners", + false)) {} + + MaceStatus Run(OpContext *context) override { + MACE_UNUSED(context); + const Tensor *input = this->Input(0); + const Tensor *size = this->Input(1); + Tensor *output = this->Output(0); + + MACE_CHECK(input->dim_size() == 4 && size->dim_size() == 1, + "input must be 4-dimensional and size must be 1-dimensional. ", + input->dim_size(), size->dim_size()); + + const index_t batch = input->dim(0); + const index_t channels = input->dim(1); + const index_t in_height = input->dim(2); + const index_t in_width = input->dim(3); + + const index_t out_height = size->data()[0]; + const index_t out_width = size->data()[1]; + MACE_CHECK(out_height > 0 && out_width > 0, out_height, out_width); + std::vector out_shape{batch, channels, out_height, out_width}; + MACE_RETURN_IF_ERROR(output->Resize(out_shape)); + Tensor::MappingGuard input_mapper(input); + Tensor::MappingGuard size_mapper(size); + Tensor::MappingGuard output_mapper(output); + const T *input_data = input->data(); + T *output_data = output->mutable_data(); + + if (out_height == in_height && out_width == in_width) { + std::copy(input_data, + input_data + batch * channels * in_height * in_width, + output_data); + return MaceStatus::MACE_SUCCESS; + } + + float height_scale = + resize_nearest_neighbor::CalculateResizeScale(in_height, + out_height, + align_corners_); + float width_scale = + resize_nearest_neighbor::CalculateResizeScale(in_width, + out_width, + align_corners_); + ResizeImageNCHW(input_data, + batch, + in_height, + in_width, + out_height, + out_width, + channels, + height_scale, + width_scale, + align_corners_, + output_data); + return MaceStatus::MACE_SUCCESS; + } + + private: + bool align_corners_; +}; + +#ifdef MACE_ENABLE_OPENCL +template +class ResizeNearestNeighborOp : public Operation { + public: + explicit ResizeNearestNeighborOp(OpConstructContext *context) + : Operation(context) { + bool align_corners = Operation::GetOptionalArg( + "align_corners", false); + if (context->device()->gpu_runtime()->UseImageMemory()) { + kernel_.reset(new opencl::image::ResizeNearestNeighborKernel( + align_corners)); + } else { + MACE_NOT_IMPLEMENTED; + } + } + MaceStatus Run(OpContext *context) override { + const Tensor *input = this->Input(0); + const Tensor *size = this->Input(1); + Tensor *output = this->Output(0); + MACE_CHECK(input->dim_size() == 4 && size->dim_size() == 1, + "input must be 4-dimensional and size must be 1-dimensional.", + input->dim_size(), size->dim_size()); + + return kernel_->Compute(context, input, size, output); + } + + private: + std::unique_ptr kernel_; +}; +#endif // MACE_ENABLE_OPENCL + +void RegisterResizeNearestNeighbor(OpRegistryBase *op_registry) { + MACE_REGISTER_OP(op_registry, "ResizeNearestNeighbor", + ResizeNearestNeighborOp, DeviceType::CPU, float); + +#ifdef MACE_ENABLE_OPENCL + MACE_REGISTER_OP(op_registry, "ResizeNearestNeighbor", + ResizeNearestNeighborOp, DeviceType::GPU, float); + + MACE_REGISTER_OP(op_registry, "ResizeNearestNeighbor", + ResizeNearestNeighborOp, DeviceType::GPU, half); +#endif // MACE_ENABLE_OPENCL +} + +} // namespace ops +} // namespace mace diff --git a/mace/ops/resize_nearest_neighbor.h b/mace/ops/resize_nearest_neighbor.h new file mode 100644 index 0000000000000000000000000000000000000000..0f27a219daf17329328321bd9132fad6ab5b462c --- /dev/null +++ b/mace/ops/resize_nearest_neighbor.h @@ -0,0 +1,34 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_OPS_RESIZE_NEAREST_NEIGHBOR_H_ +#define MACE_OPS_RESIZE_NEAREST_NEIGHBOR_H_ + +#include "mace/core/types.h" + +namespace mace { +namespace ops { +namespace resize_nearest_neighbor { +inline float CalculateResizeScale(index_t in_size, + index_t out_size, + bool align_corners) { + return (align_corners && out_size > 1) + ? (in_size - 1) / static_cast(out_size - 1) + : in_size / static_cast(out_size); +} +} // namespace resize_nearest_neighbor +} // namespace ops +} // namespace mace + +#endif // MACE_OPS_RESIZE_NEAREST_NEIGHBOR_H_ diff --git a/mace/ops/resize_nearest_neighbor_benchmark.cc b/mace/ops/resize_nearest_neighbor_benchmark.cc new file mode 100644 index 0000000000000000000000000000000000000000..d279cfe543f97b1b7448612d6d3ae023a3823e15 --- /dev/null +++ b/mace/ops/resize_nearest_neighbor_benchmark.cc @@ -0,0 +1,105 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "mace/benchmark/statistics.h" +#include "mace/core/testing/test_benchmark.h" +#include "mace/ops/ops_test_util.h" + +namespace mace { +namespace ops { +namespace test { + +namespace { +template +void ResizeNearestNeighborBenchmark(int iters, + int batch, + int channels, + int input_height, + int input_width, + int output_height, + int output_width) { + mace::testing::StopTiming(); + + OpsTestNet net; + + // Add input data + std::vector size = {output_height, output_width}; + if (D == DeviceType::CPU) { + net.AddRandomInput("Input", + {batch, channels, input_height, input_width}); + net.AddInputFromArray("Size", {2}, size); + } else if (D == DeviceType::GPU) { + net.AddRandomInput("Input", + {batch, input_height, input_width, channels}); + net.AddInputFromArray("Size", {2}, size); + } else { + MACE_NOT_IMPLEMENTED; + } + + OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborBenchmark") + .Input("Input") + .Input("Size") + .Output("Output") + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + + // Warm-up + for (int i = 0; i < 5; ++i) { + net.RunOp(D); + } + + mace::testing::StartTiming(); + while (iters--) { + net.RunOp(D); + } + net.Sync(); +} +} // namespace + +#define MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, TYPE, \ + DEVICE) \ + static void \ + MACE_BM_RESIZE_NEAREST_NEIGHBOR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_\ + ##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t macs = static_cast(iters) * \ + mace::benchmark::StatMACs("ResizeNearestNeighbor", \ + {}, {N, H1, W1, C}); \ + const int64_t tot = static_cast(iters) * N * C * H0 * W0; \ + mace::testing::MacsProcessed(macs); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + ResizeNearestNeighborBenchmark(iters, N, C, H0, W0, H1, W1);\ + } \ + MACE_BENCHMARK( \ + MACE_BM_RESIZE_NEAREST_NEIGHBOR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_\ + ##TYPE##_##DEVICE) + +#define MACE_BM_RESIZE_NEAREST_NEIGHBOR(N, C, H0, W0, H1, W1) \ + MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ + MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \ + MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, half, GPU); + +MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 120, 120, 480, 480); +MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 256, 7, 7, 15, 15); +MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 256, 15, 15, 30, 30); +MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 30, 30, 60, 60); +MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 240, 240, 480, 480); +MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 3, 4032, 3016, 480, 480); +MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 3, 480, 480, 4032, 3016); + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/resize_nearest_neighbor_test.cc b/mace/ops/resize_nearest_neighbor_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..9d0ed8c6be2dcf03e583c7f6234f998b4aa04ffb --- /dev/null +++ b/mace/ops/resize_nearest_neighbor_test.cc @@ -0,0 +1,151 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "mace/ops/ops_test_util.h" + +namespace mace { +namespace ops { +namespace test { + +class ResizeNearestNeighborTest : public OpsTestBase {}; + +TEST_F(ResizeNearestNeighborTest, CPUResizeNearestNeighborWOAlignCorners) { + testing::internal::LogToStderr(); + // Construct graph + OpsTestNet net; + + // Add input data + std::vector input(24); + std::iota(begin(input), end(input), 0); + std::vector size = {1, 2}; + net.AddInputFromArray("Input", {1, 2, 4, 3}, input); + net.TransformDataFormat("Input", NHWC, "InputNCHW", + NCHW); + net.AddInputFromArray("Size", {2}, size); + + OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborTest") + .Input("InputNCHW") + .Input("Size") + .Output("OutputNCHW") + .AddIntsArg("size", {1, 2}) + .Finalize(net.NewOperatorDef()); + + // Run + net.RunOp(); + net.TransformDataFormat("OutputNCHW", NCHW, "Output", + NHWC); + + // Check + auto expected = net.CreateTensor({1, 1, 2, 3}, {0, 1, 2, 6, 7, 8}); + + ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); +} + +TEST_F(ResizeNearestNeighborTest, ResizeNearestNeighborWAlignCorners) { + testing::internal::LogToStderr(); + // Construct graph + OpsTestNet net; + + // Add input data + std::vector input(24); + std::iota(begin(input), end(input), 0); + std::vector size = {1, 2}; + net.AddInputFromArray("Input", {1, 2, 4, 3}, input); + net.TransformDataFormat("Input", NHWC, "InputNCHW", + NCHW); + net.AddInputFromArray("Size", {2}, size); + + OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborTest") + .Input("InputNCHW") + .Input("Size") + .Output("OutputNCHW") + .AddIntArg("align_corners", 1) + .AddIntsArg("size", {1, 2}) + .Finalize(net.NewOperatorDef()); + + // Run + net.RunOp(); + net.TransformDataFormat("OutputNCHW", NCHW, "Output", + NHWC); + + // Check + auto expected = net.CreateTensor({1, 1, 2, 3}, {0, 1, 2, 9, 10, 11}); + + ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); +} + +namespace { +template +void TestRandomResizeNearestNeighbor() { + testing::internal::LogToStderr(); + static unsigned int seed = time(NULL); + for (int round = 0; round < 10; ++round) { + int batch = 1 + rand_r(&seed) % 5; + int channels = 1 + rand_r(&seed) % 100; + int in_height = 1 + rand_r(&seed) % 100; + int in_width = 1 + rand_r(&seed) % 100; + int align_corners = rand_r(&seed) % 1; + + // Construct graph + OpsTestNet net; + // Add input data + std::vector size = {20, 40}; + net.AddRandomInput("Input", + {batch, in_height, in_width, channels}); + net.TransformDataFormat("Input", NHWC, "InputNCHW", + NCHW); + net.AddInputFromArray("Size", + {2}, size); + + OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborTest") + .Input("InputNCHW") + .Input("Size") + .Output("OutputNCHW") + .AddIntArg("align_corners", align_corners) + .Finalize(net.NewOperatorDef()); + // Run on CPU + net.RunOp(DeviceType::CPU); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); + + auto expected = net.CreateTensor(); + expected->Copy(*net.GetOutput("Output")); + + if (D == DeviceType::GPU) { + OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborTest") + .Input("Input") + .Input("Size") + .Output("Output") + .AddIntArg("align_corners", align_corners) + .Finalize(net.NewOperatorDef()); + // Run + net.RunOp(D); + } + // Check + ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5, + 1e-6); + } +} + +} // namespace + +TEST_F(ResizeNearestNeighborTest, RandomResizeNearestNeighbor) { + TestRandomResizeNearestNeighbor(); +} + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index f672f5ab27f55818f3d8e3245a967fc7d8c2acbe..e00a607d7dee19de0494e1068ae34423454a9e41 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -130,6 +130,7 @@ MaceSupportedOps = [ 'Reshape', 'ResizeBicubic', 'ResizeBilinear', + 'ResizeNearestNeighbor', 'Reverse', 'ScalarMath', 'Split', diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py index e3ca146ab6c7af308fb1e0d565ef22996536e394..a45a84e0ea2b9ed8710ecbe9af33109b03766469 100644 --- a/mace/python/tools/converter_tool/tensorflow_converter.py +++ b/mace/python/tools/converter_tool/tensorflow_converter.py @@ -91,6 +91,7 @@ TFSupportedOps = [ 'Softmax', 'ResizeBicubic', 'ResizeBilinear', + 'ResizeNearestNeighbor', 'Placeholder', 'SpaceToBatchND', 'BatchToSpaceND', @@ -239,6 +240,7 @@ class TensorflowConverter(base_converter.ConverterInterface): TFOpType.Softmax.name: self.convert_softmax, TFOpType.ResizeBicubic.name: self.convert_resize_bicubic, TFOpType.ResizeBilinear.name: self.convert_resize_bilinear, + TFOpType.ResizeNearestNeighbor.name: self.convert_resize_nearest_neighbor, # noqa TFOpType.Placeholder.name: self.convert_nop, TFOpType.SpaceToBatchND.name: self.convert_space_batch, TFOpType.BatchToSpaceND.name: self.convert_space_batch, @@ -659,8 +661,15 @@ class TensorflowConverter(base_converter.ConverterInterface): align_corners_arg.name = MaceKeyword.mace_align_corners_str align_corners_arg.i = tf_op.get_attr(tf_align_corners) - def convert_space_batch(self, tf_op): + def convert_resize_nearest_neighbor(self, tf_op): + op = self.convert_general_op(tf_op) + op.type = MaceOp.ResizeNearestNeighbor.name + align_corners_arg = op.arg.add() + align_corners_arg.name = MaceKeyword.mace_align_corners_str + align_corners_arg.i = tf_op.get_attr(tf_align_corners) + + def convert_space_batch(self, tf_op): op = self.convert_general_op(tf_op) del op.input[1:] diff --git a/repository/opencl-kernel/opencl_kernel_configure.bzl b/repository/opencl-kernel/opencl_kernel_configure.bzl index bab88f5398b02e922b9e3a03e93fd0e150635dad..759233fc77cf1e8a59d07a85dded244ad37ff035 100644 --- a/repository/opencl-kernel/opencl_kernel_configure.bzl +++ b/repository/opencl-kernel/opencl_kernel_configure.bzl @@ -1,72 +1,78 @@ """Repository rule for opencl encrypt kernel autoconfiguration, borrow from tensorflow """ -def _opencl_encrypt_kernel_impl(repository_ctx): - repository_ctx.template( - "BUILD", - Label("//repository/opencl-kernel:BUILD.tpl")) - mace_root_path = str(repository_ctx.path(Label("@mace//:BUILD")))[:-len("BUILD")] - generated_files_path = repository_ctx.path("gen") +def _opencl_encrypt_kernel_impl(repository_ctx): + repository_ctx.template( + "BUILD", + Label("//repository/opencl-kernel:BUILD.tpl"), + ) - ret = repository_ctx.execute( - ["test", "-f", "%s/.git/logs/HEAD" % mace_root_path]) - if ret.return_code == 0: - unused_var = repository_ctx.path(Label("//:.git/HEAD")) - ret = repository_ctx.execute( - ["test", "-f", "%s/.git/refs/heads/master" % mace_root_path]) - if ret.return_code == 0: - unused_var = repository_ctx.path(Label("//:.git/refs/heads/master")) + mace_root_path = str(repository_ctx.path(Label("@mace//:BUILD")))[:-len("BUILD")] + generated_files_path = repository_ctx.path("gen") - ret = repository_ctx.execute( - ["test", "-f", "%s/mace/ops/opencl/cl/common.h" % mace_root_path]) - if ret.return_code == 0: - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/activation.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/addn.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_norm.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_to_space.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/bias_add.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_to_image.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_transform.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/channel_shuffle.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/common.h")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/concat.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1_buffer.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_3x3.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_buffer.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/crop.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/deconv_2d.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_deconv2d.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depth_to_space.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d_buffer.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/eltwise.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/reduce.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bicubic.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bilinear.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/split.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax_buffer.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_batch.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_depth.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/sqrdiff_mean.cl")) - unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/winograd_transform.cl")) + ret = repository_ctx.execute( + ["test", "-f", "%s/.git/logs/HEAD" % mace_root_path], + ) + if ret.return_code == 0: + unused_var = repository_ctx.path(Label("//:.git/HEAD")) + ret = repository_ctx.execute( + ["test", "-f", "%s/.git/refs/heads/master" % mace_root_path], + ) + if ret.return_code == 0: + unused_var = repository_ctx.path(Label("//:.git/refs/heads/master")) - python_bin_path = repository_ctx.which("python") + ret = repository_ctx.execute( + ["test", "-f", "%s/mace/ops/opencl/cl/common.h" % mace_root_path], + ) + if ret.return_code == 0: + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/activation.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/addn.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_norm.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_to_space.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/bias_add.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_to_image.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_transform.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/channel_shuffle.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/common.h")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/concat.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1_buffer.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_3x3.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_buffer.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/crop.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/deconv_2d.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_deconv2d.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depth_to_space.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d_buffer.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/eltwise.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/reduce.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bicubic.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bilinear.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_nearest_neighbor.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/split.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax_buffer.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_batch.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_depth.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/sqrdiff_mean.cl")) + unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/winograd_transform.cl")) - repository_ctx.execute([ - python_bin_path, '%s/mace/python/tools/encrypt_opencl_codegen.py' % mace_root_path, - '--cl_kernel_dir=%s/mace/ops/opencl/cl' % mace_root_path, - '--output_path=%s/encrypt_opencl_kernel' % generated_files_path - ], quiet=False) + python_bin_path = repository_ctx.which("python") + repository_ctx.execute([ + python_bin_path, + "%s/mace/python/tools/encrypt_opencl_codegen.py" % mace_root_path, + "--cl_kernel_dir=%s/mace/ops/opencl/cl" % mace_root_path, + "--output_path=%s/encrypt_opencl_kernel" % generated_files_path, + ], quiet = False) encrypt_opencl_kernel_repository = repository_rule( implementation = _opencl_encrypt_kernel_impl, diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 702ff738efb4f7f2485ca16121f9ee2f2fb05de8..6a945d13c2eb7e72a21fc37f675060e0ee419c28 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -766,6 +766,7 @@ def validate_model(abi, "--validation_threshold=%f" % validation_threshold, "--input_data_type=%s" % ",".join(input_data_types), "--backend=%s" % ",".join(backend), + "--log_file=%s" % log_file, _fg=True) six.print_("Validation done!\n")