diff --git a/mace/kernels/opencl/cl/space_to_depth.cl b/mace/kernels/opencl/cl/space_to_depth.cl deleted file mode 100644 index b54ee2954546208aa3360e12aed6d49410a79c1a..0000000000000000000000000000000000000000 --- a/mace/kernels/opencl/cl/space_to_depth.cl +++ /dev/null @@ -1,25 +0,0 @@ -#include - -__kernel void space_to_depth(__read_only image2d_t input, - __private const int block_size, - __private const int input_depth, - __write_only image2d_t output) { - const int d = get_global_id(0); - const int w = get_global_id(1); - const int h = get_global_id(2); - const int input_width = get_global_size(1); - const int in_pos = mad24(d, input_width, w); - const int output_width = input_width / block_size; - - const int out_h = h / block_size; - const int offset_h = h % block_size; - const int out_w = w / block_size; - const int offset_w = w % block_size; - const int offset_d = (offset_h * block_size + offset_w) * input_depth; - const int out_d = d + offset_d; - const int out_pos = mad24(out_d, output_width, out_w); - - DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, h)); - - WRITE_IMAGET(output, (int2)(out_pos, out_h), in_data); -} diff --git a/mace/kernels/opencl/space_to_depth_opencl.cc b/mace/kernels/opencl/space_to_depth_opencl.cc deleted file mode 100644 index e5023104cb54442189866ceaa0c6fec322846cb9..0000000000000000000000000000000000000000 --- a/mace/kernels/opencl/space_to_depth_opencl.cc +++ /dev/null @@ -1,77 +0,0 @@ -// -// Copyright (c) 2018 XiaoMi All rights reserved. -// - -#include "mace/kernels/space_to_depth.h" -#include "mace/core/runtime/opencl/cl2_header.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/kernels/opencl/helper.h" -#include "mace/utils/utils.h" -#include "mace/utils/tuner.h" - -namespace mace { -namespace kernels { - -template -void SpaceToDepthOpFunctor::operator()( - const Tensor *input, - Tensor *output, - StatsFuture *future) { - const index_t batch_size = input->dim(0); - const index_t input_height = input->dim(1); - const index_t input_width = input->dim(2); - const index_t input_depth = input->dim(3); - - const index_t output_height = input_height / block_size_; - const index_t output_width = input_width / block_size_; - const index_t output_depth = input_depth * block_size_ * block_size_; - - std::vector output_shape = {batch_size, output_height, output_width, output_depth}; - - std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); - output->ResizeImage(output_shape, image_shape); - - const int input_depth_blocks = RoundUpDiv4(input_depth); - - if (kernel_.get() == nullptr) { - auto runtime = OpenCLRuntime::Global(); - std::set built_options; - std::string kernel_name = MACE_OBFUSCATE_SYMBOL("space_to_depth"); - built_options.emplace("-Dspace_to_depth=" + kernel_name); - auto dt = DataTypeToEnum::value; - built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); - built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); - kernel_ = runtime->BuildKernel("space_to_depth", kernel_name, - built_options); - } - if (!IsVecEqual(input_shape_, input->shape())) { - uint32_t idx = 0; - kernel_.setArg(idx++, *(input->opencl_image())); - kernel_.setArg(idx++, block_size_); - kernel_.setArg(idx++, input_depth_blocks); - kernel_.setArg(idx++, *(output->opencl_image())); - - input_shape_ = input->shape(); - } - - const uint32_t gws[3] = {static_cast(input_depth_blocks), - static_cast(input_width), - static_cast(input_height * batch_size)}; - const std::vector lws = {8, 16, 8, 1}; - std::stringstream ss; - ss << "space_to_depth_opencl_kernel_" - << input->dim(0) << "_" - << input->dim(1) << "_" - << input->dim(2) << "_" - << input->dim(3); - TuningOrRun3DKernel(kernel_, ss.str(), gws, lws, future); -} - -template -struct SpaceToDepthOpFunctor; -template -struct SpaceToDepthOpFunctor; - -} // namespace kernels -} // namespace mace diff --git a/mace/kernels/space_to_depth.h b/mace/kernels/space_to_depth.h deleted file mode 100644 index b3125901549cf24f35d3d1214848b4ccabaa6229..0000000000000000000000000000000000000000 --- a/mace/kernels/space_to_depth.h +++ /dev/null @@ -1,76 +0,0 @@ -// -// Created by liutuo on 18-3-20. -// - -#ifndef MACE_KERNELS_SPACE_TO_DEPTH_H -#define MACE_KERNELS_SPACE_TO_DEPTH_H - -#include "mace/core/future.h" -#include "mace/core/tensor.h" - -namespace mace { -namespace kernels { - -template -struct SpaceToDepthOpFunctor { - explicit SpaceToDepthOpFunctor(const int block_size) : block_size_(block_size) {} - void operator()(const Tensor *input, - Tensor *output, - StatsFuture *future) { - - const int batch_size = input->dim(0); - const int input_height = input->dim(1); - const int input_width = input->dim(2); - const int input_depth = input->dim(3); - - const index_t output_depth = input_depth * block_size_ * block_size_; - const index_t output_width = input_width / block_size_; - const index_t output_height = input_height / block_size_; - - std::vector output_shape = {batch_size, output_height, output_width, output_depth}; - - output->Resize(output_shape); - - Tensor::MappingGuard logits_guard(input); - Tensor::MappingGuard output_guard(output); - const T *input_ptr = input->data(); - T *output_ptr = output->mutable_data(); - -#pragma omp parallel for - for (int b = 0; b < batch_size; ++b) { - for (int h = 0; h < input_height; ++h) { - const int out_h = h / block_size_; - const int offset_h = (h % block_size_); - for (int w = 0; w < input_width; ++w) { - const int out_w = w/ block_size_; - const int offset_w = (w % block_size_); - const int offset_d = (offset_h * block_size_ + offset_w) * input_depth; - for (int d = 0; d < input_depth; ++d) { - const int out_d = d + offset_d; - const int o_index = ((b * output_height + out_h) * output_width + out_w) * output_depth + out_d; - const int i_index = ((b * input_height + h) * input_width + w) * input_depth + d; - output_ptr[o_index] = input_ptr[i_index]; - } - } - } - } - - } - const int block_size_; -}; - -template -struct SpaceToDepthOpFunctor { - - SpaceToDepthOpFunctor(const int block_size) : block_size_(block_size) {} - void operator()(const Tensor *input, Tensor *output, StatsFuture *future); - - cl::Kernel kernel_; - const int block_size_; - std::vector input_shape_; -}; - -} // namespace kernels -} // namespace mace - -#endif //MACE_KERNELS_SPACE_TO_DEPTH_H