diff --git a/mace/core/mace.cc b/mace/core/mace.cc index de6c93534e08bbdbcb71429a5dcddf69c2d50f11..9d0d0c76a2db2525906685274e8636817f10fd2c 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -493,12 +493,11 @@ MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type): ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT); net_ = std::move(CreateNet(*net_def, ws_.get(), device_type)); } -MaceEngine::~MaceEngine(){} +MaceEngine::~MaceEngine() = default; bool MaceEngine::Run(const float *input, const std::vector &input_shape, float *output) { MACE_CHECK(output != nullptr, "output ptr cannot be NULL"); - Tensor *input_tensor = ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT); input_tensor->Resize(input_shape); @@ -518,6 +517,7 @@ bool MaceEngine::Run(const float *input, auto shape = output_tensor->shape(); int64_t output_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + // TODO: check for overflow exception. std::memcpy(output, output_tensor->data(), output_size * sizeof(float)); return true; diff --git a/mace/core/public/mace.h b/mace/core/public/mace.h index 088d7f9cacde4e800e73f5ffd5cce91d18304e53..1b0d3ceb5cfae0ae6414d5f4686732e200088b7c 100644 --- a/mace/core/public/mace.h +++ b/mace/core/public/mace.h @@ -310,6 +310,9 @@ class MaceEngine { bool Run(const float *input, const std::vector &input_shape, float *output); + MaceEngine(const MaceEngine&) = delete; + MaceEngine &operator=(const MaceEngine&) = delete; + private: DeviceType device_type_; std::unique_ptr ws_; diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc index cb8a97257836042a9cfea81f640971215d68d5ec..ccdc3c7c1a202fd3f61c3d97c6ab85f5484fa423 100644 --- a/mace/examples/mace_run.cc +++ b/mace/examples/mace_run.cc @@ -173,9 +173,13 @@ int main(int argc, char **argv) { // load input ifstream in_file(input_file, ios::in | ios::binary); - in_file.read(reinterpret_cast(input_data.get()), - input_size * sizeof(float)); - in_file.close(); + if (in_file.is_open()) { + in_file.read(reinterpret_cast(input_data.get()), + input_size * sizeof(float)); + in_file.close(); + } else { + LOG(ERROR) << "Open input file failed"; + } // Init model VLOG(0) << "Run init"; diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index b28f113b0b748f0a0eb9ab309a22f45a8d01c866..1bc6f14590e6c358a387997daef2ac95b88f67f8 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -64,7 +64,7 @@ void BufferToImageFunctor::operator()(Tensor *buffer, image_shape[1], 1}; const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(b2f_kernel); - const std::vector lws = {kwg_size, 1, 1}; + const std::vector lws = {16, 64, 1}; cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( b2f_kernel, cl::NullRange, diff --git a/mace/kernels/opencl/cl/space_to_batch.cl b/mace/kernels/opencl/cl/space_to_batch.cl index 5d098d865b3dac310bf14869114e219f8cf92570..9ad635099ea3caa249a6b1f49b4eb206553219f3 100644 --- a/mace/kernels/opencl/cl/space_to_batch.cl +++ b/mace/kernels/opencl/cl/space_to_batch.cl @@ -1,58 +1,76 @@ #include -// Supported data type: all -__kernel void space_to_batch(__global DATA_TYPE *space_data_ptr, - __global const int *block_shape_ptr, - __global const int *paddings_ptr, - __private const int space_batch, - __private const int space_channel, +__kernel void space_to_batch(__read_only image2d_t space_data, + __write_only image2d_t 
batch_data, + __private const int block_height, + __private const int block_width, + __private const int padding_height, + __private const int padding_width, __private const int space_height, __private const int space_width, __private const int batch_height, - __private const int batch_width, - __private const int b2s, - __global DATA_TYPE* batch_data_ptr) { - int batch_idx = get_global_id(0); - int batch_channel_idx = get_global_id(1); - int batch_pixel_idx = get_global_id(2); - - const int block_height = block_shape_ptr[0]; - const int block_width = block_shape_ptr[1]; - const int padding_height_start = paddings_ptr[0]; - const int padding_width_start = paddings_ptr[2]; - - const int batch_pixel_height_idx = batch_pixel_idx / batch_width; - const int batch_pixel_width_idx = batch_pixel_idx % batch_width; - - const int block_size = block_height * block_width; - const int space_idx = batch_idx / block_size; - const int remaining_batch_idx = batch_idx % block_size; - int space_pixel_height_idx = (remaining_batch_idx / block_width) + - batch_pixel_height_idx * block_height; - int space_pixel_width_idx = (remaining_batch_idx % block_width) + - batch_pixel_width_idx * block_width; - - const int batch_data_offset = batch_idx * (space_channel * batch_height * batch_width) + - (batch_channel_idx * batch_height * batch_width) + - batch_pixel_height_idx * batch_width + - batch_pixel_width_idx; - - space_pixel_height_idx -= padding_height_start; - space_pixel_width_idx -= padding_width_start; - const int space_data_offset = space_idx * (space_channel * space_height * space_width) + - (batch_channel_idx * space_height * space_width) + - space_pixel_height_idx * space_width + - space_pixel_width_idx; - if (space_pixel_height_idx < 0 || space_pixel_height_idx >= space_height || - space_pixel_width_idx < 0 || space_pixel_width_idx >= space_width) { - if (!b2s) { - *(batch_data_ptr + batch_data_offset) = 0; - } - } else { - if (b2s) { - *(space_data_ptr + space_data_offset) = *(batch_data_ptr + batch_data_offset); - } else { - *(batch_data_ptr + batch_data_offset) = *(space_data_ptr + space_data_offset); - } + __private const int batch_width) { + const int chan_idx = get_global_id(0); + const int batch_w_idx = get_global_id(1); + const int batch_hb_idx = get_global_id(2); + + const int batch_b_idx = batch_hb_idx / batch_height; + const int batch_h_idx = batch_hb_idx % batch_height; + + const int block_size = mul24(block_height, block_width); + const int space_b_idx = batch_b_idx / block_size; + const int remaining_batch_idx = batch_b_idx % block_size; + const int space_h_idx = (remaining_batch_idx / block_width) + + mul24(batch_h_idx, block_height) - padding_height; + const int space_w_idx = (remaining_batch_idx % block_width) + + mul24(batch_w_idx, block_width) - padding_width; + + const int space_coord_x = select(mul24(chan_idx, space_width) + space_w_idx, + -1, + space_w_idx < 0 || space_w_idx >= space_width); + const int space_coord_y = select(mul24(space_b_idx, space_height) + space_h_idx, + -1, + space_h_idx < 0 || space_h_idx >= space_height); + int2 space_coord = (int2)(space_coord_x, + space_coord_y); + DATA_TYPE4 value = READ_IMAGET(space_data, SAMPLER, space_coord); + + int2 batch_coord = (int2)(mul24(chan_idx, batch_width) + batch_w_idx, batch_hb_idx); + WRITE_IMAGET(batch_data, batch_coord, value); +} + +__kernel void batch_to_space(__read_only image2d_t batch_data, + __write_only image2d_t space_data, + __private const int block_height, + __private const int block_width, + __private const 
int padding_height, + __private const int padding_width, + __private const int space_height, + __private const int space_width, + __private const int batch_height, + __private const int batch_width) { + const int chan_idx = get_global_id(0); + const int batch_w_idx = get_global_id(1); + const int batch_hb_idx = get_global_id(2); + + const int batch_b_idx = batch_hb_idx / batch_height; + const int batch_h_idx = batch_hb_idx % batch_height; + + const int block_size = mul24(block_height, block_width); + const int space_b_idx = batch_b_idx / block_size; + const int remaining_batch_idx = batch_b_idx % block_size; + const int space_h_idx = (remaining_batch_idx / block_width) + + mul24(batch_h_idx, block_height) - padding_height; + const int space_w_idx = (remaining_batch_idx % block_width) + + mul24(batch_w_idx, block_width) - padding_width; + + if (0 <= space_w_idx && space_w_idx < space_width && + 0 <= space_h_idx && space_h_idx < space_height) { + int2 batch_coord = (int2)(mul24(chan_idx, batch_width) + batch_w_idx, batch_hb_idx); + DATA_TYPE4 value = READ_IMAGET(batch_data, SAMPLER, batch_coord); + + int2 space_coord = (int2)(mul24(chan_idx, space_width) + space_w_idx, + space_b_idx * space_height + space_h_idx); + WRITE_IMAGET(space_data, space_coord, value); } } diff --git a/mace/kernels/opencl/space_to_batch_opecl.cc b/mace/kernels/opencl/space_to_batch_opecl.cc deleted file mode 100644 index 32ada21a210091a02033babb8cefd08517b43e4a..0000000000000000000000000000000000000000 --- a/mace/kernels/opencl/space_to_batch_opecl.cc +++ /dev/null @@ -1,65 +0,0 @@ -// -// Copyright (c) 2017 XiaoMi All rights reserved. -// - -#ifndef MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ -#define MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ - -#include "mace/core/common.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/kernels/space_to_batch.h" -#include "mace/kernels/opencl/helper.h" - -namespace mace { -namespace kernels { - -template <> -void SpaceToBatchFunctor::operator()(Tensor *space_tensor, - const Tensor *block_shape_tensor, - const Tensor *paddings_tensor, - Tensor *batch_tensor, - StatsFuture *future) { - auto runtime = OpenCLRuntime::Global(); - std::set built_options; - built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(space_tensor->dtype())); - auto s2b_kernel = runtime->BuildKernel("space_to_batch", "space_to_batch", built_options); - - uint32_t idx = 0; - s2b_kernel.setArg(idx++, *(static_cast(space_tensor->buffer()))); - s2b_kernel.setArg(idx++, *(static_cast(block_shape_tensor->buffer()))); - s2b_kernel.setArg(idx++, *(static_cast(paddings_tensor->buffer()))); - s2b_kernel.setArg(idx++, static_cast(space_tensor->dim(0))); - s2b_kernel.setArg(idx++, static_cast(space_tensor->dim(1))); - s2b_kernel.setArg(idx++, static_cast(space_tensor->dim(2))); - s2b_kernel.setArg(idx++, static_cast(space_tensor->dim(3))); - s2b_kernel.setArg(idx++, static_cast(batch_tensor->dim(2))); - s2b_kernel.setArg(idx++, static_cast(batch_tensor->dim(3))); - s2b_kernel.setArg(idx++, static_cast(b2s_)); - s2b_kernel.setArg(idx++, *(static_cast(batch_tensor->buffer()))); - - const uint32_t gws[3] = {static_cast(batch_tensor->dim(0)), - static_cast(batch_tensor->dim(1)), - static_cast(batch_tensor->dim(2) * batch_tensor->dim(3))}; - const uint32_t lws[3] = {static_cast(1), - static_cast(8), - static_cast(128)}; - cl::Event event; - cl_int error = runtime->command_queue().enqueueNDRangeKernel( - s2b_kernel, cl::NullRange, - cl::NDRange(gws[0], gws[1], gws[2]), - cl::NDRange(lws[0], lws[1], lws[2]), - 
nullptr, &event); - MACE_CHECK(error == CL_SUCCESS); - if (future != nullptr) { - future->wait_fn = [runtime, event](CallStats *stats) { - event.wait(); - if (stats != nullptr) { - runtime->GetCallStats(event, stats); - } - }; - } -} - -} // namespace kernels -} // namespace mace -#endif // MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ diff --git a/mace/kernels/opencl/space_to_batch_opencl.cc b/mace/kernels/opencl/space_to_batch_opencl.cc new file mode 100644 index 0000000000000000000000000000000000000000..4a8fb2b8810bb620e768d5d8364b0f6b2206a2d6 --- /dev/null +++ b/mace/kernels/opencl/space_to_batch_opencl.cc @@ -0,0 +1,119 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#ifndef MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ +#define MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ + +#include "mace/core/common.h" +#include "mace/core/runtime/opencl/opencl_runtime.h" +#include "mace/kernels/space_to_batch.h" +#include "mace/kernels/opencl/helper.h" +#include "mace/utils/utils.h" +#include "mace/utils/tuner.h" + +namespace mace { +namespace kernels { + +template +void SpaceToBatchFunctor::operator()(Tensor *space_tensor, + const std::vector &output_shape, + Tensor *batch_tensor, + StatsFuture *future) { + std::vector output_image_shape; + CalImage2DShape(output_shape, BufferType::IN_OUT, output_image_shape); + const char *kernel_name = nullptr; + if (b2s_) { + space_tensor->ResizeImage(output_shape, output_image_shape); + kernel_name = "batch_to_space"; + } else { + batch_tensor->ResizeImage(output_shape, output_image_shape); + kernel_name = "space_to_batch"; + } + auto runtime = OpenCLRuntime::Global(); + std::set built_options; + built_options.emplace("-DDATA_TYPE=" + DtToCLDt(DataTypeToEnum::value)); + built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(DataTypeToEnum::value)); + auto s2b_kernel = runtime->BuildKernel("space_to_batch", kernel_name, built_options); + + uint32_t idx = 0; + if (b2s_) { + s2b_kernel.setArg(idx++, *(static_cast(batch_tensor->buffer()))); + s2b_kernel.setArg(idx++, *(static_cast(space_tensor->buffer()))); + } else { + s2b_kernel.setArg(idx++, *(static_cast(space_tensor->buffer()))); + s2b_kernel.setArg(idx++, *(static_cast(batch_tensor->buffer()))); + } + s2b_kernel.setArg(idx++, block_shape_[0]); + s2b_kernel.setArg(idx++, block_shape_[1]); + s2b_kernel.setArg(idx++, paddings_[0]); + s2b_kernel.setArg(idx++, paddings_[2]); + s2b_kernel.setArg(idx++, static_cast(space_tensor->dim(1))); + s2b_kernel.setArg(idx++, static_cast(space_tensor->dim(2))); + s2b_kernel.setArg(idx++, static_cast(batch_tensor->dim(1))); + s2b_kernel.setArg(idx++, static_cast(batch_tensor->dim(2))); + + const uint32_t chan_blk = RoundUpDiv4(batch_tensor->dim(3)); + const uint32_t gws[3] = {chan_blk, + static_cast(batch_tensor->dim(2)), + static_cast(batch_tensor->dim(0) * batch_tensor->dim(1))}; + const std::vector lws = {8, 16, 8}; + const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(s2b_kernel); + auto params_generator = [&]() -> std::vector> { + std::vector local_ws(3, 0); + local_ws[0] = std::min(chan_blk, kwg_size); + local_ws[1] = std::min(32, kwg_size / local_ws[0]); + local_ws[2] = std::min(32, kwg_size / (local_ws[0] * local_ws[1])); + return {{local_ws[0], local_ws[1], local_ws[2]}, + {4, 32, 8}, + {4, 64, 4}, + {4, 128, 2}, + {8, 16, 8}, + {8, 32, 4}, + {8, 64, 2}, + {16, 8, 8}, + {16, 16, 4}, + {16, 32, 2}, + {32, 8, 4}, + {32, 16, 2}, + {64, 4, 4}}; + }; + cl::Event event; + auto func = [&](const std::vector ¶ms) -> cl_int { + cl_int error = 
runtime->command_queue().enqueueNDRangeKernel( + s2b_kernel, cl::NullRange, + cl::NDRange(gws[0], gws[1], gws[2]), + cl::NDRange(params[0], params[1], params[2]), + nullptr, &event); + + MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + return error; + }; + std::stringstream ss; + ss << kernel_name << "_" + << batch_tensor->dim(0) << "_" + << batch_tensor->dim(1) << "_" + << batch_tensor->dim(2) << "_" + << batch_tensor->dim(3); + OpenCLProfilingTimer timer(&event); + Tuner::Get()->template TuneOrRun(ss.str(), + lws, + params_generator, + func, + &timer); + if (future != nullptr) { + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; + } +} + +template struct SpaceToBatchFunctor; +template struct SpaceToBatchFunctor; + +} // namespace kernels +} // namespace mace +#endif // MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_ diff --git a/mace/kernels/space_to_batch.h b/mace/kernels/space_to_batch.h index d868329223cd798c001710b5ff94a917acc91f2e..9e2fd8762c4a7e8fa5d0d3b405d8701a6a914ed7 100644 --- a/mace/kernels/space_to_batch.h +++ b/mace/kernels/space_to_batch.h @@ -12,27 +12,46 @@ namespace mace { namespace kernels { -template -struct SpaceToBatchFunctor { - SpaceToBatchFunctor(const bool b2s = false): b2s_(b2s){} +struct SpaceToBatchFunctorBase { + SpaceToBatchFunctorBase(const std::vector &paddings, + const std::vector &block_shape, + bool b2s): + paddings_(paddings.begin(), paddings.end()), + block_shape_(block_shape.begin(), block_shape.end()), + b2s_(b2s) + {} + + std::vector paddings_; + std::vector block_shape_; + bool b2s_; +}; - void operator()(Tensor *input_tensor, - const Tensor *block_shape_tensor, - const Tensor *paddings_tensor, - Tensor *output_tensor, +template +struct SpaceToBatchFunctor : SpaceToBatchFunctorBase{ + SpaceToBatchFunctor(const std::vector &paddings, + const std::vector &block_shape, + bool b2s): SpaceToBatchFunctorBase(paddings, block_shape, b2s){} + + void operator()(Tensor *space_tensor, + const std::vector &output_shape, + Tensor *batch_tensor, StatsFuture *future) { MACE_NOT_IMPLEMENTED; } - - bool b2s_; }; -template <> -void SpaceToBatchFunctor::operator()(Tensor *input_tensor, - const Tensor *block_shape_tensor, - const Tensor *paddings_tensor, - Tensor *output, - StatsFuture *future); +template +struct SpaceToBatchFunctor: SpaceToBatchFunctorBase{ + SpaceToBatchFunctor(const std::vector &paddings, + const std::vector &block_shape, + bool b2s): SpaceToBatchFunctorBase(paddings, block_shape, b2s){} + + void operator()(Tensor *space_tensor, + const std::vector &output_shape, + Tensor *batch_tensor, + StatsFuture *future); + +}; } // namespace kernels } // namespace mace diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 61de748b0fc8b8928eb99f8ecdc7e9dc72bca932..4e4ea6610e408889b8a4ad29a786fdd33853703a 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -10,5 +10,9 @@ REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND") .TypeConstraint("T") .Build(), BatchToSpaceNDOp); +REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND") + .TypeConstraint("T") + .Build(), + BatchToSpaceNDOp); } // namespace mace diff --git a/mace/ops/batch_to_space.h b/mace/ops/batch_to_space.h index 286b67731cd7a00f338e2db62b3d81ece60990ec..59f8e03b0c6dd47a642efdffe72ae00988826a08 100644 --- a/mace/ops/batch_to_space.h +++ b/mace/ops/batch_to_space.h @@ -12,63 +12,58 @@ namespace mace { -static void BatchToSpaceHelper(const Tensor 
*input_tensor, - const Tensor *block_shape_tensor, - const Tensor *cropped_tensor, - Tensor *output) { - MACE_CHECK(input_tensor->dim_size() == 4, "Input's shape should be 4D"); - MACE_CHECK(block_shape_tensor->dim_size() == 1, "Block's shape should be 1D"); - MACE_CHECK(cropped_tensor->dim_size() == 2, "Paddings' shape should be 2D"); - - const index_t block_dims = block_shape_tensor->dim(0); - MACE_CHECK(block_dims == cropped_tensor->dim(0) && 2 == cropped_tensor->dim(1)); - // TODO change tensor to attribute if needed based on the benchmark - Tensor::MappingGuard block_shape_tensor_mapper(block_shape_tensor); - Tensor::MappingGuard cropped_tensor_mapper(cropped_tensor); - const int *block_shape_ptr = block_shape_tensor->data(); - const int *cropped_ptr = cropped_tensor->data(); - std::vector output_shape(4, 0); - index_t block_shape_product = 1; - for (uint32_t block_dim = 0; block_dim < block_dims; ++block_dim) { - MACE_CHECK(block_shape_ptr[block_dim] > 1, "block_shape's value should be great to 1"); - const index_t block_shape_value = block_shape_ptr[block_dim]; - const index_t cropped_input_size = input_tensor->dim(block_dim + 2) * block_shape_value - - *cropped_ptr - - *(cropped_ptr+1); - MACE_CHECK(cropped_input_size >= 0, - "cropped size must be non-negative"); - block_shape_product *= block_shape_value; - output_shape[block_dim+2] = cropped_input_size; - cropped_ptr += 2; - } - output_shape[0] = input_tensor->dim(0) / block_shape_product; - output_shape[1] = input_tensor->dim(1); - - output->Resize(output_shape); -} - -template -class BatchToSpaceNDOp: public Operator { +template +class BatchToSpaceNDOp : public Operator { public: BatchToSpaceNDOp(const OperatorDef &op_def, Workspace *ws) - : Operator(op_def, ws), functor_(true) {} + : Operator(op_def, ws), + functor_( + OperatorBase::GetRepeatedArgument("crops", {0, 0, 0, 0}), + OperatorBase::GetRepeatedArgument("block_shape", {1, 1}), + true) {} bool Run(StatsFuture *future) override { - const Tensor *input_tensor = this->Input(INPUT); - const Tensor *block_shape_tensor = this->Input(BLOCK_SHAPE); - const Tensor *cropped_tensor = this->Input(CROPS); - Tensor *output = this->Output(OUTPUT); + const Tensor *batch_tensor = this->Input(INPUT); + Tensor *space_tensor = this->Output(OUTPUT); - BatchToSpaceHelper(input_tensor, block_shape_tensor, cropped_tensor, output); - functor_(output, block_shape_tensor, cropped_tensor, const_cast(input_tensor), future); + std::vector output_shape(4, 0); + CalculateOutputShape(batch_tensor, space_tensor, output_shape.data()); + functor_(space_tensor, output_shape, const_cast(batch_tensor), future); return true; } + private: + inline void CalculateOutputShape(const Tensor *input_tensor, + Tensor *output, + index_t *output_shape) { + auto crops = OperatorBase::GetRepeatedArgument("crops", {0, 0, 0, 0}); + auto block_shape = OperatorBase::GetRepeatedArgument("block_shape", {1, 1}); + MACE_CHECK(input_tensor->dim_size() == 4, "Input's shape should be 4D"); + MACE_CHECK(block_shape.size() == 2, "Block's shape should be 1D"); + MACE_CHECK(crops.size() == 4, "Crops' shape should be 2D"); + + const index_t block_dims = block_shape.size(); + index_t block_shape_product = 1; + for (uint32_t block_dim = 0; block_dim < block_dims; ++block_dim) { + MACE_CHECK(block_shape[block_dim] > 1, "block_shape's value should be great to 1"); + const index_t block_shape_value = block_shape[block_dim]; + const index_t cropped_input_size = input_tensor->dim(block_dim + 1) * block_shape_value + - crops[block_dim * 2] + - 
crops[block_dim * 2 + 1]; + MACE_CHECK(cropped_input_size >= 0, + "cropped size must be non-negative"); + block_shape_product *= block_shape_value; + output_shape[block_dim + 1] = cropped_input_size; + } + output_shape[0] = input_tensor->dim(0) / block_shape_product; + output_shape[3] = input_tensor->dim(3); + } + private: kernels::SpaceToBatchFunctor functor_; protected: - OP_INPUT_TAGS(INPUT, BLOCK_SHAPE, CROPS); + OP_INPUT_TAGS(INPUT); OP_OUTPUT_TAGS(OUTPUT); }; diff --git a/mace/ops/batch_to_space_benchmark.cc b/mace/ops/batch_to_space_benchmark.cc index 89e100f1ebfe3ac61db1a34e4b5b4d446ec7d4d9..a3a0e0b6ee508e0f1333e33b70dabf982c9cd903 100644 --- a/mace/ops/batch_to_space_benchmark.cc +++ b/mace/ops/batch_to_space_benchmark.cc @@ -9,23 +9,19 @@ namespace mace { template static void BMBatchToSpace( - int iters, int batch, int channels, int height, int width) { + int iters, int batch, int channels, int height, int width, int arg) { mace::testing::StopTiming(); OpsTestNet net; + net.AddRandomInput("Input", {batch, height, width, channels}); + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") - .Input("Input") - .Input("BlockShape") - .Input("Crops") - .Output("Output") + .Input("InputImage") + .Output("OutputImage") + .AddIntsArg("crops", {0, 0, 0, 0}) + .AddIntsArg("block_shape", {arg, arg}) .Finalize(net.NewOperatorDef()); - // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); - net.AddInputFromArray( - "BlockShape", {2}, {2, 2}); - net.AddInputFromArray("Crops", {2, 2}, {0,1,0,1}); - // Warm-up for (int i = 0; i < 5; ++i) { net.RunOp(D); @@ -39,18 +35,20 @@ static void BMBatchToSpace( net.Sync(); } -#define BM_BATCH_TO_SPACE_MACRO(N, C, H, W, TYPE, DEVICE) \ - static void BM_BATCH_TO_SPACE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ +#define BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, DEVICE) \ + static void BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ mace::testing::ItemsProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - BMBatchToSpace(iters, N, C, H, W); \ + BMBatchToSpace(iters, N, C, H, W, ARG); \ } \ - BENCHMARK(BM_BATCH_TO_SPACE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) + BENCHMARK(BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE) -#define BM_BATCH_TO_SPACE(N, C, H, W, TYPE) \ - BM_BATCH_TO_SPACE_MACRO(N, C, H, W, TYPE, OPENCL); +#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE) \ + BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, OPENCL); -BM_BATCH_TO_SPACE(128, 128, 8, 8, float); +BM_BATCH_TO_SPACE(128, 8, 8, 128, 2, float); +BM_BATCH_TO_SPACE(4, 128, 128, 32, 2, float); +BM_BATCH_TO_SPACE(16, 64, 64, 32, 4, float); } // namespace mace \ No newline at end of file diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 04bb3c67cf1b896ca617f92a6abbad18bf29abf1..ea86167ab681d8deab7589777bfb3135bf51da75 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -322,9 +322,18 @@ struct Expector { Tensor::MappingGuard y_mapper(&y); auto a = x.data(); auto b = y.data(); - for (int i = 0; i < x.size(); ++i) { - EXPECT_NEAR(a[i], b[i], abs_err) << "a = " << a << " b = " << b - << " index = " << i; + for (int n = 0; n < x.dim(0); ++n) { + for (int h = 0; h < x.dim(1); ++h) { + for (int w = 0; w < x.dim(2); ++w) { + for (int c = 0; c < x.dim(3); ++c) { + EXPECT_NEAR(*a, *b, abs_err) << "with index = [" + << n << ", " 
<< h << ", " + << w << ", " << c << "]"; + a++; + b++; + } + } + } } } diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index fec9866872e94aa4aa1dd2f218d0585ebdc776c1..9b24f591d7e812262c4bfbb6ee9fac07cb2b4b3c 100644 --- a/mace/ops/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -10,5 +10,9 @@ REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND") .TypeConstraint("T") .Build(), SpaceToBatchNDOp); +REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND") + .TypeConstraint("T") + .Build(), + SpaceToBatchNDOp); } // namespace mace diff --git a/mace/ops/space_to_batch.h b/mace/ops/space_to_batch.h index 6abc7772e71a55baeb89592661c745e9598c09ed..787b82e6b49abfc6e51a826c2e52ad7b1373d950 100644 --- a/mace/ops/space_to_batch.h +++ b/mace/ops/space_to_batch.h @@ -12,62 +12,59 @@ namespace mace { -static void SpaceToBatchHelper(const Tensor *input_tensor, - const Tensor *block_shape_tensor, - const Tensor *paddings_tensor, - Tensor *output) { - MACE_CHECK(input_tensor->dim_size() == 4, "Input's shape should be 4D"); - MACE_CHECK(block_shape_tensor->dim_size() == 1, "Block's shape should be 1D"); - MACE_CHECK(paddings_tensor->dim_size() == 2, "Paddings' shape should be 2D"); - - const index_t block_dims = block_shape_tensor->dim(0); - MACE_CHECK(block_dims == paddings_tensor->dim(0) && 2 == paddings_tensor->dim(1)); - Tensor::MappingGuard block_shape_tensor_mapper(block_shape_tensor); - Tensor::MappingGuard padding_tensor_mapper(paddings_tensor); - const int *block_shape_ptr = block_shape_tensor->data(); - const int *paddings_ptr = paddings_tensor->data(); - std::vector output_shape(4, 0); - index_t block_shape_product = 1; - for (uint32_t block_dim = 0; block_dim < block_dims; ++block_dim) { - MACE_CHECK(block_shape_ptr[block_dim] > 1, "block_shape's value should be great to 1"); - const index_t block_shape_value = block_shape_ptr[block_dim]; - const index_t padded_input_size = input_tensor->dim(block_dim + 2) - + *paddings_ptr - + *(paddings_ptr+1); - MACE_CHECK(padded_input_size % block_shape_value == 0, - "padded input is not divisible by block_shape"); - block_shape_product *= block_shape_value; - output_shape[block_dim+2] = padded_input_size / block_shape_value; - paddings_ptr += 2; - } - output_shape[0] = input_tensor->dim(0) * block_shape_product; - output_shape[1] = input_tensor->dim(1); - - output->Resize(output_shape); -} - -template +template class SpaceToBatchNDOp : public Operator { public: SpaceToBatchNDOp(const OperatorDef &op_def, Workspace *ws) - : Operator(op_def, ws) {} + : Operator(op_def, ws), + functor_( + OperatorBase::GetRepeatedArgument("paddings", {0, 0, 0, 0}), + OperatorBase::GetRepeatedArgument("block_shape", {1, 1}), + false) {} bool Run(StatsFuture *future) override { - const Tensor *input_tensor = this->Input(INPUT); - const Tensor *block_shape_tensor = this->Input(BLOCK_SHAPE); - const Tensor *paddings_tensor = this->Input(PADDINGS); - Tensor *output = this->Output(OUTPUT); + const Tensor *space_tensor = this->Input(INPUT); + Tensor *batch_tensor = this->Output(OUTPUT); - SpaceToBatchHelper(input_tensor, block_shape_tensor, paddings_tensor, output); - functor_(const_cast(input_tensor), block_shape_tensor, paddings_tensor, output, future); + std::vector output_shape(4, 0); + CalculateOutputShape(space_tensor, batch_tensor, output_shape.data()); + functor_(const_cast(space_tensor), output_shape, batch_tensor, future); return true; } + private: + + inline void CalculateOutputShape(const Tensor *input_tensor, + Tensor *output, + index_t 
*output_shape) { + auto paddings = OperatorBase::GetRepeatedArgument("paddings", {0, 0, 0, 0}); + auto block_shape = OperatorBase::GetRepeatedArgument("block_shape", {1, 1}); + MACE_CHECK(input_tensor->dim_size() == 4, "Input's shape should be 4D"); + MACE_CHECK(block_shape.size() == 2, "Block's shape should be 1D"); + MACE_CHECK(paddings.size() == 4, "Paddings' shape should be 2D"); + + const index_t block_dims = block_shape.size(); + index_t block_shape_product = 1; + for (uint32_t block_dim = 0; block_dim < block_dims; ++block_dim) { + MACE_CHECK(block_shape[block_dim] > 1, "block_shape's value should be great to 1"); + const index_t block_shape_value = block_shape[block_dim]; + const index_t padded_input_size = input_tensor->dim(block_dim + 1) + + paddings[block_dim * 2] + + paddings[block_dim * 2 + 1]; + MACE_CHECK(padded_input_size % block_shape_value == 0, + "padded input ", padded_input_size, " is not divisible by block_shape"); + block_shape_product *= block_shape_value; + output_shape[block_dim + 1] = padded_input_size / block_shape_value; + } + output_shape[0] = input_tensor->dim(0) * block_shape_product; + output_shape[3] = input_tensor->dim(3); + } + private: kernels::SpaceToBatchFunctor functor_; protected: - OP_INPUT_TAGS(INPUT, BLOCK_SHAPE, PADDINGS); + OP_INPUT_TAGS(INPUT); OP_OUTPUT_TAGS(OUTPUT); }; diff --git a/mace/ops/space_to_batch_benchmark.cc b/mace/ops/space_to_batch_benchmark.cc index 5e119a041fd33603d38ad99c0f5084575c25d20d..9afa88b98e2fc22a66f8779e980122c69f3d0f20 100644 --- a/mace/ops/space_to_batch_benchmark.cc +++ b/mace/ops/space_to_batch_benchmark.cc @@ -9,23 +9,20 @@ namespace mace { template static void BMSpaceToBatch( - int iters, int batch, int channels, int height, int width) { + int iters, int batch, int height, int width, int channels, int shape) { mace::testing::StopTiming(); OpsTestNet net; + net.AddRandomInput("Input", {batch, height, width, channels}); + + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") - .Input("Input") - .Input("BlockShape") - .Input("Padding") - .Output("Output") + .Input("InputImage") + .Output("OutputImage") + .AddIntsArg("paddings", {shape, shape, shape, shape}) + .AddIntsArg("block_shape", {shape, shape}) .Finalize(net.NewOperatorDef()); - // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); - net.AddInputFromArray( - "BlockShape", {2}, {2, 2}); - net.AddInputFromArray("Padding", {2, 2}, {2,3,2,3}); - // Warm-up for (int i = 0; i < 5; ++i) { net.RunOp(D); @@ -39,18 +36,20 @@ static void BMSpaceToBatch( net.Sync(); } -#define BM_SPACE_TO_BATCH_MACRO(N, C, H, W, TYPE, DEVICE) \ - static void BM_SPACE_TO_BATCH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ +#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \ + static void BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ mace::testing::ItemsProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - BMSpaceToBatch(iters, N, C, H, W); \ + BMSpaceToBatch(iters, N, H, W, C, SHAPE); \ } \ - BENCHMARK(BM_SPACE_TO_BATCH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) + BENCHMARK(BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE) -#define BM_SPACE_TO_BATCH(N, C, H, W, TYPE) \ - BM_SPACE_TO_BATCH_MACRO(N, C, H, W, TYPE, OPENCL); +#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \ + BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, OPENCL); 
-BM_SPACE_TO_BATCH(128, 128, 15, 15, float); +BM_SPACE_TO_BATCH(128, 16, 16, 128, 2, float); +BM_SPACE_TO_BATCH(1, 256, 256, 32, 2, float); +BM_SPACE_TO_BATCH(1, 256, 256, 32, 4, float); } // namespace mace \ No newline at end of file diff --git a/mace/ops/space_to_batch_test.cc b/mace/ops/space_to_batch_test.cc index 78e933df1aaa63e6d338b7d2822d3250e26889db..4c1dbbdcd5a60c9d17a131888755d6fb12a1630f 100644 --- a/mace/ops/space_to_batch_test.cc +++ b/mace/ops/space_to_batch_test.cc @@ -4,79 +4,70 @@ #include "gtest/gtest.h" #include "mace/ops/ops_test_util.h" +#include using namespace mace; -template +template void RunSpaceToBatch(const std::vector &input_shape, const std::vector &input_data, - const std::vector &block_shape_shape, const std::vector &block_shape_data, - const std::vector &padding_shape, const std::vector &padding_data, const Tensor *expected) { OpsTestNet net; - OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") - .Input("Input") - .Input("BlockShape") - .Input("Padding") - .Output("Output") - .Finalize(net.NewOperatorDef()); - - // Add input data net.AddInputFromArray( "Input", input_shape, input_data); - net.AddInputFromArray( - "BlockShape", block_shape_shape, block_shape_data); - net.AddInputFromArray("Padding", padding_shape, padding_data); + + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") + .Input("InputImage") + .Output("OutputImage") + .AddIntsArg("paddings", padding_data) + .AddIntsArg("block_shape", block_shape_data) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); + ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); // Check ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-8); - } -template +template void RunBatchToSpace(const std::vector &input_shape, const std::vector &input_data, - const std::vector &block_shape_shape, const std::vector &block_shape_data, - const std::vector &crops_shape, const std::vector &crops_data, const Tensor *expected) { OpsTestNet net; - OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") - .Input("Input") - .Input("BlockShape") - .Input("Crops") - .Output("Output") - .Finalize(net.NewOperatorDef()); - // Add input data net.AddInputFromArray( "Input", input_shape, input_data); - net.AddInputFromArray( - "BlockShape", block_shape_shape, block_shape_data); - net.AddInputFromArray("Crops", crops_shape, crops_data); + + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") + .Input("InputImage") + .Output("OutputImage") + .AddIntsArg("crops", crops_data) + .AddIntsArg("block_shape", block_shape_data) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); + ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); // Check ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-8); } -template -void TestBidirectionTransform(const std::vector &space_shape, - const std::vector &space_data, - const std::vector &block_shape, - const std::vector &block_data, - const std::vector &padding_shape, - const std::vector &padding_data, - const std::vector &batch_shape, - const std::vector &batch_data) { +template +void TestBidirectionalTransform(const std::vector &space_shape, + const std::vector &space_data, + const std::vector &block_data, + const std::vector &padding_data, + const std::vector &batch_shape, + const std::vector &batch_data) { auto space_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), 
DataTypeToEnum::v())); @@ -101,99 +92,157 @@ void TestBidirectionTransform(const std::vector &space_shape, } RunSpaceToBatch(space_shape, space_data, - block_shape, block_data, - padding_shape, padding_data, + block_data, + padding_data, batch_tensor.get()); RunBatchToSpace(batch_shape, batch_data, - block_shape, block_data, - padding_shape, padding_data, + block_data, + padding_data, space_tensor.get()); } TEST(SpaceToBatchTest, SmallData) { - TestBidirectionTransform({1, 1, 2, 2}, - {1,2,3,4}, - {2}, - {2, 2}, - {2, 2}, - {0, 0, 0, 0}, - {4,1,1,1}, - {1,2,3,4} + TestBidirectionalTransform({1, 2, 2, 1}, + {1, 2, 3, 4}, + {2, 2}, + {0, 0, 0, 0}, + {4, 1, 1, 1}, + {1, 2, 3, 4} ); } TEST(SpaceToBatchTest, SmallDataWithOnePadding) { - TestBidirectionTransform({1, 1, 2, 2}, - {1,2,3,4}, - {2}, - {3, 3}, - {2, 2}, - {1, 0, 1, 0}, - {9,1,1,1}, - {0,0,0,0,1,2,0,3,4} + TestBidirectionalTransform({1, 2, 2, 1}, + {1, 2, 3, 4}, + {3, 3}, + {1, 0, 1, 0}, + {9, 1, 1, 1}, + {0, 0, 0, 0, 1, 2, 0, 3, 4} ); } TEST(SpaceToBatchTest, SmallDataWithTwoPadding) { - TestBidirectionTransform({1, 1, 2, 2}, - {1,2,3,4}, - {2}, - {2, 2}, - {2, 2}, - {1, 1, 1, 1}, - {4,1,2,2}, - {0,0,0,4,0,0,3,0,0,2,0,0,1,0,0,0} + TestBidirectionalTransform({1, 2, 2, 1}, + {1, 2, 3, 4}, + {2, 2}, + {1, 1, 1, 1}, + {4, 2, 2, 1}, + {0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0} + ); +} + +TEST(SpaceToBatchTest, SmallDataWithLargeImage) { + TestBidirectionalTransform({1, 2, 10, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, + {2, 2}, + {0, 0, 0, 0}, + {4, 1, 5, 1}, + {1, 3, 5, 7, 9, + 2, 4, 6, 8, 10, + 11, 13, 15, 17, 19, + 12, 14, 16, 18, 20} ); } TEST(SpaceToBatchTest, MultiChannelData) { - TestBidirectionTransform({1, 3, 2, 2}, - {1,2,3,4,5,6,7,8,9,10,11,12}, - {2}, - {2, 2}, - {2, 2}, - {0, 0, 0, 0}, - {4,3,1,1}, - {1,5,9,2,6,10,3,7,11,4,8,12} - ); + TestBidirectionalTransform({1, 2, 2, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {2, 2}, + {0, 0, 0, 0}, + {4, 1, 1, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} + ); } TEST(SpaceToBatchTest, LargerMultiChannelData) { - TestBidirectionTransform({1, 1, 4, 4}, - {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, - {2}, - {2, 2}, - {2, 2}, - {0, 0, 0, 0}, - {4,1,2,2}, - {1,3,9,11,2,4,10,12,5,7,13,15,6,8,14,16} + TestBidirectionalTransform({1, 4, 4, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {2, 2}, + {0, 0, 0, 0}, + {4, 2, 2, 1}, + {1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16} ); } TEST(SpaceToBatchTest, MultiBatchData) { - TestBidirectionTransform({2, 1, 2, 4}, - {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, - {2}, - {2, 2}, - {2, 2}, - {0, 0, 0, 0}, - {8,1,1,2}, - {1,3,2,4,5,7,6,8,9,11,10,12,13,15,14,16} + TestBidirectionalTransform({2, 2, 4, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {2, 2}, + {0, 0, 0, 0}, + {8, 1, 2, 1}, + {1, 3, 2, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13, 15, 14, 16} ); } TEST(SpaceToBatchTest, MultiBatchAndChannelData) { - TestBidirectionTransform({2, 2, 2, 4}, - {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, - 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}, - {2}, - {2, 2}, - {2, 2}, - {0, 0, 0, 0}, - {8,2,1,2}, - {1,3,9,11,2,4,10,12,5,7,13,15,6,8,14,16, - 17,19,25,27,18,20,26,28,21,23,29,31,22,24,30,32} + TestBidirectionalTransform({2, 2, 4, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, + {2, 2}, + {0, 0, 0, 0}, + {8, 1, 2, 2}, + {1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16, + 17, 18, 21, 22, 19, 20, 
23, 24, 25, 26, 29, 30, 27, 28, 31, 32} ); } +//TEST(SpaceTobatchTest, CompareTF) { +// +// const std::string space_file = "/data/local/tmp/test/input"; +// const std::string batch_file = "/data/local/tmp/test/output"; +// const std::vector space_shape = {1, 256, 256, 32}; +// const int space_size = std::accumulate(space_shape.begin(), space_shape.end(), 1, std::multiplies()); +// const std::vector batch_shape = {4, 130, 130, 32}; +// const int batch_size = std::accumulate(batch_shape.begin(), batch_shape.end(), 1, std::multiplies()); +// +// auto space_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), +// DataTypeToEnum::v())); +// space_tensor->Resize(space_shape); +// std::vector space_data(space_size, 0.0); +// std::ifstream in_file(space_file, std::ios::in | std::ios::binary); +// if (in_file.is_open()) { +// in_file.read(reinterpret_cast(space_data.data()), +// space_size * sizeof(float)); +// in_file.close(); +// Tensor::MappingGuard space_mapper(space_tensor.get()); +// float *space_ptr = space_tensor->mutable_data(); +// MACE_CHECK(static_cast(space_tensor->size()) == space_data.size()) +// << "Space tensor size:" << space_tensor->size() +// << ", space data size:" << space_data.size(); +// memcpy(space_ptr, space_data.data(), space_data.size() * sizeof(float)); +// } else { +// VLOG(0) << "open space file failed"; +// } +// +// auto batch_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), +// DataTypeToEnum::v())); +// std::vector batch_data(batch_size, 0.0); +// batch_tensor->Resize(batch_shape); +// { +// std::ifstream in_file(batch_file, std::ios::in | std::ios::binary); +// if (in_file.is_open()) { +// in_file.read(reinterpret_cast(batch_data.data()), +// batch_size * sizeof(float)); +// in_file.close(); +// } else { +// VLOG(0) << "open batch file failed"; +// } +// Tensor::MappingGuard batch_mapper(batch_tensor.get()); +// float *batch_ptr = batch_tensor->mutable_data(); +// MACE_CHECK(static_cast(batch_tensor->size()) == batch_data.size()); +// memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(float)); +// } +// +// RunSpaceToBatch(space_shape, space_data, +// {2, 2}, +// {2, 2, 2, 2}, +// batch_tensor.get()); +// +// RunBatchToSpace(batch_shape, batch_data, +// {2, 2}, +// {2, 2, 2, 2}, +// space_tensor.get()); +//} + diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py index 99094b886d9a89dacf881c7fbcfc2eb8c6563e8a..0378c018b3bc33564bcc3cfd80669e1082309be0 100644 --- a/mace/python/tools/tf_converter_lib.py +++ b/mace/python/tools/tf_converter_lib.py @@ -363,6 +363,27 @@ class TFConverter(object): self.net_def.op.extend([op_def]) self.resolved_ops[op.name] = 1 + def convert_space_to_batch(self, op, b2s): + op_def = self.net_def.op.add() + arg = op_def.arg.add() + arg.name = 'T' + arg.i = self.dt + op_def.name = op.name + op_def.type = op.type + op_def.input.extend([op.inputs[0].name]) + op_def.output.extend([output.name for output in op.outputs]) + size_arg = op_def.arg.add() + size_arg.name = 'block_shape' + size_arg.ints.extend(get_input_tensor(op, 1).eval().astype(np.int32).flat) + size_arg = op_def.arg.add() + if b2s: + size_arg.name = 'crops' + else: + size_arg.name = 'paddings' + size_arg.ints.extend(get_input_tensor(op, 2).eval().astype(np.int32).flat) + self.add_output_shape(op.outputs, op_def) + self.resolved_ops[op.name] = 1 + def convert_normal_op(self, op): op_def = self.net_def.op.add() arg = op_def.arg.add() @@ -405,7 +426,11 @@ class TFConverter(object): 
        self.convert_resize_bilinear(op)
      elif op.type == 'BiasAdd':
        self.convert_bias_add(op)
-     elif op.type in ['Relu', 'SpaceToBatchND', 'BatchToSpaceND']:
+     elif op.type == 'SpaceToBatchND':
+       self.convert_space_to_batch(op, False)
+     elif op.type == 'BatchToSpaceND':
+       self.convert_space_to_batch(op, True)
+     elif op.type in ['Relu']:
        self.convert_normal_op(op)
      else:
        raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
diff --git a/tools/gcn.config b/tools/gcn.config
new file mode 100644
index 0000000000000000000000000000000000000000..304d7a2931ee288619cb08d99193828d2cd2cc9a
--- /dev/null
+++ b/tools/gcn.config
@@ -0,0 +1,2 @@
+TF_INPUT_NODE=input
+TF_OUTPUT_NODE=GCN/br_result_2/fcn_br
\ No newline at end of file
diff --git a/tools/side_gcn.config b/tools/side_gcn.config
new file mode 100644
index 0000000000000000000000000000000000000000..d22d730bac70cce3f5c665b5c83c56334f1de319
--- /dev/null
+++ b/tools/side_gcn.config
@@ -0,0 +1,2 @@
+TF_INPUT_NODE=input_node
+TF_OUTPUT_NODE=GCN/br_result_x/fcn_br
\ No newline at end of file
diff --git a/tools/validate_gcn.sh b/tools/validate_gcn.sh
index a0f8e580ba174664e53dada6f9f0b83a3466c6d6..1359a356bc84b89b6c711d2ab1e2108e4ddb99d3 100644
--- a/tools/validate_gcn.sh
+++ b/tools/validate_gcn.sh
@@ -2,7 +2,7 @@
 # Must run at root dir of mace project.
 set +x
 Usage() {
-  echo 'Usage: bash tools/validate_gcn.sh tf_model_path image_size [tuning]'
+  echo 'Usage: bash tools/validate_gcn.sh tools/gcn.config tf_model_path image_size [tuning]'
 }
 
 if [ $# -lt 2 ];then
@@ -10,8 +10,10 @@
   exit -1
 fi
 
+source $1
+
 VLOG_LEVEL=0
-TF_MODEL_FILE_PATH=$1
+TF_MODEL_FILE_PATH=$2
 MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
 MACE_SOURCE_DIR=`/bin/pwd`
 MACE_MODEL_NAME='mace_model.pb'
@@ -20,14 +22,14 @@
 OUTPUT_FILE_NAME='gcn.out'
 OUTPUT_LIST_FILE='gcn.list'
 PHONE_DATA_DIR="/data/local/tmp/${MACE_MODEL_NAME}"
 KERNEL_DIR="${PHONE_DATA_DIR}/cl/"
-IMAGE_SIZE=$2
+IMAGE_SIZE=$3
 MODEL_TAG=GCN${IMAGE_SIZE}
 CODEGEN_DIR=${MACE_SOURCE_DIR}/mace/codegen
 MODEL_CODEGEN_DIR=${CODEGEN_DIR}/models/gcn-$IMAGE_SIZE
 CL_CODEGEN_DIR=${CODEGEN_DIR}/opencl
 CL_BIN_DIR=${CODEGEN_DIR}/opencl_bin
 TUNING_CODEGEN_DIR=${CODEGEN_DIR}/tuning
-TUNING_OR_NOT=${3:-0}
+TUNING_OR_NOT=${4:-0}
 VERSION_SOURCE_PATH=${CODEGEN_DIR}/version
 
 build_and_run()
@@ -87,8 +89,8 @@ rm -rf ${MODEL_CODEGEN_DIR}
 mkdir -p ${MODEL_CODEGEN_DIR}
 bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
     --output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \
-    --input_node=input \
-    --output_node=GCN/br_result_2/fcn_br \
+    --input_node=${TF_INPUT_NODE} \
+    --output_node=${TF_OUTPUT_NODE} \
    --data_type=DT_HALF \
    --runtime=gpu \
    --output_type=source \
@@ -129,7 +131,7 @@ echo "Step 9: Validate the result"
 python tools/validate.py --model_file ${TF_MODEL_FILE_PATH} \
    --input_file ${MODEL_DIR}/${INPUT_FILE_NAME} \
    --mace_out_file ${MODEL_DIR}/${OUTPUT_FILE_NAME} \
-   --input_node input \
-   --output_node GCN/br_result_2/fcn_br\
+   --input_node ${TF_INPUT_NODE} \
+   --output_node ${TF_OUTPUT_NODE} \
    --input_shape "${IMAGE_SIZE},${IMAGE_SIZE},3" \
    --output_shape "1,${IMAGE_SIZE},${IMAGE_SIZE},2"
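
For reference, the updated validate_gcn.sh now sources TF_INPUT_NODE and TF_OUTPUT_NODE from the config file passed as its first argument instead of hard-coding the GCN node names. A minimal sketch of the new invocation, assuming a frozen graph at a placeholder path and a 256x256 input size; the optional fourth argument enables tuning and defaults to 0:

    bash tools/validate_gcn.sh tools/gcn.config /path/to/gcn_model.pb 256
    bash tools/validate_gcn.sh tools/side_gcn.config /path/to/side_gcn_model.pb 256 1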