diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index 280b84d38659605c29ee71c6f479747bd506abac..4e14636e00d49b0ac023f1e319818258f2d23000 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -60,7 +60,9 @@ void *OpenCLAllocator::NewImage(const std::vector &image_shape, img_format, image_shape[0], image_shape[1], 0, nullptr, &error); - MACE_CHECK(error == CL_SUCCESS); + MACE_CHECK(error == CL_SUCCESS) << error << " with image shape: [" + << image_shape[0] << ", " << image_shape[1] + << "]"; return cl_image; } diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 70ca43b67efcfdfea95e5d8246f08d61450cc053..21b8d5198b6e57d32c278d09e2bb8cc87aea666c 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -192,18 +192,9 @@ void OpenCLRuntime::BuildProgram(const std::string &program_file_name, *program = cl::Program(this->context(), {device()}, {binary}); #else std::string source_filename = kernel_path_ + program_file_name; - std::string binary_filename = kernel_path_ + binary_file_name_prefix + ".bin"; // Create program - bool is_binary_filename_exist = std::ifstream(binary_filename).is_open(); - if (is_binary_filename_exist) { - VLOG(1) << "Create program with binary: " << binary_filename; - std::vector binary; - MACE_CHECK(ReadFile(binary_filename, true, &binary)); - - *program = cl::Program(this->context(), {device()}, {binary}); - - } else if (std::ifstream(source_filename).is_open()) { + if (std::ifstream(source_filename).is_open()) { VLOG(1) << "Create program with source: " << source_filename; std::vector kernel_source; MACE_CHECK(ReadFile(source_filename, false, &kernel_source)); @@ -214,8 +205,7 @@ void OpenCLRuntime::BuildProgram(const std::string &program_file_name, *program = cl::Program(this->context(), sources); } else { - LOG(FATAL) << 
"Failed to open kernel file " << binary_filename << " or " - << source_filename; + LOG(FATAL) << "Failed to open kernel file " << source_filename; } #endif @@ -237,32 +227,31 @@ void OpenCLRuntime::BuildProgram(const std::string &program_file_name, #ifndef MACE_EMBED_BINARY_PROGRAM // Write binary if necessary - if (!is_binary_filename_exist) { - size_t device_list_size = 1; - std::unique_ptr program_binary_sizes( - new size_t[device_list_size]); - cl_int err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * device_list_size, - program_binary_sizes.get(), nullptr); - MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; - std::unique_ptr[]> program_binaries( - new std::unique_ptr[device_list_size]); - for (cl_uint i = 0; i < device_list_size; ++i) { - program_binaries[i] = std::unique_ptr( - new unsigned char[program_binary_sizes[i]]); - } + std::string binary_filename = kernel_path_ + binary_file_name_prefix + ".bin"; + size_t device_list_size = 1; + std::unique_ptr program_binary_sizes( + new size_t[device_list_size]); + cl_int err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * device_list_size, + program_binary_sizes.get(), nullptr); + MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; + std::unique_ptr[]> program_binaries( + new std::unique_ptr[device_list_size]); + for (cl_uint i = 0; i < device_list_size; ++i) { + program_binaries[i] = std::unique_ptr( + new unsigned char[program_binary_sizes[i]]); + } - err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARIES, - sizeof(unsigned char *) * device_list_size, - program_binaries.get(), nullptr); - MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; - std::vector content( - reinterpret_cast(program_binaries[0].get()), - reinterpret_cast(program_binaries[0].get()) + - program_binary_sizes[0]); + err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARIES, + sizeof(unsigned char *) * device_list_size, + program_binaries.get(), nullptr); + 
MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; + std::vector content( + reinterpret_cast(program_binaries[0].get()), + reinterpret_cast(program_binaries[0].get()) + + program_binary_sizes[0]); - MACE_CHECK(WriteFile(binary_filename, true, content)); - } + MACE_CHECK(WriteFile(binary_filename, true, content)); #endif } diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc index ccdc3c7c1a202fd3f61c3d97c6ab85f5484fa423..3cfbcbf98bf3c215c7c9041eb8d2237f980c0673 100644 --- a/mace/examples/mace_run.cc +++ b/mace/examples/mace_run.cc @@ -212,12 +212,14 @@ int main(int argc, char **argv) { LOG(INFO) << "Avg duration: " << (t1 - t0) / round << " us"; } - MACE_CHECK(engine.Run(input_data.get(), input_shape_vec, output_data.get())); if (output_data != nullptr) { ofstream out_file(output_file, ios::binary); out_file.write((const char *) (output_data.get()), output_size * sizeof(float)); out_file.flush(); out_file.close(); + LOG(INFO) << "Write output file done."; + } else { + LOG(ERROR) << "output data is null"; } } diff --git a/mace/kernels/opencl/cl/conv_2d.cl b/mace/kernels/opencl/cl/conv_2d.cl index ccfe86d85c83a24898ce9640747fe46b95be2d1f..8929b0488d2c18cf033fdb3a4ae2fc19e0c24326 100644 --- a/mace/kernels/opencl/cl/conv_2d.cl +++ b/mace/kernels/opencl/cl/conv_2d.cl @@ -14,7 +14,9 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ __private const int filter_height, __private const int filter_width, __private const int padding_top, - __private const int padding_left) { + __private const int padding_left, + __private const int dilation_h, + __private const int dilation_w) { const int out_ch_blk = get_global_id(0); const int out_w_blk = get_global_id(1); const int out_w_blks = get_global_size(1); @@ -57,7 +59,7 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ const int in_idx = mul24(in_ch_blk, in_width); int filter_x_part0 = in_ch_blk << 2; for (short hb_idx = 0; hb_idx < filter_height; 
++hb_idx) { - int in_hb_value = height_idx + hb_idx; + int in_hb_value = height_idx + mul24(hb_idx, dilation_h); in_hb_value = select(in_hb_value + batch_idx, -1, (in_hb_value < 0 || in_hb_value >= in_height)); @@ -66,7 +68,7 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ for (short width_idx = 0; width_idx < filter_width; ++width_idx) { int in_width_value; #define READ_INPUT(i) \ - in_width_value = in_width##i + width_idx; \ + in_width_value = in_width##i + mul24(width_idx, dilation_w); \ in_width_value = select(in_idx + in_width_value, \ -1, \ (in_width_value < 0 || in_width_value >= in_width)); \ diff --git a/mace/kernels/opencl/cl/conv_2d_3x3.cl b/mace/kernels/opencl/cl/conv_2d_3x3.cl index 8a5ece6b31d907fc0a564c7407c969d6102b4c3a..019db2378755314bee009dceb084233a7a72db5a 100644 --- a/mace/kernels/opencl/cl/conv_2d_3x3.cl +++ b/mace/kernels/opencl/cl/conv_2d_3x3.cl @@ -12,7 +12,9 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] __private const int out_height, __private const int out_width, __private const int padding_top, - __private const int padding_left) { + __private const int padding_left, + __private const int dilation_h, + __private const int dilation_w) { const int out_ch_blk = get_global_id(0); const int out_w_blk = get_global_id(1); const int out_w_blks = get_global_size(1); @@ -55,12 +57,11 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] DATA_TYPE4 in0, in1, in2, in3, in4; DATA_TYPE4 weights0, weights1, weights2, weights3; - int hb_idx, width_idx, in_width_idx; for (short in_ch_blk = 0; in_ch_blk < in_ch_blks; ++in_ch_blk) { const int in_idx = mul24(in_ch_blk, in_width); int filter_x_part0 = in_ch_blk << 2; for (short hb_idx = 0; hb_idx < 3; ++hb_idx) { - int in_hb_value = height_idx + hb_idx; + int in_hb_value = height_idx + mul24(hb_idx, dilation_h); in_hb_value = select(in_hb_value + batch_idx, -1, (in_hb_value < 0 || in_hb_value >= 
in_height)); @@ -68,7 +69,7 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] for (short width_idx = 0; width_idx < 3; ++width_idx) { int in_width_value; #define READ_INPUT(i) \ - in_width_value = in_width##i + width_idx; \ + in_width_value = in_width##i + mul24(width_idx, dilation_w); \ in_width_value = select(in_idx + in_width_value, \ -1, \ (in_width_value < 0 || in_width_value >= in_width)); \ diff --git a/mace/kernels/opencl/conv_2d_opencl.cc b/mace/kernels/opencl/conv_2d_opencl.cc index 947f781811356b10a93d61ccdf51b0956ac036e0..5fa46128d9c8f0cb2f7bfe5fd42f6299997613b9 100644 --- a/mace/kernels/opencl/conv_2d_opencl.cc +++ b/mace/kernels/opencl/conv_2d_opencl.cc @@ -10,29 +10,33 @@ namespace kernels { extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, StatsFuture *future); + const int *padding, const int *dilations, + const DataType dt, Tensor *output, + StatsFuture *future); extern void Conv2dOpenclK1x1S2(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, StatsFuture *future); + const int *padding, const int *dilations, + const DataType dt, Tensor *output, + StatsFuture *future); extern void Conv2dOpenclK3x3S1(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, StatsFuture *future); + const int *padding, const int *dilations, + const DataType dt, Tensor *output, + StatsFuture *future); extern void Conv2dOpenclK3x3S2(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, StatsFuture *future); + const int *padding, const int *dilations, + const DataType dt, Tensor *output, + StatsFuture *future); extern void Conv2dOpencl(const Tensor 
*input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const uint32_t stride, const int *padding, - const DataType dt, Tensor *output, - StatsFuture *future); + const int *dilations, const DataType dt, + Tensor *output, StatsFuture *future); template void Conv2dFunctor::operator()(const Tensor *input, @@ -42,8 +46,8 @@ void Conv2dFunctor::operator()(const Tensor *input, StatsFuture *future) { typedef void (*Conv2dOpenclFunction)(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, + const int *padding, const int *dilations, + const DataType dt, Tensor *output, StatsFuture *future); // Selection matrix: kernel_size x stride_size static const Conv2dOpenclFunction selector[5][2] = { @@ -55,12 +59,14 @@ void Conv2dFunctor::operator()(const Tensor *input, index_t kernel_h = filter->dim(0); index_t kernel_w = filter->dim(1); - if (!input->is_image() || strides_[0] != strides_[1] || - strides_[0] > 2 || dilations_[0] != 1 || dilations_[1] != 1) { + if (!input->is_image() || strides_[0] != strides_[1] || strides_[0] > 2 || + ((dilations_[0] > 1 || dilations_[1] > 1) && (strides_[0] > 1 || kernel_h == 1))) { LOG(WARNING) << "OpenCL conv2d kernel with " << "filter" << kernel_h << "x" << kernel_w << "," << " stride " << strides_[0] << "x" << strides_[1] - << " is not implemented yet, using slow version"; + << ",dilations " << dilations_[0] << "x" << dilations_[1] + << " and input image: " << input->is_image() + << " is not implemented yet."; MACE_NOT_IMPLEMENTED; } @@ -77,11 +83,11 @@ void Conv2dFunctor::operator()(const Tensor *input, if (kernel_h == kernel_w && kernel_h <= 5 && selector[kernel_h - 1][strides_[0] - 1] != nullptr) { auto conv2d_func = selector[kernel_h - 1][strides_[0] - 1]; - conv2d_func(input, filter, bias, false, paddings.data(), + conv2d_func(input, filter, bias, false, paddings.data(), dilations_, DataTypeToEnum::value, output, future); } else { Conv2dOpencl(input,
filter, bias, false, strides_[0], - paddings.data(), DataTypeToEnum::value, + paddings.data(), dilations_, DataTypeToEnum::value, output, future); } diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index e2146a2e00dcc9ed0b1425a10bd1ee470e13dbb4..d56926a85357978f1857768b55362bc44ed1a5c3 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -129,6 +129,7 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *bias, const bool fused_relu, const int *padding, + const int *dilations, const DataType dt, Tensor *output, StatsFuture *future) { @@ -140,6 +141,7 @@ extern void Conv2dOpenclK1x1S2(const Tensor *input, const Tensor *bias, const bool fused_relu, const int *padding, + const int *dilations, const DataType dt, Tensor *output, StatsFuture *future) { diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc index 6902448a1888e7cec9adea05ed239321a9b5cc15..f48c7fc3450928a3245da17e09fa20e3aac9c326 100644 --- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc +++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc @@ -15,8 +15,8 @@ namespace kernels { static void Conv2d3x3S12(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const uint32_t stride, const int *padding, - const DataType dt, Tensor *output, - StatsFuture *future) { + const int *dilations, const DataType dt, + Tensor *output, StatsFuture *future) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -53,6 +53,8 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter, conv_2d_kernel.setArg(idx++, static_cast(width)); conv_2d_kernel.setArg(idx++, padding[0] / 2); conv_2d_kernel.setArg(idx++, padding[1] / 2); + conv_2d_kernel.setArg(idx++, dilations[0]); + conv_2d_kernel.setArg(idx++, dilations[1]); const uint32_t gws[3] = {static_cast(channel_blocks), 
static_cast(width_blocks), @@ -121,10 +123,11 @@ void Conv2dOpenclK3x3S1(const Tensor *input, const Tensor *bias, const bool fused_relu, const int *padding, + const int *dilations, const DataType dt, Tensor *output, StatsFuture *future) { - Conv2d3x3S12(input, filter, bias, fused_relu, 1, padding, dt, output, future); + Conv2d3x3S12(input, filter, bias, fused_relu, 1, padding, dilations, dt, output, future); }; void Conv2dOpenclK3x3S2(const Tensor *input, @@ -132,10 +135,11 @@ void Conv2dOpenclK3x3S2(const Tensor *input, const Tensor *bias, const bool fused_relu, const int *padding, + const int *dilations, const DataType dt, Tensor *output, StatsFuture *future) { - Conv2d3x3S12(input, filter, bias, fused_relu, 2, padding, dt, output, future); + Conv2d3x3S12(input, filter, bias, fused_relu, 2, padding, dilations, dt, output, future); }; } // namespace kernels diff --git a/mace/kernels/opencl/conv_2d_opencl_general.cc b/mace/kernels/opencl/conv_2d_opencl_general.cc index 1ad8b194eb9428cc473a36bf0a73cb1cf3f09d62..2bd897f0a7eaf9203466016e9308d344fbe2889e 100644 --- a/mace/kernels/opencl/conv_2d_opencl_general.cc +++ b/mace/kernels/opencl/conv_2d_opencl_general.cc @@ -15,8 +15,8 @@ namespace kernels { void Conv2dOpencl(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const uint32_t stride, const int *padding, - const DataType dt, Tensor *output, - StatsFuture *future) { + const int *dilations, const DataType dt, + Tensor *output, StatsFuture *future) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -55,6 +55,8 @@ void Conv2dOpencl(const Tensor *input, const Tensor *filter, conv_2d_kernel.setArg(idx++, static_cast(filter->dim(1))); conv_2d_kernel.setArg(idx++, padding[0] / 2); conv_2d_kernel.setArg(idx++, padding[1] / 2); + conv_2d_kernel.setArg(idx++, dilations[0]); + conv_2d_kernel.setArg(idx++, dilations[1]); const uint32_t gws[3] = {static_cast(channel_blocks), 
static_cast(width_blocks), diff --git a/mace/kernels/opencl/fused_conv_2d_opencl.cc b/mace/kernels/opencl/fused_conv_2d_opencl.cc index f2bd514967d1359397762f4775a4d498af3b1ea7..2f4e608699042b72af545dda471dcd843a2cfdf5 100644 --- a/mace/kernels/opencl/fused_conv_2d_opencl.cc +++ b/mace/kernels/opencl/fused_conv_2d_opencl.cc @@ -10,33 +10,33 @@ namespace kernels { extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, + const int *padding, const int *dilations, + const DataType dt, Tensor *output, StatsFuture *future); extern void Conv2dOpenclK1x1S2(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, + const int *padding, const int *dilations, + const DataType dt, Tensor *output, StatsFuture *future); extern void Conv2dOpenclK3x3S1(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, + const int *padding, const int *dilations, + const DataType dt, Tensor *output, StatsFuture *future); extern void Conv2dOpenclK3x3S2(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, + const int *padding, const int *dilations, + const DataType dt, Tensor *output, StatsFuture *future); extern void Conv2dOpencl(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const uint32_t stride, const int *padding, - const DataType dt, Tensor *output, - StatsFuture *future); + const int *dilations, const DataType dt, + Tensor *output, StatsFuture *future); template void FusedConv2dFunctor::operator()(const Tensor *input, @@ -46,8 +46,9 @@ void FusedConv2dFunctor::operator()(const Tensor *input, StatsFuture *future) { typedef void (*Conv2dOpenclFunction)(const Tensor 
*input, const Tensor *filter, const Tensor *bias, const bool fused_relu, - const int *padding, const DataType dt, - Tensor *output, StatsFuture *future); + const int *padding, const int *dilations, + const DataType dt, Tensor *output, + StatsFuture *future); // Selection matrix: kernel_size x stride_size static const Conv2dOpenclFunction selector[5][2] = { {Conv2dOpenclK1x1S1, Conv2dOpenclK1x1S2}, @@ -57,12 +58,14 @@ void FusedConv2dFunctor::operator()(const Tensor *input, {nullptr, nullptr}}; index_t kernel_h = filter->dim(0); index_t kernel_w = filter->dim(1); - if (!input->is_image() || strides_[0] != strides_[1] || - strides_[0] > 2 || dilations_[0] != 1 || dilations_[1] != 1) { + if (!input->is_image() || strides_[0] != strides_[1] || strides_[0] > 2 || + ((dilations_[0] > 1 || dilations_[1] > 1) && (strides_[0] > 1 || kernel_h == 1))) { LOG(WARNING) << "OpenCL conv2d kernel with " << "filter" << kernel_h << "x" << kernel_w << "," << " stride " << strides_[0] << "x" << strides_[1] - << " is not implemented yet, using slow version"; + << ",dilations " << dilations_[0] << "x" << dilations_[1] + << " and input image: " << input->is_image() + << " is not implemented yet."; MACE_NOT_IMPLEMENTED; } @@ -79,11 +82,11 @@ void FusedConv2dFunctor::operator()(const Tensor *input, if (kernel_h == kernel_w && kernel_h <= 5 && selector[kernel_h - 1][strides_[0] - 1] != nullptr) { auto conv2d_func = selector[kernel_h - 1][strides_[0] - 1]; - conv2d_func(input, filter, bias, true, paddings.data(), + conv2d_func(input, filter, bias, true, paddings.data(), dilations_, DataTypeToEnum::value, output, future); } else { Conv2dOpencl(input, filter, bias, true, strides_[0], paddings.data(), - DataTypeToEnum::value, output, future); + dilations_, DataTypeToEnum::value, output, future); } } diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index e39c9b740049e96af68e04315872d6b5c30e98a3..f137da6e911ff3ffc986393f19c2f8326077e03c 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc
@@ -2,6 +2,7 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // +#include #include "mace/ops/conv_2d.h" #include "mace/ops/ops_test_util.h" @@ -564,19 +565,20 @@ TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) { template static void TestHalfComplexConvNxNS12(const std::vector &input_shape, - const std::vector &filter_shape) { + const std::vector &filter_shape, + const std::vector &dilations) { testing::internal::LogToStderr(); srand(time(NULL)); auto func = [&](int stride_h, int stride_w, Padding padding) { // generate random input - index_t batch = 3 + (rand() % 10); + index_t batch = 3; index_t height = input_shape[0]; index_t width = input_shape[1]; index_t kernel_h = filter_shape[0]; index_t kernel_w = filter_shape[1]; - index_t input_channels = filter_shape[2] + (rand() % 10); - index_t output_channels = filter_shape[3] + (rand() % 10); + index_t input_channels = filter_shape[2]; + index_t output_channels = filter_shape[3]; // Construct graph OpsTestNet net; OpDefBuilder("Conv2D", "Conv2dTest") @@ -586,7 +588,7 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, .Output("Output") .AddIntsArg("strides", {stride_h, stride_w}) .AddIntArg("padding", padding) - .AddIntsArg("dilations", {1, 1}) + .AddIntsArg("dilations", {dilations[0], dilations[1]}) .Finalize(net.NewOperatorDef()); std::vector float_input_data; @@ -619,7 +621,7 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, .Output("OutputImage") .AddIntsArg("strides", {stride_h, stride_w}) .AddIntArg("padding", padding) - .AddIntsArg("dilations", {1, 1}) + .AddIntsArg("dilations", {dilations[0], dilations[1]}) .AddIntArg("T", static_cast(DataType::DT_HALF)) .Finalize(net.NewOperatorDef()); // Run on device @@ -630,43 +632,154 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); }; - for (int stride : {1, 2}) { - func(stride, stride, VALID); - func(stride, stride, SAME); + func(1, 
1, VALID); + func(1, 1, SAME); + if (dilations[0] == 1) { + func(2, 2, VALID); + func(2, 2, SAME); } } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x1S12) { TestHalfComplexConvNxNS12({32, 32}, - {1, 1, 32, 64}); + {1, 1, 32, 64}, + {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) { TestHalfComplexConvNxNS12({32, 32}, - {3, 3, 32, 64}); + {3, 3, 32, 64}, + {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv15x1S12) { TestHalfComplexConvNxNS12({32, 32}, - {15, 1, 256, 2}); + {15, 1, 256, 2}, + {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x15S12) { TestHalfComplexConvNxNS12({32, 32}, - {1, 15, 256, 2}); + {1, 15, 256, 2}, + {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv7x75S12) { TestHalfComplexConvNxNS12({32, 32}, - {7, 7, 3, 64}); + {7, 7, 3, 64}, + {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv1x1S12) { TestHalfComplexConvNxNS12({107, 113}, - {1, 1, 5, 7}); + {1, 1, 5, 7}, + {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) { TestHalfComplexConvNxNS12({107, 113}, - {3, 3, 5, 7}); + {3, 3, 5, 7}, + {1, 1}); +} + +TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) { + TestHalfComplexConvNxNS12({64, 64}, + {5, 5, 16, 16}, + {2, 2}); +} + +TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation2) { + TestHalfComplexConvNxNS12({64, 64}, + {7, 7, 16, 16}, + {2, 2}); +} + +TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation4) { + TestHalfComplexConvNxNS12({63, 67}, + {7, 7, 16, 16}, + {4, 4}); +} + +template +static void TestDilationConvNxN(const std::vector &shape, const int dilation_rate) { + testing::internal::LogToStderr(); + auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, + Padding type) { + srand(time(NULL)); + + // generate random input + index_t batch = 1; + index_t height = shape[0]; + index_t width = shape[1]; + index_t input_channels = shape[2]; + index_t output_channels = shape[3]; + // Construct graph + OpsTestNet net; + OpDefBuilder("Conv2D", "Conv2dTest") + .Input("Input") + .Input("Filter") + 
.Input("Bias") + .Output("Output") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {dilation_rate, dilation_rate}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + + // Add input data + net.AddRandomInput("Input", {batch, height, width, input_channels}); + net.AddRandomInput( + "Filter", {kernel_h, kernel_w, input_channels, output_channels}); + net.AddRandomInput("Bias", {output_channels}); + + // run on cpu + net.RunOp(); + // Check + Tensor expected; + expected.Copy(*net.GetOutput("Output")); + + // run on gpu + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + + OpDefBuilder("Conv2D", "Conv2dTest") + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {dilation_rate, dilation_rate}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + // Run on device + net.RunOp(D); + + ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); + }; + + for (int kernel_size : {3}) { + for (int stride : {1}) { + func(kernel_size, kernel_size, stride, stride, VALID); + func(kernel_size, kernel_size, stride, stride, SAME); + } + } } + +TEST_F(Conv2dOpTest, OPENCLAlignedDilation2) { + TestDilationConvNxN({32, 32, 32, 64}, + 2); +} + +TEST_F(Conv2dOpTest, OPENCLAligned2Dilation4) { + TestDilationConvNxN({128, 128, 16, 16}, + 4); +} + +TEST_F(Conv2dOpTest, OPENCLUnalignedDilation4) { + TestDilationConvNxN({107, 113, 5, 7}, + 4); +} + diff --git a/mace/ops/fused_conv_2d_test.cc b/mace/ops/fused_conv_2d_test.cc index 
7ce58e6ce18b34f5c2c4f8b97de3ff2cb3f0e508..f1effb3ee99cb9dd6353c1beae5f581515a87125 100644 --- a/mace/ops/fused_conv_2d_test.cc +++ b/mace/ops/fused_conv_2d_test.cc @@ -486,3 +486,160 @@ TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) { {15, 1, 32, 64}); } +template +static void TestAtrousConvNxN(const std::vector &shape, const int dilation) { + testing::internal::LogToStderr(); + auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, + Padding type) { + srand(time(NULL)); + + // generate random input + index_t batch = 1; + index_t height = shape[0]; + index_t width = shape[1]; + index_t input_channels = shape[2]; + index_t output_channels = shape[3]; + // Construct graph + OpsTestNet net; + OpDefBuilder("FusedConv2D", "FusedConv2dTest") + .Input("Input") + .Input("Filter") + .Input("Bias") + .Output("Output") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {dilation, dilation}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + + // Add input data + net.AddRandomInput("Input", {batch, height, width, input_channels}); + net.AddRandomInput( + "Filter", {kernel_h, kernel_w, input_channels, output_channels}); + net.AddRandomInput("Bias", {output_channels}); + + // run on cpu + net.RunOp(); + // Check + Tensor expected; + expected.Copy(*net.GetOutput("Output")); + + // run on gpu + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + + OpDefBuilder("FusedConv2D", "FusedConv2dTest") + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {dilation, dilation}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + // 
Run on device + net.RunOp(D); + + ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); + }; + + for (int kernel_size : {3}) { + for (int stride : {1}) { + func(kernel_size, kernel_size, stride, stride, VALID); + func(kernel_size, kernel_size, stride, stride, SAME); + } + } +} + +TEST_F(FusedConv2dOpTest, OPENCLalignedAtrousConvNxN2) { + TestAtrousConvNxN({128, 128, 16, 16}, 2); +} + +TEST_F(FusedConv2dOpTest, OPENCLalignedAtrousConvNxN4) { + TestAtrousConvNxN({128, 128, 16, 16}, 4); +} + +TEST_F(FusedConv2dOpTest, OPENCLUnalignedAtrousConvNxN) { + TestAtrousConvNxN({107, 113, 5, 7}, 2); +} + +template +static void TestGeneralHalfAtrousConv(const std::vector &image_shape, + const std::vector &filter_shape, + const std::vector &dilations) { + testing::internal::LogToStderr(); + auto func = [&](int stride_h, int stride_w, Padding type) { + srand(time(NULL)); + + // generate random input + index_t batch = 1; + index_t height = image_shape[0]; + index_t width = image_shape[1]; + index_t input_channels = filter_shape[2]; + index_t output_channels = filter_shape[3]; + index_t kernel_h = filter_shape[0]; + index_t kernel_w = filter_shape[1]; + // Construct graph + OpsTestNet net; + OpDefBuilder("FusedConv2D", "FusedConv2dTest") + .Input("Input") + .Input("Filter") + .Input("Bias") + .Output("Output") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {dilations[0], dilations[1]}) + .Finalize(net.NewOperatorDef()); + + // Add input data + net.AddRandomInput("Input", {batch, height, width, input_channels}); + net.AddRandomInput( + "Filter", {kernel_h, kernel_w, input_channels, output_channels}); + net.AddRandomInput("Bias", {output_channels}); + + // run on cpu + net.RunOp(); + // Check + Tensor expected; + expected.Copy(*net.GetOutput("Output")); + + // run on gpu + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); +
BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + + OpDefBuilder("FusedConv2D", "FusedConv2dTest") + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {dilations[0], dilations[1]}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + // Run on device + net.RunOp(D); + + ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.7); + }; + + func(1, 1, VALID); + func(1, 1, SAME); +} + +TEST_F(FusedConv2dOpTest, OPENCL7X7AtrousConvD2) { + TestGeneralHalfAtrousConv({32, 32}, + {7, 7, 3, 16}, + {2, 2}); +} + +TEST_F(FusedConv2dOpTest, OPENCL15X15AtrousConvD4) { + TestGeneralHalfAtrousConv({63, 71}, + {15, 15, 16, 16}, + {2, 2}); +} diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index ea86167ab681d8deab7589777bfb3135bf51da75..e9f2fd048598de6edf4921ebee562d46036e73bb 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -322,18 +322,25 @@ struct Expector { Tensor::MappingGuard y_mapper(&y); auto a = x.data(); auto b = y.data(); - for (int n = 0; n < x.dim(0); ++n) { - for (int h = 0; h < x.dim(1); ++h) { - for (int w = 0; w < x.dim(2); ++w) { - for (int c = 0; c < x.dim(3); ++c) { - EXPECT_NEAR(*a, *b, abs_err) << "with index = [" - << n << ", " << h << ", " - << w << ", " << c << "]"; - a++; - b++; + if (x.dim_size() == 4) { + for (int n = 0; n < x.dim(0); ++n) { + for (int h = 0; h < x.dim(1); ++h) { + for (int w = 0; w < x.dim(2); ++w) { + for (int c = 0; c < x.dim(3); ++c) { + EXPECT_NEAR(*a, *b, abs_err) << "with index = [" + << n << ", " << h << ", " + << w << ", " << c << "]"; + a++; + b++; + } } } } + } else { + for (int i = 0; i < x.size(); ++i) { + EXPECT_NEAR(a[i], b[i], abs_err) << "a
= " << a[i] << " b = " << b[i] << " index = " << i; } } } diff --git a/mace/python/tools/tf_converter.py b/mace/python/tools/tf_converter.py index 1251bf55f61c5b674b6bab538e36f485cad383b8..d258f7bb7be5f39426075976c9987dfb17674b1c 100644 --- a/mace/python/tools/tf_converter.py +++ b/mace/python/tools/tf_converter.py @@ -21,7 +21,6 @@ def main(unused_args): data = f.read() input_graph_def.ParseFromString(data) - print 'done' if FLAGS.runtime == 'dsp': output_graph_def = tf_dsp_converter_lib.convert_to_mace_pb( input_graph_def, FLAGS.input_node, FLAGS.output_node, FLAGS.prequantize) @@ -38,6 +37,7 @@ def main(unused_args): with gfile.GFile(FLAGS.output + '_txt', "wb") as f: # output_graph_def.ClearField('tensors') f.write(str(output_graph_def)) + print("Model conversion is completed.") def parse_args(): @@ -82,7 +82,7 @@ def parse_args(): parser.add_argument( "--output_type", type=str, - default="source", + default="pb", help="output type: source/pb") parser.add_argument( "--template", diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py index 0378c018b3bc33564bcc3cfd80669e1082309be0..e224b6112234464df3f4b5411303d9cff0c36579 100644 --- a/mace/python/tools/tf_converter_lib.py +++ b/mace/python/tools/tf_converter_lib.py @@ -44,6 +44,7 @@ class TFConverter(object): self.device = device self.tf_graph = {} self.resolved_ops = {} + self.unused_tensor = set() for op in tf_ops: self.resolved_ops[op.name] = 0 @@ -72,6 +73,23 @@ class TFConverter(object): arg.i = self.dt return output_name + def add_image_to_buffer(self, input_name, input_type): + output_name = input_name[:-2] + "_i2b" + input_name[-2:] + op_def = self.net_def.op.add() + op_def.name = output_name[:-2] + op_def.type = 'ImageToBuffer' + op_def.input.extend([input_name]) + op_def.output.extend([output_name]) + + arg = op_def.arg.add() + arg.name = 'buffer_type' + arg.i = buffer_type_map[input_type] + arg = op_def.arg.add() + arg.name = 'T' + arg.i = self.dt + return output_name
+ + def add_input_transform(self, name): new_input_name = MACE_INPUT_NODE_NAME + ":0" op_def = self.net_def.op.add() @@ -111,22 +129,23 @@ class TFConverter(object): op.output_shape.extend(output_shapes) def convert_tensor(self, op): - tensor = self.net_def.tensors.add() - tf_tensor = op.outputs[0].eval() - tensor.name = op.outputs[0].name - - shape = list(tf_tensor.shape) - tensor.dims.extend(shape) - - tf_dt = op.get_attr('dtype') - if tf_dt == tf.float32: - tensor.data_type = mace_pb2.DT_FLOAT - tensor.float_data.extend(tf_tensor.astype(np.float32).flat) - elif tf_dt == tf.int32: - tensor.data_type = mace_pb2.DT_INT32 - tensor.int32_data.extend(tf_tensor.astype(np.int32).flat) - else: - raise Exception("Not supported tensor type: " + tf_dt.name) + if op.outputs[0].name not in self.unused_tensor: + tensor = self.net_def.tensors.add() + tf_tensor = op.outputs[0].eval() + tensor.name = op.outputs[0].name + + shape = list(tf_tensor.shape) + tensor.dims.extend(shape) + + tf_dt = op.get_attr('dtype') + if tf_dt == tf.float32: + tensor.data_type = mace_pb2.DT_FLOAT + tensor.float_data.extend(tf_tensor.astype(np.float32).flat) + elif tf_dt == tf.int32: + tensor.data_type = mace_pb2.DT_INT32 + tensor.int32_data.extend(tf_tensor.astype(np.int32).flat) + else: + raise Exception("Not supported tensor type: " + tf_dt.name) self.resolved_ops[op.name] = 1 def convert_conv2d(self, op): @@ -253,6 +272,7 @@ class TFConverter(object): data_format_arg = op_def.arg.add() data_format_arg.name = 'data_format' data_format_arg.s = 'NHWC' + self.unused_tensor.add(get_input_tensor(op, 1).name) self.net_def.op.extend([op_def]) for i in range(0, 7): @@ -326,6 +346,7 @@ class TFConverter(object): axis_arg.i = get_input_tensor(op, 2).eval().astype(np.int32) self.add_output_shape(op.outputs, op_def) self.resolved_ops[op.name] = 1 + self.unused_tensor.add(get_input_tensor(op, 2).name) def convert_resize_bilinear(self, op): op_def = self.net_def.op.add() @@ -344,6 +365,7 @@ class 
TFConverter(object): size_arg.i = op.get_attr('align_corners') self.add_output_shape(op.outputs, op_def) self.resolved_ops[op.name] = 1 + self.unused_tensor.add(get_input_tensor(op, 1).name) def convert_bias_add(self, op): op_def = mace_pb2.OperatorDef() @@ -383,6 +405,79 @@ class TFConverter(object): size_arg.ints.extend(get_input_tensor(op, 2).eval().astype(np.int32).flat) self.add_output_shape(op.outputs, op_def) self.resolved_ops[op.name] = 1 + self.unused_tensor.add(get_input_tensor(op, 1).name) + self.unused_tensor.add(get_input_tensor(op, 2).name) + + def is_atrous_conv2d(self, op): + return op.type == 'SpaceToBatchND' and\ + len(self.tf_graph[op.name]) == 1 and self.tf_graph[op.name][0].type == 'Conv2D' + + def convert_atrous_conv2d(self, op): + op_def = mace_pb2.OperatorDef() + arg = op_def.arg.add() + arg.name = 'T' + arg.i = self.dt + conv_op = self.tf_graph[op.name][0] + op_def.name = conv_op.name + op_def.type = conv_op.type + if self.device == 'gpu': + op_def.input.extend([op.inputs[0].name]) + output_name = self.add_buffer_to_image(conv_op.inputs[1].name, "FILTER") + op_def.input.extend([output_name]) + else: + op_def.input.extend([op.inputs[0].name]) + op_def.input.extend([conv_op.inputs[1].name]) + + dilation_arg = op_def.arg.add() + dilation_arg.name = 'dilations' + dilation_arg.ints.extend(get_input_tensor(op, 1).eval().astype(np.int32).flat) + padding_arg = op_def.arg.add() + padding_arg.name = 'padding' + padding_values = get_input_tensor(op, 2).eval().astype(np.int32).flat + if len(padding_values) > 0 and padding_values[0] > 0: + padding_arg.i = padding_mode['SAME'] + else: + padding_arg.i = padding_mode['VALID'] + self.unused_tensor.add(get_input_tensor(op, 1).name) + self.unused_tensor.add(get_input_tensor(op, 2).name) + + strides_arg = op_def.arg.add() + strides_arg.name = 'strides' + strides_arg.ints.extend([1, 1]) + data_format_arg = op_def.arg.add() + data_format_arg.name = 'data_format' + data_format_arg.s = 'NHWC' + final_op = conv_op 
+ self.resolved_ops[op.name] = 1 + self.resolved_ops[conv_op.name] = 1 + + if len(self.tf_graph[final_op.name]) == 1 and self.tf_graph[final_op.name][0].type == 'BiasAdd' : + bias_add_op = self.tf_graph[final_op.name][0] + if self.device == 'gpu': + output_name = self.add_buffer_to_image(bias_add_op.inputs[1].name, "ARGUMENT") + op_def.input.extend([output_name]) + else: + op_def.input.extend([bias_add_op.inputs[1].name]) + final_op = bias_add_op + self.resolved_ops[bias_add_op.name] = 1 + + if len(self.tf_graph[final_op.name]) == 1 \ + and self.tf_graph[final_op.name][0].type == 'BatchToSpaceND': + final_op = self.tf_graph[final_op.name][0] + self.resolved_ops[final_op.name] = 1 + else: + raise Exception('Convert atrous conv error: no BatchToSpaceND op') + + if len(self.tf_graph[final_op.name]) == 1 \ + and self.tf_graph[final_op.name][0].type == 'Relu': + relu_op = self.tf_graph[final_op.name][0] + op_def.type = "FusedConv2D" + final_op = relu_op + self.resolved_ops[relu_op.name] = 1 + + op_def.output.extend([output.name for output in final_op.outputs]) + self.add_output_shape(final_op.outputs, op_def) + self.net_def.op.extend([op_def]) def convert_normal_op(self, op): op_def = self.net_def.op.add() @@ -407,7 +502,9 @@ class TFConverter(object): self.resolved_ops[op.name] = 1 pass elif op.type == 'Const': - self.convert_tensor(op) + pass + elif self.is_atrous_conv2d(op): + self.convert_atrous_conv2d(op) elif op.type == 'Conv2D' or op.type == 'DepthwiseConv2dNative': self.convert_conv2d(op) elif op.type == 'FusedBatchNorm': @@ -435,6 +532,15 @@ class TFConverter(object): else: raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type)) + + for op in self.tf_ops: + if self.resolved_ops[op.name] == 1: + continue + elif op.type == 'Const': + self.convert_tensor(op) + else: + raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type)) + if self.device == 'gpu': self.add_output_transform(output_node) diff --git a/tools/validate.py b/tools/validate.py index 
c1560a16e85b91906106c54438aa0cbf6a86df7b..46ada3d1606faca8db5c0de65fe025102cbc5fb9 100644 --- a/tools/validate.py +++ b/tools/validate.py @@ -10,8 +10,8 @@ from tensorflow import gfile # Validation Flow: # 1. Generate input data -# python validate_icnet.py --generate_data 1 \ -# --random_seed 1 +# python validate_icnet.py --generate_data 1 +# # 2. Use mace_run to run icnet on phone. # 3. adb pull the result. # 4. Compare output data of mace and tf @@ -20,7 +20,7 @@ from tensorflow import gfile # --mace_out_file icnet.out def generate_data(shape): - np.random.seed(FLAGS.random_seed) + np.random.seed() data = np.random.random(shape) * -1 print FLAGS.input_file data.astype(np.float32).tofile(FLAGS.input_file) @@ -122,12 +122,7 @@ def parse_args(): "--generate_data", type='bool', default="false", - help="Random seed for generate test case.") - parser.add_argument( - "--random_seed", - type=int, - default="0", - help="Random seed for generate test case.") + help="Generate data or not.") return parser.parse_known_args() diff --git a/tools/validate_gcn.sh b/tools/validate_gcn.sh index 1359a356bc84b89b6c711d2ab1e2108e4ddb99d3..35973abf42d8813713c14395640e21957b8d5a1f 100644 --- a/tools/validate_gcn.sh +++ b/tools/validate_gcn.sh @@ -79,7 +79,8 @@ build_and_run() } echo "Step 1: Generate input data" -python tools/validate.py --generate_data true --random_seed 1 \ +rm -rf ${MODEL_DIR}/${INPUT_FILE_NAME} +python tools/validate.py --generate_data true \ --input_file=${MODEL_DIR}/${INPUT_FILE_NAME} \ --input_shape="${IMAGE_SIZE},${IMAGE_SIZE},3"