diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD
index 877fa040624b1eae2d48569b8cd23379951495d8..c8ecb5b882224198a0c34b598f3f731a9cc5b2cf 100644
--- a/mace/kernels/BUILD
+++ b/mace/kernels/BUILD
@@ -20,6 +20,7 @@ cc_library(
         exclude = [
             "*_test.cc",
             "arm/*_test.cc",
+            "opencl/*_test.cc",
         ],
     ),
     hdrs = glob([
@@ -42,6 +43,7 @@ cc_test(
         [
             "*_test.cc",
             "arm/*_test.cc",
+            "opencl/*_test.cc",
         ],
     ),
     copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]),
diff --git a/mace/kernels/opencl/out_of_range_check_test.cc b/mace/kernels/opencl/out_of_range_check_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..55d90196c33bd190a8cbbb8bc58db706b3e57129
--- /dev/null
+++ b/mace/kernels/opencl/out_of_range_check_test.cc
@@ -0,0 +1,185 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+// NOTE(review): the copy of this patch under review had lost every <...>
+// token (standard includes, template and cast arguments). They are restored
+// below from how each value is used; confirm against the original commit.
+
+#include <cstdint>
+#include <cstdlib>
+#include <ctime>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "mace/core/runtime/opencl/opencl_runtime.h"
+#include "mace/core/tensor.h"
+#include "mace/core/workspace.h"
+#include "mace/kernels/opencl/helper.h"
+#include "mace/utils/tuner.h"
+#include "mace/utils/utils.h"
+
+namespace mace {
+namespace kernels {
+namespace {
+
+// Runs the "in_out_buffer_to_image" OpenCL kernel on |buffer| and |image|,
+// using image_shape[0] and image_shape[1] as the global work size (rounded
+// up to a multiple of the local size when non-uniform work-groups are not
+// supported).
+//
+// When the runtime reports the out-of-range check as enabled, the kernel is
+// built with -DOUT_OF_RANGE_CHECK and is handed a one-byte error buffer that
+// is preset to '0' before the launch.
+//
+// Returns true only if the check is enabled and that byte reads back as '1'
+// once the command queue has finished; returns false otherwise.
+bool BufferToImageOpImpl(Tensor *buffer,
+                         Tensor *image,
+                         const std::vector<size_t> &image_shape) {
+  std::unique_ptr<Buffer> kernel_error;
+  uint32_t gws[2] = {static_cast<uint32_t>(image_shape[0]),
+                     static_cast<uint32_t>(image_shape[1])};
+
+  auto runtime = OpenCLRuntime::Global();
+  // Query the flag once so the error buffer is allocated, bound and read
+  // under the same decision.
+  const bool check_enabled = runtime->IsOutOfRangeCheckEnabled();
+
+  std::string kernel_name = "in_out_buffer_to_image";
+  std::string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL(kernel_name);
+  std::set<std::string> built_options;
+  std::stringstream kernel_name_ss;
+  kernel_name_ss << "-D" << kernel_name << "=" << obfuscated_kernel_name;
+  built_options.emplace(kernel_name_ss.str());
+  if (runtime->IsNonUniformWorkgroupsSupported()) {
+    built_options.emplace("-DNON_UNIFORM_WORK_GROUP");
+  }
+  if (buffer->dtype() == image->dtype()) {
+    built_options.emplace("-DDATA_TYPE=" +
+                          DtToCLDt(DataTypeToEnum<float>::value));
+    built_options.emplace("-DCMD_DATA_TYPE=" +
+                          DtToCLCMDDt(DataTypeToEnum<float>::value));
+  } else {
+    built_options.emplace("-DDATA_TYPE=" +
+                          DtToUpstreamCLDt(DataTypeToEnum<float>::value));
+    built_options.emplace("-DCMD_DATA_TYPE=" +
+                          DtToUpstreamCLCMDDt(DataTypeToEnum<float>::value));
+  }
+  if (check_enabled) {
+    built_options.emplace("-DOUT_OF_RANGE_CHECK");
+    kernel_error.reset(
+        new Buffer(GetDeviceAllocator(DeviceType::OPENCL), 1));
+    kernel_error->Map(nullptr);
+    *(kernel_error->mutable_data<char>()) = '0';
+    kernel_error->UnMap();
+  }
+
+  auto b2f_kernel = runtime->BuildKernel("buffer_to_image",
+                                         obfuscated_kernel_name, built_options);
+
+  // Argument order: [error buffer], [gws0, gws1], input buffer, element
+  // offset, dim(1), dim(2), dim(3), output image.
+  uint32_t idx = 0;
+  if (check_enabled) {
+    b2f_kernel.setArg(idx++,
+                      *(static_cast<cl::Buffer *>(kernel_error->buffer())));
+  }
+  if (!runtime->IsNonUniformWorkgroupsSupported()) {
+    b2f_kernel.setArg(idx++, gws[0]);
+    b2f_kernel.setArg(idx++, gws[1]);
+  }
+  b2f_kernel.setArg(idx++, *(buffer->opencl_buffer()));
+  MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0,
+             "buffer offset not aligned");
+  b2f_kernel.setArg(idx++,
+                    static_cast<uint32_t>(buffer->buffer_offset() /
+                                          GetEnumTypeSize(buffer->dtype())));
+  b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(1)));
+  b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(2)));
+  b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(3)));
+  b2f_kernel.setArg(idx++, *(image->opencl_image()));
+
+  const uint32_t kwg_size =
+      static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(b2f_kernel));
+  const std::vector<uint32_t> lws = {16, kwg_size / 16};
+
+  cl::Event event;
+  cl_int error;
+  if (runtime->IsNonUniformWorkgroupsSupported()) {
+    error = runtime->command_queue().enqueueNDRangeKernel(
+        b2f_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1]),
+        cl::NDRange(lws[0], lws[1]), nullptr, &event);
+  } else {
+    std::vector<uint32_t> roundup_gws(lws.size());
+    for (size_t i = 0; i < lws.size(); ++i) {
+      roundup_gws[i] = RoundUp(gws[i], lws[i]);
+    }
+
+    error = runtime->command_queue().enqueueNDRangeKernel(
+        b2f_kernel, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1]),
+        cl::NDRange(lws[0], lws[1]), nullptr, &event);
+  }
+  MACE_CHECK_CL_SUCCESS(error);
+
+  runtime->command_queue().finish();
+  bool is_out_of_range = false;
+  if (check_enabled) {
+    kernel_error->Map(nullptr);
+    is_out_of_range = *(kernel_error->mutable_data<char>()) == '1';
+    kernel_error->UnMap();
+  }
+  return is_out_of_range;
+}
+
+}  // namespace
+
+// Fixture that sets MACE_OUT_OF_RANGE_CHECK=1 before each test body runs.
+// NOTE(review): presumably OpenCLRuntime reads this variable to decide
+// IsOutOfRangeCheckEnabled() -- confirm, including when it is read.
+class OutOfRangeCheckTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    setenv("MACE_OUT_OF_RANGE_CHECK", "1", 1);
+  }
+};
+
+// Uses TEST_F rather than TEST: a plain TEST never instantiates the fixture
+// above, so its SetUp() (and the setenv call) would not run.
+TEST_F(OutOfRangeCheckTest, RandomTest) {
+  static unsigned int seed = time(nullptr);
+  index_t batch = 11 + rand_r(&seed) % 10;
+  index_t height = 12 + rand_r(&seed) % 100;
+  index_t width = 13 + rand_r(&seed) % 100;
+  index_t channels = 14 + rand_r(&seed) % 50;
+
+  std::vector<index_t> buffer_shape = {batch, height, width, channels};
+  Workspace ws;
+  Tensor *buffer = ws.CreateTensor("Buffer",
+                                   GetDeviceAllocator(DeviceType::OPENCL),
+                                   DataTypeToEnum<float>::v());
+  buffer->Resize(buffer_shape);
+
+  std::vector<size_t> image_shape;
+  Tensor *image = ws.CreateTensor("Image",
+                                  GetDeviceAllocator(DeviceType::OPENCL),
+                                  DataTypeToEnum<float>::v());
+  CalImage2DShape(buffer->shape(), IN_OUT_CHANNEL, &image_shape);
+  image->ResizeImage(buffer->shape(), image_shape);
+  // Launching with the computed image shape must not be reported.
+  ASSERT_FALSE(BufferToImageOpImpl(buffer, image, image_shape));
+
+  // Grow every dimension by one; this launch is expected to be reported.
+  std::vector<size_t> overflow_image_shape = image_shape;
+  for (auto &dim : overflow_image_shape) {
+    dim += 1;
+  }
+  ASSERT_TRUE(BufferToImageOpImpl(buffer, image, overflow_image_shape));
+}
+
+}  // namespace kernels
+}  // namespace mace