diff --git a/mace/core/BUILD b/mace/core/BUILD index 9316b8ed6cf409c3582756f2444081efa8a373e5..9d337bc4b8f19755dbc71f1f2a1120981b64e628 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -14,15 +14,11 @@ cc_library( srcs = glob([ "runtime/opencl/cl.hpp", "runtime/opencl/cl2.hpp", - "runtime/opencl/opencl_allocator.cc", - "runtime/opencl/opencl_wrapper.cc", - "runtime/opencl/opencl_runtime.cc", + "runtime/opencl/*.cc", + ]), + hdrs = glob([ + "runtime/opencl/*.h", ]), - hdrs = [ - "runtime/opencl/opencl_allocator.h", - "runtime/opencl/opencl_runtime.h", - "runtime/opencl/opencl_wrapper.h", - ], copts = ["-std=c++11"], deps = [ "core", @@ -31,17 +27,6 @@ cc_library( alwayslink = 1, ) -cc_binary( - name = "opencl_smoketest", - srcs = glob([ - "runtime/opencl/opencl_smoketest.cc", - ]), - copts = ["-std=c++11"], - deps = [ - "opencl_runtime", - ], -) - cc_library( name = "core", srcs = glob([ diff --git a/mace/core/runtime/opencl/opencl_smoketest.cc b/mace/core/runtime/opencl/opencl_smoketest.cc deleted file mode 100644 index ab32a81d89c462e9c15e100ff00aa9ebb382556e..0000000000000000000000000000000000000000 --- a/mace/core/runtime/opencl/opencl_smoketest.cc +++ /dev/null @@ -1,75 +0,0 @@ -// -// Copyright (c) 2017 XiaoMi All rights reserved. -// - -#include "mace/core/logging.h" -#include "mace/core/operator.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/core/runtime/opencl/opencl_wrapper.h" - -int main() { - using namespace mace; - auto runtime = mace::OpenCLRuntime::Get(); - - mace::Tensor ta(GetDeviceAllocator(DeviceType::OPENCL), DataType::DT_INT32); - mace::Tensor tb(GetDeviceAllocator(DeviceType::OPENCL), DataType::DT_INT32); - mace::Tensor tc(GetDeviceAllocator(DeviceType::OPENCL), DataType::DT_INT32); - mace::Tensor tstep(GetDeviceAllocator(DeviceType::OPENCL), - DataType::DT_INT32); - - int n = 1000; - std::vector shape = {n}; - ta.Resize(shape); - tb.Resize(shape); - tc.Resize(shape); - tstep.Resize({1}); - - int step_size = 10; - int global_size = n / step_size; - { - mace::Tensor::MappingGuard ta_mapper(&ta); - mace::Tensor::MappingGuard tb_mapper(&tb); - mace::Tensor::MappingGuard tstep_mapper(&tstep); - int32_t *a = ta.mutable_data(); - int32_t *b = tb.mutable_data(); - int32_t *step = tstep.mutable_data(); - for (int i = 0; i < n; i++) { - a[i] = i; - b[i] = 2 * i; - } - step[0] = step_size; - } - - auto program = runtime->program(); - - auto simple_add = - cl::KernelFunctor( - program, "simple_add"); - cl_int error; - simple_add(cl::EnqueueArgs(runtime->command_queue(), cl::NDRange(global_size), - cl::NullRange), - *(static_cast(ta.buffer())), - *(static_cast(tb.buffer())), - *(static_cast(tc.buffer())), - *(static_cast(tstep.buffer())), error); - if (error != 0) { - LOG(ERROR) << "Failed to execute kernel " << error; - } - - { - mace::Tensor::MappingGuard ta_mapper(&ta); - mace::Tensor::MappingGuard tb_mapper(&tb); - mace::Tensor::MappingGuard tc_mapper(&tc); - - int32_t *a = ta.mutable_data(); - int32_t *b = tb.mutable_data(); - int32_t *c = tc.mutable_data(); - bool correct = true; - for (int i = 0; i < n; i++) { - if (c[i] != a[i] + b[i]) correct = false; - } - LOG(INFO) << "OpenCL test result: " << (correct ? "correct" : "incorrect"); - } - - return 0; -} diff --git a/mace/kernels/opencl/cl/conv_2d_1x1.cl b/mace/kernels/opencl/cl/conv_2d_1x1.cl index abc91aa98df6a92e1cf6d2dc29527e44f0ac572b..cd9f22ae3f981823d4d45a876ee0cf18e4a0f456 100644 --- a/mace/kernels/opencl/cl/conv_2d_1x1.cl +++ b/mace/kernels/opencl/cl/conv_2d_1x1.cl @@ -28,20 +28,11 @@ void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */ for (int out_chan = out_chan_begin; out_chan < out_chan_end; ++out_chan) { float weights = filter[out_chan * in_chan_num + in_chan]; float *output_ptr = output_base + out_chan * pixel_num; - /* TODO fix vload/vstore */ - /* for (int p = 0; p < 2; ++p) { - float4 in = vload4(p * 4, input_ptr); - float4 out = vload4(p * 4, output_ptr); + float4 in = vload4(p, input_ptr); + float4 out = vload4(p, output_ptr); out += in * weights; - vstore4(out, p * 4, output_ptr); - } - */ - for (int p = 0; p < 8; ++p) { - float in = input_ptr[p]; - float out = output_ptr[p]; - out += in * weights; - output_ptr[p] = out; + vstore4(out, p, output_ptr); } } } else { diff --git a/mace/kernels/opencl/cl/simple_add.cl b/mace/kernels/opencl/cl/simple_add.cl deleted file mode 100644 index 959da77e4c999bc08f0c0eb0bb2deceaa173a481..0000000000000000000000000000000000000000 --- a/mace/kernels/opencl/cl/simple_add.cl +++ /dev/null @@ -1,9 +0,0 @@ -void kernel simple_add(global const int *a, - global const int *b, - global int *c, - global const int *step) { - int id = get_global_id(0); - int start = step[0] * id; - int stop = start + step[0]; - for (int i = start; i < stop; i++) c[i] = a[i] + b[i]; -} diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index 0b64807677c2bd1f4ed4cb72bb81ea897824c53b..3d2247cffa7cf2b8f7a70a0be822e7dadbd408a1 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -81,7 +81,7 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter, cl_int error; conv_2d(cl::EnqueueArgs(runtime->command_queue(), cl::NDRange(chan_blk_num, pixel_blk_num), - cl::NullRange), + cl::NDRange(1, 64)), *(static_cast(input->buffer())), *(static_cast(filter->buffer())), *(static_cast(output->buffer())),