提交 6cc1d13d 编写于 作者: L Liangliang He

Update opencl kernel

上级 2a7274f4
......@@ -14,15 +14,11 @@ cc_library(
srcs = glob([
"runtime/opencl/cl.hpp",
"runtime/opencl/cl2.hpp",
"runtime/opencl/opencl_allocator.cc",
"runtime/opencl/opencl_wrapper.cc",
"runtime/opencl/opencl_runtime.cc",
"runtime/opencl/*.cc",
]),
hdrs = glob([
"runtime/opencl/*.h",
]),
hdrs = [
"runtime/opencl/opencl_allocator.h",
"runtime/opencl/opencl_runtime.h",
"runtime/opencl/opencl_wrapper.h",
],
copts = ["-std=c++11"],
deps = [
"core",
......@@ -31,17 +27,6 @@ cc_library(
alwayslink = 1,
)
cc_binary(
name = "opencl_smoketest",
srcs = glob([
"runtime/opencl/opencl_smoketest.cc",
]),
copts = ["-std=c++11"],
deps = [
"opencl_runtime",
],
)
cc_library(
name = "core",
srcs = glob([
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/logging.h"
#include "mace/core/operator.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h"
// Minimal end-to-end smoke test for the MACE OpenCL runtime: fills two
// integer vectors in device-resident tensors, launches the "simple_add"
// kernel (c[i] = a[i] + b[i], each work-item handling step[0] consecutive
// elements), then maps the output back and verifies it on the host.
// NOTE(review): always returns 0 — the pass/fail verdict is only logged,
// so CI cannot detect a failure from the exit code.
int main() {
using namespace mace;
auto runtime = mace::OpenCLRuntime::Get();
// Four device tensors: inputs ta/tb, output tc, and a one-element tensor
// tstep carrying the per-work-item chunk size to the kernel.
mace::Tensor ta(GetDeviceAllocator(DeviceType::OPENCL), DataType::DT_INT32);
mace::Tensor tb(GetDeviceAllocator(DeviceType::OPENCL), DataType::DT_INT32);
mace::Tensor tc(GetDeviceAllocator(DeviceType::OPENCL), DataType::DT_INT32);
mace::Tensor tstep(GetDeviceAllocator(DeviceType::OPENCL),
DataType::DT_INT32);
int n = 1000;
std::vector<index_t> shape = {n};
ta.Resize(shape);
tb.Resize(shape);
tc.Resize(shape);
tstep.Resize({1});
// Each work-item adds step_size elements, so the global work size is
// n / step_size (assumes n is divisible by step_size — TODO confirm).
int step_size = 10;
int global_size = n / step_size;
{
// MappingGuard appears to map the device buffer for host access for the
// lifetime of this scope (RAII map/unmap) — NOTE(review): confirm against
// the Tensor implementation. The scope must end before the kernel runs.
mace::Tensor::MappingGuard ta_mapper(&ta);
mace::Tensor::MappingGuard tb_mapper(&tb);
mace::Tensor::MappingGuard tstep_mapper(&tstep);
int32_t *a = ta.mutable_data<int32_t>();
int32_t *b = tb.mutable_data<int32_t>();
int32_t *step = tstep.mutable_data<int32_t>();
// Deterministic inputs: a[i] = i, b[i] = 2i, so the expected output is 3i.
for (int i = 0; i < n; i++) {
a[i] = i;
b[i] = 2 * i;
}
step[0] = step_size;
}
// Bind the "simple_add" kernel from the runtime's prebuilt program and
// launch it with global_size work-items; the local size is left to the
// driver (NullRange).
auto program = runtime->program();
auto simple_add =
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer>(
program, "simple_add");
cl_int error;
simple_add(cl::EnqueueArgs(runtime->command_queue(), cl::NDRange(global_size),
cl::NullRange),
*(static_cast<cl::Buffer *>(ta.buffer())),
*(static_cast<cl::Buffer *>(tb.buffer())),
*(static_cast<cl::Buffer *>(tc.buffer())),
*(static_cast<cl::Buffer *>(tstep.buffer())), error);
// error is the enqueue status written by the functor; 0 is CL_SUCCESS.
if (error != 0) {
LOG(ERROR) << "Failed to execute kernel " << error;
}
{
// Map all three tensors back to the host and check c[i] == a[i] + b[i].
mace::Tensor::MappingGuard ta_mapper(&ta);
mace::Tensor::MappingGuard tb_mapper(&tb);
mace::Tensor::MappingGuard tc_mapper(&tc);
int32_t *a = ta.mutable_data<int32_t>();
int32_t *b = tb.mutable_data<int32_t>();
int32_t *c = tc.mutable_data<int32_t>();
bool correct = true;
for (int i = 0; i < n; i++) {
if (c[i] != a[i] + b[i]) correct = false;
}
LOG(INFO) << "OpenCL test result: " << (correct ? "correct" : "incorrect");
}
return 0;
}
......@@ -28,20 +28,11 @@ void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */
for (int out_chan = out_chan_begin; out_chan < out_chan_end; ++out_chan) {
float weights = filter[out_chan * in_chan_num + in_chan];
float *output_ptr = output_base + out_chan * pixel_num;
/* TODO fix vload/vstore */
/*
for (int p = 0; p < 2; ++p) {
float4 in = vload4(p * 4, input_ptr);
float4 out = vload4(p * 4, output_ptr);
float4 in = vload4(p, input_ptr);
float4 out = vload4(p, output_ptr);
out += in * weights;
vstore4(out, p * 4, output_ptr);
}
*/
for (int p = 0; p < 8; ++p) {
float in = input_ptr[p];
float out = output_ptr[p];
out += in * weights;
output_ptr[p] = out;
vstore4(out, p, output_ptr);
}
}
} else {
......
// Element-wise integer addition: c[i] = a[i] + b[i].
// Each work-item owns a contiguous chunk of step[0] elements starting at
// get_global_id(0) * step[0], so the global work size times step[0] must
// cover the buffer length.
void kernel simple_add(global const int *a,
global const int *b,
global int *c,
global const int *step) {
const int chunk = step[0];
const int base = chunk * get_global_id(0);
for (int offset = 0; offset < chunk; ++offset) {
const int idx = base + offset;
c[idx] = a[idx] + b[idx];
}
}
......@@ -81,7 +81,7 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter,
cl_int error;
conv_2d(cl::EnqueueArgs(runtime->command_queue(),
cl::NDRange(chan_blk_num, pixel_blk_num),
cl::NullRange),
cl::NDRange(1, 64)),
*(static_cast<cl::Buffer *>(input->buffer())),
*(static_cast<cl::Buffer *>(filter->buffer())),
*(static_cast<cl::Buffer *>(output->buffer())),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册