提交 358ebd4d 编写于 作者: 李寅

1. Fix ResizeLike.

2. Distinguish OpenCL buffer and image.
上级 422360c1
......@@ -241,7 +241,9 @@ class Image : public BufferBase {
mapped_buf_ = nullptr;
};
// NOTE(review): two definitions of Resize(index_t) appear back to back in this
// listing. The enclosing hunk header names class Image and reports 7 old lines
// versus 9 new lines, so the empty-bodied definition is presumably the
// pre-change version -- confirm before keeping both.
// Empty-bodied variant: accepts `size` and does nothing with it.
void Resize(index_t size) {}
// Variant whose body only invokes the MACE_NOT_IMPLEMENTED macro; `size` is
// never read.
void Resize(index_t size) {
MACE_NOT_IMPLEMENTED;
}
void Copy(void *src, index_t offset, index_t length) {
MACE_NOT_IMPLEMENTED;
......@@ -263,7 +265,11 @@ class Image : public BufferBase {
class BufferSlice : public BufferBase {
public:
// NOTE(review): two default constructors appear back to back in this listing;
// the first (empty) one is presumably the pre-change side of the diff hunk --
// confirm before keeping both.
// Empty default constructor: leaves every member to its default initialization.
BufferSlice() {}
// Default constructor that explicitly clears the slice state: no parent
// buffer, no mapped pointer, zero offset and zero length.
BufferSlice()
: buffer_(nullptr),
mapped_buf_(nullptr),
offset_(0),
length_(0) {}
BufferSlice(BufferBase *buffer, index_t offset, index_t length)
: BufferBase(buffer->size()),
buffer_(buffer),
......@@ -284,12 +290,13 @@ class BufferSlice : public BufferBase {
other.length_) {}
// Destructor: calls UnMap() when the slice still holds a mapped pointer.
// NOTE(review): two `if` headers follow each other here, which leaves one
// opening brace unmatched as listed. Presumably the first is the pre-change
// condition and the second (which also requires a non-null buffer_) is its
// replacement -- confirm against the real file.
~BufferSlice() {
if (mapped_buf_ != nullptr) {
// Stricter guard: also requires a parent buffer before unmapping.
if (buffer_ != nullptr && mapped_buf_ != nullptr) {
UnMap();
}
}
// Returns whatever the parent buffer's buffer() returns.
// MACE_CHECK_NOTNULL guards against a slice without a parent buffer (the
// default constructor sets buffer_ to nullptr) before it is dereferenced.
void *buffer() {
MACE_CHECK_NOTNULL(buffer_);
return buffer_->buffer();
};
......@@ -330,6 +337,7 @@ class BufferSlice : public BufferBase {
};
// The body only invokes the MACE_NOT_IMPLEMENTED macro; `size` is never read.
// The enclosing hunk header places this method in class BufferSlice.
void Resize(index_t size) {
MACE_NOT_IMPLEMENTED;
}
void Copy(void *src, index_t offset, index_t length) {
......
......@@ -5,6 +5,7 @@
#ifndef MACE_CORE_TENSOR_H_
#define MACE_CORE_TENSOR_H_
#include "mace/core/runtime/opencl/cl2.hpp"
#include "mace/core/buffer.h"
#include "mace/utils/logging.h"
#include "mace/core/types.h"
......@@ -112,10 +113,24 @@ class Tensor {
return size() * SizeOfType();
}
inline void *buffer() const {
MACE_CHECK(buffer_ != nullptr && buffer_->buffer() != nullptr,
"buffer is null");
return buffer_->buffer();
inline bool has_opencl_image() const {
return buffer_ != nullptr && !buffer_->OnHost()
&& typeid(*buffer_) == typeid(Image);
}
// Reports whether this tensor holds a buffer that is not on the host and is
// not classified as an image by has_opencl_image().
inline bool has_opencl_buffer() const {
  // No buffer at all, or a host-side buffer: not a device buffer.
  if (buffer_ == nullptr || buffer_->OnHost()) {
    return false;
  }
  return !has_opencl_image();
}
// Returns the underlying buffer handle reinterpreted as a cl::Image pointer.
// Fails the MACE_CHECK when has_opencl_image() is false.
inline cl::Image *opencl_image() const {
  MACE_CHECK(has_opencl_image(), "do not have image");
  void *raw_handle = buffer_->buffer();
  return static_cast<cl::Image*>(raw_handle);
}
// Returns the underlying buffer handle reinterpreted as a cl::Buffer pointer.
// Fails the MACE_CHECK when has_opencl_buffer() is false.
inline cl::Buffer *opencl_buffer() const {
  MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer");
  void *raw_handle = buffer_->buffer();
  return static_cast<cl::Buffer*>(raw_handle);
}
inline index_t buffer_offset() const {
......@@ -152,6 +167,7 @@ class Tensor {
inline void Resize(const std::vector<index_t> &shape) {
shape_ = shape;
if (buffer_ != nullptr) {
MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
buffer_->Resize(raw_size());
} else {
buffer_ = new Buffer(allocator_, raw_size());
......@@ -159,20 +175,38 @@ class Tensor {
}
}
// NOTE(review): a second pair of ResizeLike overloads with the same signatures
// appears further down in this listing (after ResizeImage). This pair is
// presumably the pre-change side of the diff hunk -- confirm before keeping
// both.
// Reference overload: resizes this tensor to other's shape via Resize().
inline void ResizeLike(const Tensor &other) {
Resize(other.shape());
}
// Pointer overload: resizes this tensor to other's shape via Resize().
// `other` is dereferenced without a null check.
inline void ResizeLike(const Tensor *other) {
Resize(other->shape());
}
// Sets the tensor shape for an image-backed tensor.
//
// shape:       new logical shape, stored in shape_ unconditionally.
// image_shape: extents of the backing image. When no buffer exists yet, a new
//              Image is allocated from it and this tensor becomes its owner.
//
// When a buffer already exists it must be an image (otherwise Resize() is the
// right call), and the requested extents must fit inside the extents the
// existing image was created with; the image itself is never reallocated.
inline void ResizeImage(const std::vector<index_t> &shape,
                        const std::vector<size_t> &image_shape) {
  shape_ = shape;
  if (buffer_ == nullptr) {
    buffer_ = new Image(image_shape, dtype_);
    is_buffer_owner_ = true;
    return;
  }

  MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize.");
  // The capacity comparison below reads image_shape[0] and image_shape[1];
  // reject shorter vectors instead of reading out of bounds.
  MACE_CHECK(image_shape.size() >= 2,
             "image_shape must provide at least two extents.");
  Image *image = dynamic_cast<Image *>(buffer_);
  // Guard the dereference below in case the cast and the kind check disagree.
  MACE_CHECK(image != nullptr, "buffer is not an Image.");
  MACE_CHECK(image_shape[0] <= image->image_shape()[0] &&
                 image_shape[1] <= image->image_shape()[1],
             "requested image shape exceeds the allocated image.");
}
// Reference overload: forwards to the pointer overload with the address of
// `other`, so both overloads share one implementation.
inline void ResizeLike(const Tensor &other) {
ResizeLike(&other);
}
// Makes this tensor follow the shape and storage kind of `other`.
//
// An image-backed `other` routes through ResizeImage() with other's shape and
// image shape; anything else routes through Resize(). A buffer owned by this
// tensor is deleted first when its kind (image vs. non-image) differs from the
// kind `other` uses, so the resize call that follows allocates a fresh one.
// A mismatching buffer that this tensor does not own is left in place and is
// rejected by the MACE_CHECK inside the resize call.
// `other` is dereferenced without a null check.
inline void ResizeLike(const Tensor *other) {
  const bool follow_image = other->has_opencl_image();

  // Drop an owned buffer of the wrong kind before resizing.
  if (is_buffer_owner_ && buffer_ != nullptr &&
      has_opencl_image() != follow_image) {
    delete buffer_;
    buffer_ = nullptr;
  }

  if (!follow_image) {
    Resize(other->shape());
    return;
  }
  ResizeImage(other->shape(),
              dynamic_cast<Image *>(other->UnderlyingBuffer())->image_shape());
}
......
......@@ -60,12 +60,12 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
kernel_ =
runtime->BuildKernel("activation", kernel_name, built_options);
int idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
kernel_.setArg(idx++, *(input->opencl_image()));
if (activation_ == PRELU) {
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(alpha->buffer())));
kernel_.setArg(idx++, *(alpha->opencl_image()));
}
kernel_.setArg(idx++, static_cast<float>(relux_max_limit_));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
kernel_.setArg(idx++, *(output->opencl_image()));
}
const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
......
......@@ -58,9 +58,9 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
uint32_t idx = 0;
for (auto input : input_tensors) {
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer())));
*(input->opencl_image()));
}
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
kernel_.setArg(idx++, *(output_tensor->opencl_image()));
}
const uint32_t gws[2] = {
......
......@@ -64,17 +64,17 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
runtime->BuildKernel("batch_norm", kernel_name, built_options);
uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(scale->buffer())));
kernel_.setArg(idx++, *(input->opencl_image()));
kernel_.setArg(idx++, *(scale->opencl_image()));
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(offset->buffer())));
*(offset->opencl_image()));
if (!folded_constant_) {
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(mean->buffer())));
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(var->buffer())));
*(mean->opencl_image()));
kernel_.setArg(idx++, *(var->opencl_image()));
kernel_.setArg(idx++, epsilon);
}
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, relux_max_limit_);
}
......
......@@ -35,9 +35,9 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(
kernel_ = runtime->BuildKernel("bias_add", kernel_name, built_options);
uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(bias->buffer())));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
kernel_.setArg(idx++, *(input->opencl_image()));
kernel_.setArg(idx++, *(bias->opencl_image()));
kernel_.setArg(idx++, *(output->opencl_image()));
}
const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
......
......@@ -77,7 +77,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
built_options);
uint32_t idx = 0;
b2f_kernel.setArg(idx++, *(static_cast<const cl::Buffer *>(buffer->buffer())));
b2f_kernel.setArg(idx++, *(buffer->opencl_buffer()));
if (!i2b_) {
MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0, "buffer offset not aligned");
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype())));
......@@ -93,8 +93,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(2)));
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(3)));
}
b2f_kernel.setArg(idx++, *(static_cast<cl::Image2D *>(image->buffer())));
b2f_kernel.setArg(idx++, *(image->opencl_image()));
const std::vector<uint32_t> lws = {16, 64};
cl::Event event;
cl_int error = runtime->command_queue().enqueueNDRangeKernel(
......
......@@ -42,10 +42,10 @@ static void Concat2(cl::Kernel *kernel,
*kernel = runtime->BuildKernel("concat", kernel_name, built_options);
uint32_t idx = 0;
kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input0->buffer())));
kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input1->buffer())));
kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input0->opencl_image())));
kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input1->opencl_image())));
kernel->setArg(idx++, static_cast<int32_t>(input0->dim(3)));
kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->opencl_image())));
}
const uint32_t gws[3] = {
......@@ -90,9 +90,9 @@ static void ConcatN(cl::Kernel *kernel,
for (int i = 0; i < inputs_count; ++i) {
const Tensor *input = input_list[i];
uint32_t idx = 0;
kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
kernel->setArg(idx++, *(input->opencl_image()));
kernel->setArg(idx++, static_cast<int32_t>(chan_blk_offset));
kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
kernel->setArg(idx++, *(output->opencl_image()));
index_t input_channel_blk = input->dim(3) / 4;
chan_blk_offset += input_channel_blk;
......
......@@ -71,15 +71,15 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
uint32_t idx = 0;
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer())));
*(input->opencl_image()));
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(filter->buffer())));
*(filter->opencl_image()));
if (bias != nullptr) {
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(bias->buffer())));
*(bias->opencl_image()));
}
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(output->buffer())));
*(output->opencl_image()));
// FIXME handle flexable data type: half not supported
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, static_cast<int>(input_height));
......
......@@ -66,15 +66,15 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
uint32_t idx = 0;
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer())));
*(input->opencl_image()));
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(filter->buffer())));
*(filter->opencl_image()));
if (bias != nullptr) {
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(bias->buffer())));
*(bias->opencl_image()));
}
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(output->buffer())));
*(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, static_cast<int>(input->dim(1)));
kernel->setArg(idx++, static_cast<int>(input->dim(2)));
......
......@@ -66,15 +66,15 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
uint32_t idx = 0;
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer())));
*(input->opencl_image()));
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(filter->buffer())));
*(filter->opencl_image()));
if (bias != nullptr) {
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(bias->buffer())));
*(bias->opencl_image()));
}
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(output->buffer())));
*(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, static_cast<uint32_t>(input->dim(1)));
kernel->setArg(idx++, static_cast<uint32_t>(input->dim(2)));
......
......@@ -81,16 +81,15 @@ void DepthwiseConv2d(cl::Kernel *kernel,
*kernel = runtime->BuildKernel("depthwise_conv2d", kernel_name, built_options);
uint32_t idx = 0;
kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer())));
kernel->setArg(idx++, *(input->opencl_image()));
kernel->setArg(
idx++, *(static_cast<const cl::Image2D *>(filter->buffer())));
idx++, *(filter->opencl_image()));
if (bias != nullptr) {
kernel->setArg(
idx++, *(static_cast<const cl::Image2D *>(bias->buffer())));
idx++, *(bias->opencl_image()));
}
kernel->setArg(
idx++, *(static_cast<const cl::Image2D *>(output->buffer())));
idx++, *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, static_cast<short>(input_height));
kernel->setArg(idx++, static_cast<short>(input_width));
......
......@@ -39,14 +39,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
uint32_t idx = 0;
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(input0->buffer())));
*(input0->opencl_image()));
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(input1->buffer())));
*(input1->opencl_image()));
if (!coeff_.empty()) {
kernel_.setArg(idx++, coeff_[0]);
kernel_.setArg(idx++, coeff_[1]);
}
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
kernel_.setArg(idx++, *(output->opencl_image()));
}
const uint32_t gws[2] = {
......
......@@ -61,15 +61,15 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
uint32_t idx = 0;
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer())));
*(input->opencl_image()));
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(weight->buffer())));
*(weight->opencl_image()));
if (bias != nullptr) {
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(bias->buffer())));
*(bias->opencl_image()));
}
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(output->buffer())));
*(output->opencl_image()));
kernel_.setArg(idx++, static_cast<int>(input->dim(1)));
kernel_.setArg(idx++, static_cast<int>(input->dim(2)));
kernel_.setArg(idx++, static_cast<int>(input->dim(3)));
......
......@@ -40,11 +40,10 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(
kernel_ = runtime->BuildKernel("matmul", kernel_name, built_options);
uint32_t idx = 0;
kernel_.setArg(idx++, *(A->opencl_image()));
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(A->buffer())));
kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(B->buffer())));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(C->buffer())));
*(B->opencl_image()));
kernel_.setArg(idx++, *(C->opencl_image()));
kernel_.setArg(idx++, static_cast<int>(height));
kernel_.setArg(idx++, static_cast<int>(width));
kernel_.setArg(idx++, static_cast<int>(A->dim(2)));
......
......@@ -65,7 +65,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
kernel_ = runtime->BuildKernel("pooling", kernel_name, built_options);
uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
kernel_.setArg(idx++, *(input->opencl_image()));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
kernel_.setArg(idx++, static_cast<int32_t>(out_height));
......@@ -73,7 +73,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
kernel_.setArg(idx++, paddings[1] / 2);
kernel_.setArg(idx++, strides_[0]);
kernel_.setArg(idx++, kernels_[0]);
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
kernel_.setArg(idx++, *(output->opencl_image()));
}
const uint32_t gws[3] = {
......
......@@ -48,8 +48,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
kernel_ = runtime->BuildKernel("resize_bilinear", kernel_name, built_options);
uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
kernel_.setArg(idx++, *(input->opencl_image()));
kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, height_scale);
kernel_.setArg(idx++, width_scale);
kernel_.setArg(idx++, static_cast<int32_t>(in_height));
......
......@@ -35,10 +35,10 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
kernel_ = runtime->BuildKernel("softmax", kernel_name, built_options);
uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(logits->buffer())));
kernel_.setArg(idx++, *(logits->opencl_image()));
kernel_.setArg(idx++, static_cast<int>(channels));
kernel_.setArg(idx++, remain_channels);
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
kernel_.setArg(idx++, *(output->opencl_image()));
}
const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
static_cast<uint32_t>(width),
......
......@@ -42,11 +42,11 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor
uint32_t idx = 0;
if (b2s_) {
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(batch_tensor->buffer())));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(space_tensor->buffer())));
kernel_.setArg(idx++, *(batch_tensor->opencl_image()));
kernel_.setArg(idx++, *(space_tensor->opencl_image()));
} else {
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(space_tensor->buffer())));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(batch_tensor->buffer())));
kernel_.setArg(idx++, *(space_tensor->opencl_image()));
kernel_.setArg(idx++, *(batch_tensor->opencl_image()));
}
kernel_.setArg(idx++, block_shape_[0]);
kernel_.setArg(idx++, block_shape_[1]);
......
......@@ -49,8 +49,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
built_options);
uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input_tensor->buffer())));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
kernel_.setArg(idx++, *(input_tensor->opencl_image()));
kernel_.setArg(idx++, *(output_tensor->opencl_image()));
kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(1)));
kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(2)));
kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(3)));
......@@ -119,11 +119,11 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
const uint32_t round_h = (height_ + 1) / 2;
const uint32_t round_w = (width_ + 1) / 2;
uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input_tensor->buffer())));
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input_tensor->opencl_image())));
if (bias != nullptr) {
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(bias->buffer())));
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(bias->opencl_image())));
}
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->opencl_image())));
kernel_.setArg(idx++, static_cast<uint32_t>(output_shape[1]));
kernel_.setArg(idx++, static_cast<uint32_t>(output_shape[2]));
kernel_.setArg(idx++, static_cast<uint32_t>(round_h * round_w));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册