提交 e2849108 编写于 作者: L Liangliang He

Merge branch 'master' into 'master'

1. Fix ResizeLike.

See merge request !262
...@@ -241,7 +241,9 @@ class Image : public BufferBase { ...@@ -241,7 +241,9 @@ class Image : public BufferBase {
mapped_buf_ = nullptr; mapped_buf_ = nullptr;
}; };
void Resize(index_t size) {} void Resize(index_t size) {
MACE_NOT_IMPLEMENTED;
}
void Copy(void *src, index_t offset, index_t length) { void Copy(void *src, index_t offset, index_t length) {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
...@@ -263,7 +265,11 @@ class Image : public BufferBase { ...@@ -263,7 +265,11 @@ class Image : public BufferBase {
class BufferSlice : public BufferBase { class BufferSlice : public BufferBase {
public: public:
BufferSlice() {} BufferSlice()
: buffer_(nullptr),
mapped_buf_(nullptr),
offset_(0),
length_(0) {}
BufferSlice(BufferBase *buffer, index_t offset, index_t length) BufferSlice(BufferBase *buffer, index_t offset, index_t length)
: BufferBase(buffer->size()), : BufferBase(buffer->size()),
buffer_(buffer), buffer_(buffer),
...@@ -284,12 +290,13 @@ class BufferSlice : public BufferBase { ...@@ -284,12 +290,13 @@ class BufferSlice : public BufferBase {
other.length_) {} other.length_) {}
~BufferSlice() { ~BufferSlice() {
if (mapped_buf_ != nullptr) { if (buffer_ != nullptr && mapped_buf_ != nullptr) {
UnMap(); UnMap();
} }
} }
void *buffer() { void *buffer() {
MACE_CHECK_NOTNULL(buffer_);
return buffer_->buffer(); return buffer_->buffer();
}; };
...@@ -330,6 +337,7 @@ class BufferSlice : public BufferBase { ...@@ -330,6 +337,7 @@ class BufferSlice : public BufferBase {
}; };
void Resize(index_t size) { void Resize(index_t size) {
MACE_NOT_IMPLEMENTED;
} }
void Copy(void *src, index_t offset, index_t length) { void Copy(void *src, index_t offset, index_t length) {
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#ifndef MACE_CORE_TENSOR_H_ #ifndef MACE_CORE_TENSOR_H_
#define MACE_CORE_TENSOR_H_ #define MACE_CORE_TENSOR_H_
#include "mace/core/runtime/opencl/cl2.hpp"
#include "mace/core/buffer.h" #include "mace/core/buffer.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#include "mace/core/types.h" #include "mace/core/types.h"
...@@ -112,10 +113,24 @@ class Tensor { ...@@ -112,10 +113,24 @@ class Tensor {
return size() * SizeOfType(); return size() * SizeOfType();
} }
inline void *buffer() const { inline bool has_opencl_image() const {
MACE_CHECK(buffer_ != nullptr && buffer_->buffer() != nullptr, return buffer_ != nullptr && !buffer_->OnHost()
"buffer is null"); && typeid(*buffer_) == typeid(Image);
return buffer_->buffer(); }
inline bool has_opencl_buffer() const {
return buffer_ != nullptr && !buffer_->OnHost()
&& !has_opencl_image();
}
inline cl::Image *opencl_image() const {
MACE_CHECK(has_opencl_image(), "do not have image");
return static_cast<cl::Image*>(buffer_->buffer());
}
inline cl::Buffer *opencl_buffer() const {
MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer");
return static_cast<cl::Buffer*>(buffer_->buffer());
} }
inline index_t buffer_offset() const { inline index_t buffer_offset() const {
...@@ -152,6 +167,7 @@ class Tensor { ...@@ -152,6 +167,7 @@ class Tensor {
inline void Resize(const std::vector<index_t> &shape) { inline void Resize(const std::vector<index_t> &shape) {
shape_ = shape; shape_ = shape;
if (buffer_ != nullptr) { if (buffer_ != nullptr) {
MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
buffer_->Resize(raw_size()); buffer_->Resize(raw_size());
} else { } else {
buffer_ = new Buffer(allocator_, raw_size()); buffer_ = new Buffer(allocator_, raw_size());
...@@ -159,20 +175,38 @@ class Tensor { ...@@ -159,20 +175,38 @@ class Tensor {
} }
} }
inline void ResizeLike(const Tensor &other) {
Resize(other.shape());
}
inline void ResizeLike(const Tensor *other) {
Resize(other->shape());
}
inline void ResizeImage(const std::vector<index_t> &shape, inline void ResizeImage(const std::vector<index_t> &shape,
const std::vector<size_t> &image_shape) { const std::vector<size_t> &image_shape) {
shape_ = shape; shape_ = shape;
if (buffer_ == nullptr) { if (buffer_ == nullptr) {
buffer_ = new Image(image_shape, dtype_); buffer_ = new Image(image_shape, dtype_);
is_buffer_owner_ = true; is_buffer_owner_ = true;
} else {
MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize.");
Image *image = dynamic_cast<Image*>(buffer_);
MACE_CHECK(image_shape[0] <= image->image_shape()[0]
&& image_shape[1] <= image->image_shape()[1]);
}
}
inline void ResizeLike(const Tensor &other) {
ResizeLike(&other);
}
inline void ResizeLike(const Tensor *other) {
if (other->has_opencl_image()) {
if (is_buffer_owner_ && buffer_ != nullptr && !has_opencl_image()) {
delete buffer_;
buffer_ = nullptr;
}
ResizeImage(other->shape(),
dynamic_cast<Image *>(other->UnderlyingBuffer())->image_shape());
} else {
if (is_buffer_owner_ && buffer_ != nullptr && has_opencl_image()) {
delete buffer_;
buffer_ = nullptr;
}
Resize(other->shape());
} }
} }
......
...@@ -60,12 +60,12 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input, ...@@ -60,12 +60,12 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
kernel_ = kernel_ =
runtime->BuildKernel("activation", kernel_name, built_options); runtime->BuildKernel("activation", kernel_name, built_options);
int idx = 0; int idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer()))); kernel_.setArg(idx++, *(input->opencl_image()));
if (activation_ == PRELU) { if (activation_ == PRELU) {
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(alpha->buffer()))); kernel_.setArg(idx++, *(alpha->opencl_image()));
} }
kernel_.setArg(idx++, static_cast<float>(relux_max_limit_)); kernel_.setArg(idx++, static_cast<float>(relux_max_limit_));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer()))); kernel_.setArg(idx++, *(output->opencl_image()));
} }
const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks), const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
......
...@@ -58,9 +58,9 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()( ...@@ -58,9 +58,9 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
uint32_t idx = 0; uint32_t idx = 0;
for (auto input : input_tensors) { for (auto input : input_tensors) {
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer()))); *(input->opencl_image()));
} }
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer()))); kernel_.setArg(idx++, *(output_tensor->opencl_image()));
} }
const uint32_t gws[2] = { const uint32_t gws[2] = {
......
...@@ -64,17 +64,17 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input, ...@@ -64,17 +64,17 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
runtime->BuildKernel("batch_norm", kernel_name, built_options); runtime->BuildKernel("batch_norm", kernel_name, built_options);
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer()))); kernel_.setArg(idx++, *(input->opencl_image()));
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(scale->buffer()))); kernel_.setArg(idx++, *(scale->opencl_image()));
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(offset->buffer()))); *(offset->opencl_image()));
if (!folded_constant_) { if (!folded_constant_) {
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(mean->buffer()))); *(mean->opencl_image()));
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(var->buffer()))); kernel_.setArg(idx++, *(var->opencl_image()));
kernel_.setArg(idx++, epsilon); kernel_.setArg(idx++, epsilon);
} }
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer()))); kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, relux_max_limit_); kernel_.setArg(idx++, relux_max_limit_);
} }
......
...@@ -35,9 +35,9 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()( ...@@ -35,9 +35,9 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(
kernel_ = runtime->BuildKernel("bias_add", kernel_name, built_options); kernel_ = runtime->BuildKernel("bias_add", kernel_name, built_options);
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer()))); kernel_.setArg(idx++, *(input->opencl_image()));
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(bias->buffer()))); kernel_.setArg(idx++, *(bias->opencl_image()));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer()))); kernel_.setArg(idx++, *(output->opencl_image()));
} }
const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks), const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
......
...@@ -77,7 +77,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer, ...@@ -77,7 +77,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
built_options); built_options);
uint32_t idx = 0; uint32_t idx = 0;
b2f_kernel.setArg(idx++, *(static_cast<const cl::Buffer *>(buffer->buffer()))); b2f_kernel.setArg(idx++, *(buffer->opencl_buffer()));
if (!i2b_) { if (!i2b_) {
MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0, "buffer offset not aligned"); MACE_CHECK(buffer->buffer_offset() % GetEnumTypeSize(buffer->dtype()) == 0, "buffer offset not aligned");
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype()))); b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype())));
...@@ -93,8 +93,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer, ...@@ -93,8 +93,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(2))); b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(2)));
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(3))); b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(3)));
} }
b2f_kernel.setArg(idx++, *(static_cast<cl::Image2D *>(image->buffer()))); b2f_kernel.setArg(idx++, *(image->opencl_image()));
const std::vector<uint32_t> lws = {16, 64}; const std::vector<uint32_t> lws = {16, 64};
cl::Event event; cl::Event event;
cl_int error = runtime->command_queue().enqueueNDRangeKernel( cl_int error = runtime->command_queue().enqueueNDRangeKernel(
......
...@@ -42,10 +42,10 @@ static void Concat2(cl::Kernel *kernel, ...@@ -42,10 +42,10 @@ static void Concat2(cl::Kernel *kernel,
*kernel = runtime->BuildKernel("concat", kernel_name, built_options); *kernel = runtime->BuildKernel("concat", kernel_name, built_options);
uint32_t idx = 0; uint32_t idx = 0;
kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input0->buffer()))); kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input0->opencl_image())));
kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input1->buffer()))); kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input1->opencl_image())));
kernel->setArg(idx++, static_cast<int32_t>(input0->dim(3))); kernel->setArg(idx++, static_cast<int32_t>(input0->dim(3)));
kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer()))); kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->opencl_image())));
} }
const uint32_t gws[3] = { const uint32_t gws[3] = {
...@@ -90,9 +90,9 @@ static void ConcatN(cl::Kernel *kernel, ...@@ -90,9 +90,9 @@ static void ConcatN(cl::Kernel *kernel,
for (int i = 0; i < inputs_count; ++i) { for (int i = 0; i < inputs_count; ++i) {
const Tensor *input = input_list[i]; const Tensor *input = input_list[i];
uint32_t idx = 0; uint32_t idx = 0;
kernel->setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer()))); kernel->setArg(idx++, *(input->opencl_image()));
kernel->setArg(idx++, static_cast<int32_t>(chan_blk_offset)); kernel->setArg(idx++, static_cast<int32_t>(chan_blk_offset));
kernel->setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer()))); kernel->setArg(idx++, *(output->opencl_image()));
index_t input_channel_blk = input->dim(3) / 4; index_t input_channel_blk = input->dim(3) / 4;
chan_blk_offset += input_channel_blk; chan_blk_offset += input_channel_blk;
......
...@@ -71,15 +71,15 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, ...@@ -71,15 +71,15 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
uint32_t idx = 0; uint32_t idx = 0;
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer()))); *(input->opencl_image()));
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(filter->buffer()))); *(filter->opencl_image()));
if (bias != nullptr) { if (bias != nullptr) {
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(bias->buffer()))); *(bias->opencl_image()));
} }
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(output->buffer()))); *(output->opencl_image()));
// FIXME handle flexable data type: half not supported // FIXME handle flexable data type: half not supported
kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, static_cast<int>(input_height)); kernel->setArg(idx++, static_cast<int>(input_height));
......
...@@ -66,15 +66,15 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, ...@@ -66,15 +66,15 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
uint32_t idx = 0; uint32_t idx = 0;
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer()))); *(input->opencl_image()));
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(filter->buffer()))); *(filter->opencl_image()));
if (bias != nullptr) { if (bias != nullptr) {
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(bias->buffer()))); *(bias->opencl_image()));
} }
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(output->buffer()))); *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, static_cast<int>(input->dim(1))); kernel->setArg(idx++, static_cast<int>(input->dim(1)));
kernel->setArg(idx++, static_cast<int>(input->dim(2))); kernel->setArg(idx++, static_cast<int>(input->dim(2)));
......
...@@ -66,15 +66,15 @@ extern void Conv2dOpencl(cl::Kernel *kernel, ...@@ -66,15 +66,15 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
uint32_t idx = 0; uint32_t idx = 0;
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer()))); *(input->opencl_image()));
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(filter->buffer()))); *(filter->opencl_image()));
if (bias != nullptr) { if (bias != nullptr) {
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(bias->buffer()))); *(bias->opencl_image()));
} }
kernel->setArg(idx++, kernel->setArg(idx++,
*(static_cast<const cl::Image2D *>(output->buffer()))); *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, static_cast<uint32_t>(input->dim(1))); kernel->setArg(idx++, static_cast<uint32_t>(input->dim(1)));
kernel->setArg(idx++, static_cast<uint32_t>(input->dim(2))); kernel->setArg(idx++, static_cast<uint32_t>(input->dim(2)));
......
...@@ -81,16 +81,15 @@ void DepthwiseConv2d(cl::Kernel *kernel, ...@@ -81,16 +81,15 @@ void DepthwiseConv2d(cl::Kernel *kernel,
*kernel = runtime->BuildKernel("depthwise_conv2d", kernel_name, built_options); *kernel = runtime->BuildKernel("depthwise_conv2d", kernel_name, built_options);
uint32_t idx = 0; uint32_t idx = 0;
kernel->setArg(idx++, kernel->setArg(idx++, *(input->opencl_image()));
*(static_cast<const cl::Image2D *>(input->buffer())));
kernel->setArg( kernel->setArg(
idx++, *(static_cast<const cl::Image2D *>(filter->buffer()))); idx++, *(filter->opencl_image()));
if (bias != nullptr) { if (bias != nullptr) {
kernel->setArg( kernel->setArg(
idx++, *(static_cast<const cl::Image2D *>(bias->buffer()))); idx++, *(bias->opencl_image()));
} }
kernel->setArg( kernel->setArg(
idx++, *(static_cast<const cl::Image2D *>(output->buffer()))); idx++, *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, static_cast<short>(input_height)); kernel->setArg(idx++, static_cast<short>(input_height));
kernel->setArg(idx++, static_cast<short>(input_width)); kernel->setArg(idx++, static_cast<short>(input_width));
......
...@@ -39,14 +39,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0, ...@@ -39,14 +39,14 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(input0->buffer()))); *(input0->opencl_image()));
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(input1->buffer()))); *(input1->opencl_image()));
if (!coeff_.empty()) { if (!coeff_.empty()) {
kernel_.setArg(idx++, coeff_[0]); kernel_.setArg(idx++, coeff_[0]);
kernel_.setArg(idx++, coeff_[1]); kernel_.setArg(idx++, coeff_[1]);
} }
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer()))); kernel_.setArg(idx++, *(output->opencl_image()));
} }
const uint32_t gws[2] = { const uint32_t gws[2] = {
......
...@@ -61,15 +61,15 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()( ...@@ -61,15 +61,15 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(input->buffer()))); *(input->opencl_image()));
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(weight->buffer()))); *(weight->opencl_image()));
if (bias != nullptr) { if (bias != nullptr) {
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(bias->buffer()))); *(bias->opencl_image()));
} }
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(output->buffer()))); *(output->opencl_image()));
kernel_.setArg(idx++, static_cast<int>(input->dim(1))); kernel_.setArg(idx++, static_cast<int>(input->dim(1)));
kernel_.setArg(idx++, static_cast<int>(input->dim(2))); kernel_.setArg(idx++, static_cast<int>(input->dim(2)));
kernel_.setArg(idx++, static_cast<int>(input->dim(3))); kernel_.setArg(idx++, static_cast<int>(input->dim(3)));
......
...@@ -40,11 +40,10 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()( ...@@ -40,11 +40,10 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(
kernel_ = runtime->BuildKernel("matmul", kernel_name, built_options); kernel_ = runtime->BuildKernel("matmul", kernel_name, built_options);
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, *(A->opencl_image()));
kernel_.setArg(idx++, kernel_.setArg(idx++,
*(static_cast<const cl::Image2D *>(A->buffer()))); *(B->opencl_image()));
kernel_.setArg(idx++, kernel_.setArg(idx++, *(C->opencl_image()));
*(static_cast<const cl::Image2D *>(B->buffer())));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(C->buffer())));
kernel_.setArg(idx++, static_cast<int>(height)); kernel_.setArg(idx++, static_cast<int>(height));
kernel_.setArg(idx++, static_cast<int>(width)); kernel_.setArg(idx++, static_cast<int>(width));
kernel_.setArg(idx++, static_cast<int>(A->dim(2))); kernel_.setArg(idx++, static_cast<int>(A->dim(2)));
......
...@@ -65,7 +65,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input, ...@@ -65,7 +65,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
kernel_ = runtime->BuildKernel("pooling", kernel_name, built_options); kernel_ = runtime->BuildKernel("pooling", kernel_name, built_options);
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer()))); kernel_.setArg(idx++, *(input->opencl_image()));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1))); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2))); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
kernel_.setArg(idx++, static_cast<int32_t>(out_height)); kernel_.setArg(idx++, static_cast<int32_t>(out_height));
...@@ -73,7 +73,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input, ...@@ -73,7 +73,7 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
kernel_.setArg(idx++, paddings[1] / 2); kernel_.setArg(idx++, paddings[1] / 2);
kernel_.setArg(idx++, strides_[0]); kernel_.setArg(idx++, strides_[0]);
kernel_.setArg(idx++, kernels_[0]); kernel_.setArg(idx++, kernels_[0]);
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer()))); kernel_.setArg(idx++, *(output->opencl_image()));
} }
const uint32_t gws[3] = { const uint32_t gws[3] = {
......
...@@ -48,8 +48,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()( ...@@ -48,8 +48,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
kernel_ = runtime->BuildKernel("resize_bilinear", kernel_name, built_options); kernel_ = runtime->BuildKernel("resize_bilinear", kernel_name, built_options);
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer()))); kernel_.setArg(idx++, *(input->opencl_image()));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer()))); kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, height_scale); kernel_.setArg(idx++, height_scale);
kernel_.setArg(idx++, width_scale); kernel_.setArg(idx++, width_scale);
kernel_.setArg(idx++, static_cast<int32_t>(in_height)); kernel_.setArg(idx++, static_cast<int32_t>(in_height));
......
...@@ -35,10 +35,10 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits, ...@@ -35,10 +35,10 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
kernel_ = runtime->BuildKernel("softmax", kernel_name, built_options); kernel_ = runtime->BuildKernel("softmax", kernel_name, built_options);
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(logits->buffer()))); kernel_.setArg(idx++, *(logits->opencl_image()));
kernel_.setArg(idx++, static_cast<int>(channels)); kernel_.setArg(idx++, static_cast<int>(channels));
kernel_.setArg(idx++, remain_channels); kernel_.setArg(idx++, remain_channels);
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer()))); kernel_.setArg(idx++, *(output->opencl_image()));
} }
const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks), const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
static_cast<uint32_t>(width), static_cast<uint32_t>(width),
......
...@@ -42,11 +42,11 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor ...@@ -42,11 +42,11 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor
uint32_t idx = 0; uint32_t idx = 0;
if (b2s_) { if (b2s_) {
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(batch_tensor->buffer()))); kernel_.setArg(idx++, *(batch_tensor->opencl_image()));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(space_tensor->buffer()))); kernel_.setArg(idx++, *(space_tensor->opencl_image()));
} else { } else {
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(space_tensor->buffer()))); kernel_.setArg(idx++, *(space_tensor->opencl_image()));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(batch_tensor->buffer()))); kernel_.setArg(idx++, *(batch_tensor->opencl_image()));
} }
kernel_.setArg(idx++, block_shape_[0]); kernel_.setArg(idx++, block_shape_[0]);
kernel_.setArg(idx++, block_shape_[1]); kernel_.setArg(idx++, block_shape_[1]);
......
...@@ -49,8 +49,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i ...@@ -49,8 +49,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
built_options); built_options);
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input_tensor->buffer()))); kernel_.setArg(idx++, *(input_tensor->opencl_image()));
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer()))); kernel_.setArg(idx++, *(output_tensor->opencl_image()));
kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(1))); kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(1)));
kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(2))); kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(2)));
kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(3))); kernel_.setArg(idx++, static_cast<uint32_t>(input_tensor->dim(3)));
...@@ -119,11 +119,11 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te ...@@ -119,11 +119,11 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
const uint32_t round_h = (height_ + 1) / 2; const uint32_t round_h = (height_ + 1) / 2;
const uint32_t round_w = (width_ + 1) / 2; const uint32_t round_w = (width_ + 1) / 2;
uint32_t idx = 0; uint32_t idx = 0;
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input_tensor->buffer()))); kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input_tensor->opencl_image())));
if (bias != nullptr) { if (bias != nullptr) {
kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(bias->buffer()))); kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(bias->opencl_image())));
} }
kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer()))); kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->opencl_image())));
kernel_.setArg(idx++, static_cast<uint32_t>(output_shape[1])); kernel_.setArg(idx++, static_cast<uint32_t>(output_shape[1]));
kernel_.setArg(idx++, static_cast<uint32_t>(output_shape[2])); kernel_.setArg(idx++, static_cast<uint32_t>(output_shape[2]));
kernel_.setArg(idx++, static_cast<uint32_t>(round_h * round_w)); kernel_.setArg(idx++, static_cast<uint32_t>(round_h * round_w));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册