diff --git a/src/framework/cl/cl_image.cpp b/src/framework/cl/cl_image.cpp
index 8b0316af4f90803871f09aa3bda737c466390bf9..a999971192ceb01299b3b03846a95ec257de61d3 100644
--- a/src/framework/cl/cl_image.cpp
+++ b/src/framework/cl/cl_image.cpp
@@ -168,9 +168,8 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
       i0 += width * H;
     }
 
-  if (err != CL_SUCCESS) {
-    CL_CHECK_ERRORS(err);
-  }
+  CL_CHECK_ERRORS(err);
+
   for (int i = 0; i < cl_image.numel(); i += stride) {
     printer << data[i] << " ";
   }
diff --git a/src/framework/cl/cl_tensor.h b/src/framework/cl/cl_tensor.h
index c38091dd39c776254035f9b13c8505d64686915a..1d6829fe4b77639f34df0be37d7a539b91ff4bcc 100644
--- a/src/framework/cl/cl_tensor.h
+++ b/src/framework/cl/cl_tensor.h
@@ -28,7 +28,19 @@ namespace framework {
 
 class CLTensor : TensorBase {
  public:
-  explicit CLTensor(cl_context context) : context_(context) {}
+  CLTensor(cl_context context, cl_command_queue command_queue)
+      : context_(context), command_queue_(command_queue) {}
+
+  CLTensor() = default;
+
+  /*
+   * If init has not set context and command_queue, they need to be set here.
+   * */
+  void SetContextAndCommandQueue(cl_context context,
+                                 cl_command_queue command_queue) {
+    context_ = context;
+    command_queue_ = command_queue;
+  }
 
   /*! Resize the dimensions of the memory block. */
   inline CLTensor &Resize(const DDim &dims) {
@@ -39,7 +51,8 @@ class CLTensor : TensorBase {
   template <typename T>
   inline T mutable_with_data(void *data) {
     int64_t size = numel() * sizeof(float);
-    holder_.reset(new PlaceholderImpl(size, data, typeid(T), context_));
+    holder_.reset(
+        new PlaceholderImpl(size, data, typeid(T), context_, command_queue_));
     return reinterpret_cast<T>(
         reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(holder_->ptr())));
   }
@@ -51,7 +64,7 @@ class CLTensor : TensorBase {
     PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
     int64_t size = numel() * SizeOfType(type);
     if (holder_ == nullptr || holder_->size() < size + offset_) {
-      holder_.reset(new PlaceholderImpl(size, type, context_));
+      holder_.reset(new PlaceholderImpl(size, type, context_, command_queue_));
       offset_ = 0;
     }
     return reinterpret_cast<void *>(
@@ -85,6 +98,7 @@ class CLTensor : TensorBase {
 
  private:
   cl_context context_;
+  cl_command_queue command_queue_;
 
   /*
    *   virtual ~Placeholder() = default;
@@ -99,20 +113,31 @@
    * */
   struct PlaceholderImpl : public Placeholder {
     PlaceholderImpl(size_t size, void *input, std::type_index type,
-                    cl_context context)
+                    cl_context context, cl_command_queue command_queue)
         : ptr_(clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                               size, reinterpret_cast<void *>(input), NULL)),
           size_(size),
-          type_(type) {}
+          type_(type),
+          command_queue_(command_queue) {}
 
-    PlaceholderImpl(size_t size, std::type_index type, cl_context context)
+    PlaceholderImpl(size_t size, std::type_index type, cl_context context,
+                    cl_command_queue command_queue)
         : ptr_(clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, NULL)),
           size_(size),
-          type_(type) {}
+          type_(type),
+          command_queue_(command_queue) {}
 
     virtual size_t size() const { return size_; }
 
-    virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
+    virtual void *ptr() const {
+      if (host_ptr_) {
+        delete (host_ptr_);
+      }
+      char *host_ptr = new char[size_];
+      clEnqueueReadBuffer(command_queue_, ptr_.get(), CL_TRUE, 0, size_,
+                          host_ptr, 0, NULL, NULL);
+      return static_cast<void *>(host_ptr);
+    }
 
     virtual std::type_index type() const { return type_; }
 
@@ -124,6 +149,17 @@
 
     /* the current type of memory */
     std::type_index type_;
+
+    cl_command_queue command_queue_;
+
+    ~PlaceholderImpl() {
+      if (host_ptr_) {
+        delete (host_ptr_);
+      }
+    }
+
+   private:
+    void *host_ptr_;
   };
 };
 
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index 7980a2d1f6e3f46060dd25e5a6bede7c50cf7c8d..80589706f94eb0c2331d5af0049c6d53df8ca876 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -37,7 +37,7 @@ limitations under the License. */
 #include "framework/cl/cl_image.h"
 #endif
 
-int debug_to = 3;
+int debug_to = 115;
 
 namespace paddle_mobile {
 namespace framework {
@@ -87,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   for (int i = 0; i < blocks.size(); ++i) {
     std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
     std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
-    for (int j = 0; j < debug_to; ++j) {
+    for (int j = 0; j < ops.size(); ++j) {
       std::shared_ptr<framework::OpDesc> op = ops[j];
       DLOG << "create op: " << j << " " << op->Type();
       auto op_base = framework::OpRegistry<Dtype>::CreateOp(
@@ -416,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
     }
   }
 #else
-  for (int i = 0; i < debug_to; i++) {
+  for (int i = 0; i < ops.size(); i++) {
 #ifdef PADDLE_MOBILE_PROFILE
     struct timespec ts;
     clock_gettime(CLOCK_MONOTONIC, &ts);
diff --git a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
index 7ce60e6d1e9a687a3f6623ff8dd8e07576c02daf..272e130817eda62f71a67e179a57ce63f024bc4d 100644
--- a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
@@ -40,6 +40,11 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
   const framework::CLImage *scale = param->InputScale();
   const framework::CLImage *bias = param->InputBias();
   const float epsilon = param->Epsilon();
+  //
+  //  DLOG << " climage mean: " << *mean;
+  //  DLOG << " climage variance: " << *variance;
+  //  DLOG << " climage scale: " << *scale;
+  //  DLOG << " climage bias: " << *bias;
 
   auto mean_ptr = mean->data();
   auto variance_ptr = variance->data();
@@ -67,12 +72,20 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
 
   new_scale->InitCLImage(this->cl_helper_.CLContext(),
                          cl_helper_.CLCommandQueue());
 
+  DLOG << " climage - y bias: " << *(param->Bias());
+
+  DLOG << " climage - new scale: " << *new_scale;
+
   framework::CLImage *new_bias = new framework::CLImage();
   new_bias->SetTensorData(new_bias_ptr, variance->dims());
   new_bias->InitCLImage(this->cl_helper_.CLContext(),
                         cl_helper_.CLCommandQueue());
 
+  DLOG << " climage - new bias: " << *new_bias;
+
+  DLOG << " climage - filter: " << *(param->Filter());
+
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
 
diff --git a/src/operators/kernel/cl/feed_kernel.cpp b/src/operators/kernel/cl/feed_kernel.cpp
index f0587d69dfddc31f5fe0c5c215aea53bf75c42ed..0db2b7cc4665ff74d06ca62ba9e77d427d883233 100644
--- a/src/operators/kernel/cl/feed_kernel.cpp
+++ b/src/operators/kernel/cl/feed_kernel.cpp
@@ -36,7 +36,8 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   cl_mem cl_image = output->GetCLImage();
   int height = output->dims()[2];
   int width = output->dims()[3];
-  CLTensor input_cl_tensor(this->cl_helper_.CLContext());
+  CLTensor input_cl_tensor(this->cl_helper_.CLContext(),
+                           this->cl_helper_.CLCommandQueue());
   input_cl_tensor.Resize(input->dims());
   cl_mem inputBuffer =
       input_cl_tensor.mutable_with_data<cl_mem>((void *)input_data);
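
Note (appended after the patch, not part of it): in the new PlaceholderImpl::ptr() above, the freshly allocated host_ptr is never stored back into host_ptr_, and host_ptr_ is never initialized, so the delete in ptr() and in ~PlaceholderImpl() acts on an indeterminate pointer and every call leaks its staging buffer. Below is a minimal sketch of the host-staging pattern the patch appears to be reaching for, written against the plain OpenCL C API; the struct and member names (HostStagedBuffer, host_copy, etc.) are illustrative and not taken from the repository.

// Sketch only: a device buffer paired with a lazily (re)allocated host copy,
// assuming the cl_mem and command queue are owned and released elsewhere.
#include <CL/cl.h>
#include <cstddef>

struct HostStagedBuffer {
  cl_command_queue queue = nullptr;  // queue used for the blocking read
  cl_mem device_buf = nullptr;       // device-side buffer to stage from
  size_t size = 0;                   // size of the buffer in bytes
  char *host_copy = nullptr;         // staging copy handed out by ptr()

  // Blocking read of the device buffer into a reusable host allocation.
  void *ptr() {
    delete[] host_copy;  // delete[] on nullptr is a no-op
    host_copy = new char[size];
    clEnqueueReadBuffer(queue, device_buf, CL_TRUE /* blocking */, 0, size,
                        host_copy, 0, nullptr, nullptr);
    return host_copy;
  }

  ~HostStagedBuffer() { delete[] host_copy; }
};

Keeping the staging pointer initialized and re-pointed at every allocation is what makes both repeated reads and the destructor safe; the same applies to host_ptr_ in the patched PlaceholderImpl.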