未验证 提交 f8fb5052 编写于 作者: X xiebaiyuan 提交者: GitHub

fix useless init of empty image, test=develop (#2354)

* add mem opt option for high-level API, test=develop

* fix is_lod params and test-performance, test=develop

* fix useless init of empty image, test=develop
上级 f1d0a8ae
...@@ -135,7 +135,7 @@ class CLImage { ...@@ -135,7 +135,7 @@ class CLImage {
// CLImageConverterFolder *folder_converter = new // CLImageConverterFolder *folder_converter = new
// CLImageConverterFolder(); // CLImageConverterFolder();
CLImageConverterNormal *normal_converter = new CLImageConverterNormal(); CLImageConverterNormal *normal_converter = new CLImageConverterNormal();
PADDLE_MOBILE_ENFORCE(!shared_mem_, "do not init mem after shared .")
DLOG << " to get image dims "; DLOG << " to get image dims ";
image_dims_ = normal_converter->InitImageDimInfoWith(dim); image_dims_ = normal_converter->InitImageDimInfoWith(dim);
DLOG << " end get image dims " << image_dims_; DLOG << " end get image dims " << image_dims_;
...@@ -176,7 +176,9 @@ class CLImage { ...@@ -176,7 +176,9 @@ class CLImage {
image_converter_ = normal_converter; image_converter_ = normal_converter;
cl_event_ = CLEngine::Instance()->CreateEvent(context); cl_event_ = CLEngine::Instance()->CreateEvent(context);
initialized_ = true; initialized_ = true;
DLOG << " end init cl image"; shared_mem_ = true;
DLOG << " end init FakeSizeImage";
} }
/** /**
* init cl mem with a exist cl mem * init cl mem with a exist cl mem
...@@ -194,15 +196,16 @@ class CLImage { ...@@ -194,15 +196,16 @@ class CLImage {
DLOG << "InitWithExistMem ... "; DLOG << "InitWithExistMem ... ";
DLOG << "real_image_dims: " << real_image_dims_; DLOG << "real_image_dims: " << real_image_dims_;
DLOG << "image_dims_: " << image_dims_; DLOG << "image_dims_: " << image_dims_;
// PADDLE_MOBILE_ENFORCE(real_image_dims[0] >= image_dims_[0] &&
// real_image_dims[1] >= image_dims_[1],
// "real image is not enough!");
if (real_image_dims_[0] < image_dims_[0] || if (real_image_dims_[0] < image_dims_[0] ||
real_image_dims_[1] < image_dims_[1]) { real_image_dims_[1] < image_dims_[1]) {
DLOG << "real image is not enough!"; DLOG << "real image is not enough!";
DLOG << "real_image_dims: " << real_image_dims_; DLOG << "real_image_dims: " << real_image_dims_;
DLOG << "image_dims_: " << image_dims_; DLOG << "image_dims_: " << image_dims_;
} }
PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] &&
real_image_dims_[1] >= image_dims_[1],
"real image is not enough!");
if (cl_image_ != src.cl_image_) { if (cl_image_ != src.cl_image_) {
cl_image_.reset(src.cl_image_.get()); cl_image_.reset(src.cl_image_.get());
} }
...@@ -212,7 +215,9 @@ class CLImage { ...@@ -212,7 +215,9 @@ class CLImage {
image_converter_ = normal_converter; image_converter_ = normal_converter;
cl_event_ = CLEngine::Instance()->CreateEvent(context); cl_event_ = CLEngine::Instance()->CreateEvent(context);
initialized_ = true; initialized_ = true;
DLOG << " end init cl image"; shared_mem_ = true;
DLOG << " end init WithExistMem";
} }
void InitConv2dTransposeFilterCLImage(cl_context context, void InitConv2dTransposeFilterCLImage(cl_context context,
...@@ -281,6 +286,8 @@ class CLImage { ...@@ -281,6 +286,8 @@ class CLImage {
private: private:
void InitCLImage(cl_context context, size_t width, size_t height, void InitCLImage(cl_context context, size_t width, size_t height,
void *data) { void *data) {
PADDLE_MOBILE_ENFORCE(!shared_mem_, "do not init mem after shared .")
cl_image_format cf = {.image_channel_order = CL_RGBA, cl_image_format cf = {.image_channel_order = CL_RGBA,
.image_channel_data_type = CL_HALF_FLOAT}; .image_channel_data_type = CL_HALF_FLOAT};
cl_image_desc cid = { cl_image_desc cid = {
...@@ -321,6 +328,7 @@ class CLImage { ...@@ -321,6 +328,7 @@ class CLImage {
cl_context context_; cl_context context_;
cl_command_queue command_queue_; cl_command_queue command_queue_;
CLImageConverterBase *image_converter_ = nullptr; CLImageConverterBase *image_converter_ = nullptr;
bool shared_mem_ = false;
}; };
void TensorToCLImage(Tensor *tensor, CLImage *image, cl_context context, void TensorToCLImage(Tensor *tensor, CLImage *image, cl_context context,
......
...@@ -98,7 +98,7 @@ void FusionFcCompute(const FusionFcParam<GPU_CL> &param, cl_context context, ...@@ -98,7 +98,7 @@ void FusionFcCompute(const FusionFcParam<GPU_CL> &param, cl_context context,
static_cast<float>(1), out, static_cast<float>(1), static_cast<float>(1), out, static_cast<float>(1),
false); false);
out_image->InitEmptyImage(context, commandQueue, out->dims()); // out_image->InitEmptyImage(context, commandQueue, out->dims());
framework::TensorToCLImage(out, out_image, context, commandQueue, kernel1); framework::TensorToCLImage(out, out_image, context, commandQueue, kernel1);
delete (input_x); delete (input_x);
......
...@@ -63,7 +63,7 @@ void MulCompute(const MulParam<GPU_CL> &param, cl_context context, ...@@ -63,7 +63,7 @@ void MulCompute(const MulParam<GPU_CL> &param, cl_context context,
static_cast<float>(1), output_tensor, static_cast<float>(1), output_tensor,
static_cast<float>(0)); static_cast<float>(0));
output->InitEmptyImage(context, commandQueue, output_tensor->dims()); // output->InitEmptyImage(context, commandQueue, output_tensor->dims());
framework::TensorToCLImage(output_tensor, output, context, commandQueue, framework::TensorToCLImage(output_tensor, output, context, commandQueue,
kernel1); kernel1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册