未验证 提交 f8fb5052 编写于 作者: X xiebaiyuan 提交者: GitHub

fix useless init of empty image, test=develop (#2354)

* add mem opt option for high api, test=develop

* fix is_lod params && test-performance, test=develop

* fix useless init of empty image, test=develop
上级 f1d0a8ae
......@@ -135,7 +135,7 @@ class CLImage {
// CLImageConverterFolder *folder_converter = new
// CLImageConverterFolder();
CLImageConverterNormal *normal_converter = new CLImageConverterNormal();
PADDLE_MOBILE_ENFORCE(!shared_mem_, "do not init mem after shared .")
DLOG << " to get image dims ";
image_dims_ = normal_converter->InitImageDimInfoWith(dim);
DLOG << " end get image dims " << image_dims_;
......@@ -176,7 +176,9 @@ class CLImage {
image_converter_ = normal_converter;
cl_event_ = CLEngine::Instance()->CreateEvent(context);
initialized_ = true;
DLOG << " end init cl image";
shared_mem_ = true;
DLOG << " end init FakeSizeImage";
}
/**
* init cl mem with an existing cl mem
......@@ -194,15 +196,16 @@ class CLImage {
DLOG << "InitWithExistMem ... ";
DLOG << "real_image_dims: " << real_image_dims_;
DLOG << "image_dims_: " << image_dims_;
// PADDLE_MOBILE_ENFORCE(real_image_dims[0] >= image_dims_[0] &&
// real_image_dims[1] >= image_dims_[1],
// "real image is not enough!");
if (real_image_dims_[0] < image_dims_[0] ||
real_image_dims_[1] < image_dims_[1]) {
DLOG << "real image is not enough!";
DLOG << "real_image_dims: " << real_image_dims_;
DLOG << "image_dims_: " << image_dims_;
}
PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] &&
real_image_dims_[1] >= image_dims_[1],
"real image is not enough!");
if (cl_image_ != src.cl_image_) {
cl_image_.reset(src.cl_image_.get());
}
......@@ -212,7 +215,9 @@ class CLImage {
image_converter_ = normal_converter;
cl_event_ = CLEngine::Instance()->CreateEvent(context);
initialized_ = true;
DLOG << " end init cl image";
shared_mem_ = true;
DLOG << " end init WithExistMem";
}
void InitConv2dTransposeFilterCLImage(cl_context context,
......@@ -281,6 +286,8 @@ class CLImage {
private:
void InitCLImage(cl_context context, size_t width, size_t height,
void *data) {
PADDLE_MOBILE_ENFORCE(!shared_mem_, "do not init mem after shared .")
cl_image_format cf = {.image_channel_order = CL_RGBA,
.image_channel_data_type = CL_HALF_FLOAT};
cl_image_desc cid = {
......@@ -321,6 +328,7 @@ class CLImage {
cl_context context_;
cl_command_queue command_queue_;
CLImageConverterBase *image_converter_ = nullptr;
bool shared_mem_ = false;
};
void TensorToCLImage(Tensor *tensor, CLImage *image, cl_context context,
......
......@@ -98,7 +98,7 @@ void FusionFcCompute(const FusionFcParam<GPU_CL> &param, cl_context context,
static_cast<float>(1), out, static_cast<float>(1),
false);
out_image->InitEmptyImage(context, commandQueue, out->dims());
// out_image->InitEmptyImage(context, commandQueue, out->dims());
framework::TensorToCLImage(out, out_image, context, commandQueue, kernel1);
delete (input_x);
......
......@@ -63,7 +63,7 @@ void MulCompute(const MulParam<GPU_CL> &param, cl_context context,
static_cast<float>(1), output_tensor,
static_cast<float>(0));
output->InitEmptyImage(context, commandQueue, output_tensor->dims());
// output->InitEmptyImage(context, commandQueue, output_tensor->dims());
framework::TensorToCLImage(output_tensor, output, context, commandQueue,
kernel1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册