提交 4af8f0f4 编写于 作者: L liuruilong

fix image memory copy bug

上级 4259eafd
......@@ -126,7 +126,8 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
DDim ddim = cl_image.dims();
size_t N, C, H, W, width, height;
if (cl_image.GetImageType() == Normal || cl_image.dims().size() == 3 || cl_image.dims().size() == 4) {
if (cl_image.GetImageType() == Normal || cl_image.dims().size() == 3 ||
cl_image.dims().size() == 4) {
if (ddim.size() == 4) {
N = ddim[0];
if (N < 0) {
......@@ -185,8 +186,6 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
delete (imageData);
} else {
if (ddim.size() == 2) {
width = (ddim[1] + 3) / 4;
......@@ -220,7 +219,7 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
for (int i = 0; i < cl_image.numel(); i += stride) {
printer << data[i] << " ";
delete (data);
return printer;
......@@ -26,11 +26,7 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
enum ImageType {
Invalid = -1,
Normal = 0,
Folder = 1
// Tag describing how a CLImage is laid out. CLImage::GetImageType() results
// are compared against these enumerators (e.g. `== Normal` when printing).
// NOTE(review): the precise layout each value implies is not visible here —
// confirm against the CLImage implementation.
enum ImageType {
  // Sentinel below every valid type; presumably "type not set" — TODO confirm.
  Invalid = -1,
  // Type checked by the CLImage printing code before it reads the image back.
  Normal = 0,
  // Alternative image layout; semantics not shown in this file — TODO confirm.
  Folder = 1,
};
class CLImage {
......@@ -43,9 +39,10 @@ class CLImage {
int numel = product(dim);
if (tensor_data_ != nullptr) {
tensor_data_ = nullptr;
tensor_data_ = new float[numel];
memcpy(tensor_data_, tensorData, numel);
memcpy(tensor_data_, tensorData, numel * sizeof(float));
tensor_dims_ = dim;
......@@ -40,6 +40,25 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
const framework::CLImage *scale = param->InputScale();
const framework::CLImage *bias = param->InputBias();
const float epsilon = param->Epsilon();
const int C = mean->numel();
for (int j = 0; j < C; ++j) {
DLOG << " mean - " << j << mean->data<float>()[j];
for (int j = 0; j < C; ++j) {
DLOG << " variance - " << j << variance->data<float>()[j];
for (int j = 0; j < C; ++j) {
DLOG << " scale - " << j << scale->data<float>()[j];
for (int j = 0; j < C; ++j) {
DLOG << " bias - " << j << bias->data<float>()[j];
// DLOG << " climage mean: " << *mean;
// DLOG << " climage variance: " << *variance;
......@@ -51,8 +70,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
auto scale_ptr = scale->data<float>();
auto bias_ptr = bias->data<float>();
const int C = mean->numel();
float inv_std_ptr[C];
for (int i = 0; i < C; i++) {
inv_std_ptr[i] =
......@@ -68,6 +85,14 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
framework::CLImage *new_scale = new framework::CLImage();
for (int j = 0; j < C; ++j) {
DLOG << " new scale - " << j << new_scale_ptr[j];
for (int j = 0; j < C; ++j) {
DLOG << " new bias - " << j << new_bias_ptr[j];
new_scale->SetTensorData(new_scale_ptr, variance->dims());
......@@ -23,8 +23,10 @@ bool ReluKernel<GPU_CL, float>::Init(ReluParam<GPU_CL>* param) {
this->cl_helper_.AddKernel("relu", "relu.cl");
this->cl_helper_.AddKernel("relu_p0", "relu.cl");
this->cl_helper_.AddKernel("relu_p1", "relu.cl");
const auto dim = const_cast<framework::CLImage*>(param->InputX())->ImageDims();
param->getMidImage().InitEmptyImage(this->cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue(), dim);
const auto dim =
this->cl_helper_.CLCommandQueue(), dim);
return true;
......@@ -37,17 +39,20 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) {
auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);
auto inputImage = input->GetCLImage();
auto outputImage = output->GetCLImage();
auto tImage = const_cast<ReluParam<GPU_CL>&>(param).getMidImage().GetCLImage();
auto tImage =
clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &inputImage);
clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &tImage);
clSetKernelArg(kernel_p1, 0, sizeof(cl_mem), &tImage);
clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage);
const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p0, 3, NULL,
// work_size, NULL, 0, NULL, NULL);
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3, NULL,
// work_size, NULL, 0, NULL, NULL);
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p0, 3,
// NULL,
// work_size, NULL, 0, NULL, NULL);
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3,
// NULL,
// work_size, NULL, 0, NULL, NULL);
template class ReluKernel<GPU_CL, float>;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册