diff --git a/src/framework/cl/cl_image.cpp b/src/framework/cl/cl_image.cpp index 50ad9a3b6753d7cb6fb5baf8f8f203d0f3045ab7..b104b8c048af03b0245f7edd64bc140e6fd88549 100644 --- a/src/framework/cl/cl_image.cpp +++ b/src/framework/cl/cl_image.cpp @@ -126,7 +126,8 @@ Print &operator<<(Print &printer, const CLImage &cl_image) { DDim ddim = cl_image.dims(); size_t N, C, H, W, width, height; - if (cl_image.GetImageType() == Normal || cl_image.dims().size() == 3 || cl_image.dims().size() == 4) { + if (cl_image.GetImageType() == Normal || cl_image.dims().size() == 3 || + cl_image.dims().size() == 4) { if (ddim.size() == 4) { N = ddim[0]; if (N < 0) { @@ -185,8 +186,6 @@ Print &operator<<(Print &printer, const CLImage &cl_image) { delete (imageData); CL_CHECK_ERRORS(err); - - } else { if (ddim.size() == 2) { width = (ddim[1] + 3) / 4; @@ -220,7 +219,7 @@ Print &operator<<(Print &printer, const CLImage &cl_image) { for (int i = 0; i < cl_image.numel(); i += stride) { printer << data[i] << " "; } - delete(data); + delete (data); return printer; } #endif diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h index 6ea549427847210cffdb500e581aa72e27391c7e..b7625ca65dec440aa18ad5867626cd0897098980 100644 --- a/src/framework/cl/cl_image.h +++ b/src/framework/cl/cl_image.h @@ -26,11 +26,7 @@ limitations under the License. */ namespace paddle_mobile { namespace framework { -enum ImageType { - Invalid = -1, - Normal = 0, - Folder = 1 -}; +enum ImageType { Invalid = -1, Normal = 0, Folder = 1 }; class CLImage { public: @@ -43,9 +39,10 @@ class CLImage { int numel = product(dim); if (tensor_data_ != nullptr) { delete[](tensor_data_); + tensor_data_ = nullptr; } tensor_data_ = new float[numel]; - memcpy(tensor_data_, tensorData, numel); + memcpy(tensor_data_, tensorData, numel * sizeof(float)); tensor_dims_ = dim; } diff --git a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp index 272e130817eda62f71a67e179a57ce63f024bc4d..1c75b8be9fe05eefac0930a6fe2b79c42e952148 100644 --- a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp @@ -40,6 +40,25 @@ bool ConvAddBNReluKernel::Init( const framework::CLImage *scale = param->InputScale(); const framework::CLImage *bias = param->InputBias(); const float epsilon = param->Epsilon(); + + const int C = mean->numel(); + + for (int j = 0; j < C; ++j) { + DLOG << " mean - " << j << mean->data()[j]; + } + + for (int j = 0; j < C; ++j) { + DLOG << " variance - " << j << variance->data()[j]; + } + + for (int j = 0; j < C; ++j) { + DLOG << " scale - " << j << scale->data()[j]; + } + + for (int j = 0; j < C; ++j) { + DLOG << " bias - " << j << bias->data()[j]; + } + // // DLOG << " climage mean: " << *mean; // DLOG << " climage variance: " << *variance; @@ -51,8 +70,6 @@ bool ConvAddBNReluKernel::Init( auto scale_ptr = scale->data(); auto bias_ptr = bias->data(); - const int C = mean->numel(); - float inv_std_ptr[C]; for (int i = 0; i < C; i++) { inv_std_ptr[i] = @@ -68,6 +85,14 @@ bool ConvAddBNReluKernel::Init( framework::CLImage *new_scale = new framework::CLImage(); + for (int j = 0; j < C; ++j) { + DLOG << " new scale - " << j << new_scale_ptr[j]; + } + + for (int j = 0; j < C; ++j) { + DLOG << " new bias - " << j << new_bias_ptr[j]; + } + new_scale->SetTensorData(new_scale_ptr, variance->dims()); new_scale->InitCLImage(this->cl_helper_.CLContext(), cl_helper_.CLCommandQueue()); diff --git a/src/operators/kernel/cl/relu_kernel.cpp b/src/operators/kernel/cl/relu_kernel.cpp index b451afcae716ff17c2d689f8bb7c198a199e93dd..a50079db6072ef5aa556167aa2376a895dad2ae1 100644 --- a/src/operators/kernel/cl/relu_kernel.cpp +++ b/src/operators/kernel/cl/relu_kernel.cpp @@ -23,8 +23,10 @@ bool ReluKernel::Init(ReluParam* param) { this->cl_helper_.AddKernel("relu", "relu.cl"); this->cl_helper_.AddKernel("relu_p0", "relu.cl"); this->cl_helper_.AddKernel("relu_p1", "relu.cl"); - const auto dim = const_cast(param->InputX())->ImageDims(); - param->getMidImage().InitEmptyImage(this->cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue(), dim); + const auto dim = + const_cast(param->InputX())->ImageDims(); + param->getMidImage().InitEmptyImage(this->cl_helper_.CLContext(), + this->cl_helper_.CLCommandQueue(), dim); return true; } @@ -37,17 +39,20 @@ void ReluKernel::Compute(const ReluParam& param) { auto default_work_size = this->cl_helper_.DefaultWorkSize(*output); auto inputImage = input->GetCLImage(); auto outputImage = output->GetCLImage(); - auto tImage = const_cast&>(param).getMidImage().GetCLImage(); + auto tImage = + const_cast&>(param).getMidImage().GetCLImage(); clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &inputImage); clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &tImage); clSetKernelArg(kernel_p1, 0, sizeof(cl_mem), &tImage); clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage); const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; -// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p0, 3, NULL, -// work_size, NULL, 0, NULL, NULL); -// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3, NULL, -// work_size, NULL, 0, NULL, NULL); + // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p0, 3, + // NULL, + // work_size, NULL, 0, NULL, NULL); + // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3, + // NULL, + // work_size, NULL, 0, NULL, NULL); } template class ReluKernel;