提交 e309c772 编写于 作者: R Ray Liu 提交者: GitHub

Merge pull request #1134 from codeWorm2015/opencl

 fix image memory copy bug (CLImage's memcpy of tensor data copied numel bytes instead of numel * sizeof(float) bytes)
...@@ -126,7 +126,8 @@ Print &operator<<(Print &printer, const CLImage &cl_image) { ...@@ -126,7 +126,8 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
DDim ddim = cl_image.dims(); DDim ddim = cl_image.dims();
size_t N, C, H, W, width, height; size_t N, C, H, W, width, height;
if (cl_image.GetImageType() == Normal || cl_image.dims().size() == 3 || cl_image.dims().size() == 4) { if (cl_image.GetImageType() == Normal || cl_image.dims().size() == 3 ||
cl_image.dims().size() == 4) {
if (ddim.size() == 4) { if (ddim.size() == 4) {
N = ddim[0]; N = ddim[0];
if (N < 0) { if (N < 0) {
...@@ -185,8 +186,6 @@ Print &operator<<(Print &printer, const CLImage &cl_image) { ...@@ -185,8 +186,6 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
delete (imageData); delete (imageData);
CL_CHECK_ERRORS(err); CL_CHECK_ERRORS(err);
} else { } else {
if (ddim.size() == 2) { if (ddim.size() == 2) {
width = (ddim[1] + 3) / 4; width = (ddim[1] + 3) / 4;
...@@ -220,7 +219,7 @@ Print &operator<<(Print &printer, const CLImage &cl_image) { ...@@ -220,7 +219,7 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
for (int i = 0; i < cl_image.numel(); i += stride) { for (int i = 0; i < cl_image.numel(); i += stride) {
printer << data[i] << " "; printer << data[i] << " ";
} }
delete(data); delete (data);
return printer; return printer;
} }
#endif #endif
......
...@@ -26,11 +26,7 @@ limitations under the License. */ ...@@ -26,11 +26,7 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
enum ImageType { enum ImageType { Invalid = -1, Normal = 0, Folder = 1 };
Invalid = -1,
Normal = 0,
Folder = 1
};
class CLImage { class CLImage {
public: public:
...@@ -43,9 +39,10 @@ class CLImage { ...@@ -43,9 +39,10 @@ class CLImage {
int numel = product(dim); int numel = product(dim);
if (tensor_data_ != nullptr) { if (tensor_data_ != nullptr) {
delete[](tensor_data_); delete[](tensor_data_);
tensor_data_ = nullptr;
} }
tensor_data_ = new float[numel]; tensor_data_ = new float[numel];
memcpy(tensor_data_, tensorData, numel); memcpy(tensor_data_, tensorData, numel * sizeof(float));
tensor_dims_ = dim; tensor_dims_ = dim;
} }
......
...@@ -40,6 +40,25 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init( ...@@ -40,6 +40,25 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
const framework::CLImage *scale = param->InputScale(); const framework::CLImage *scale = param->InputScale();
const framework::CLImage *bias = param->InputBias(); const framework::CLImage *bias = param->InputBias();
const float epsilon = param->Epsilon(); const float epsilon = param->Epsilon();
const int C = mean->numel();
for (int j = 0; j < C; ++j) {
DLOG << " mean - " << j << mean->data<float>()[j];
}
for (int j = 0; j < C; ++j) {
DLOG << " variance - " << j << variance->data<float>()[j];
}
for (int j = 0; j < C; ++j) {
DLOG << " scale - " << j << scale->data<float>()[j];
}
for (int j = 0; j < C; ++j) {
DLOG << " bias - " << j << bias->data<float>()[j];
}
// //
// DLOG << " climage mean: " << *mean; // DLOG << " climage mean: " << *mean;
// DLOG << " climage variance: " << *variance; // DLOG << " climage variance: " << *variance;
...@@ -51,8 +70,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init( ...@@ -51,8 +70,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
auto scale_ptr = scale->data<float>(); auto scale_ptr = scale->data<float>();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
const int C = mean->numel();
float inv_std_ptr[C]; float inv_std_ptr[C];
for (int i = 0; i < C; i++) { for (int i = 0; i < C; i++) {
inv_std_ptr[i] = inv_std_ptr[i] =
...@@ -68,6 +85,14 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init( ...@@ -68,6 +85,14 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
framework::CLImage *new_scale = new framework::CLImage(); framework::CLImage *new_scale = new framework::CLImage();
for (int j = 0; j < C; ++j) {
DLOG << " new scale - " << j << new_scale_ptr[j];
}
for (int j = 0; j < C; ++j) {
DLOG << " new bias - " << j << new_bias_ptr[j];
}
new_scale->SetTensorData(new_scale_ptr, variance->dims()); new_scale->SetTensorData(new_scale_ptr, variance->dims());
new_scale->InitCLImage(this->cl_helper_.CLContext(), new_scale->InitCLImage(this->cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
......
...@@ -23,8 +23,10 @@ bool ReluKernel<GPU_CL, float>::Init(ReluParam<GPU_CL>* param) { ...@@ -23,8 +23,10 @@ bool ReluKernel<GPU_CL, float>::Init(ReluParam<GPU_CL>* param) {
this->cl_helper_.AddKernel("relu", "relu.cl"); this->cl_helper_.AddKernel("relu", "relu.cl");
this->cl_helper_.AddKernel("relu_p0", "relu.cl"); this->cl_helper_.AddKernel("relu_p0", "relu.cl");
this->cl_helper_.AddKernel("relu_p1", "relu.cl"); this->cl_helper_.AddKernel("relu_p1", "relu.cl");
const auto dim = const_cast<framework::CLImage*>(param->InputX())->ImageDims(); const auto dim =
param->getMidImage().InitEmptyImage(this->cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue(), dim); const_cast<framework::CLImage*>(param->InputX())->ImageDims();
param->getMidImage().InitEmptyImage(this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue(), dim);
return true; return true;
} }
...@@ -37,17 +39,20 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) { ...@@ -37,17 +39,20 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) {
auto default_work_size = this->cl_helper_.DefaultWorkSize(*output); auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);
auto inputImage = input->GetCLImage(); auto inputImage = input->GetCLImage();
auto outputImage = output->GetCLImage(); auto outputImage = output->GetCLImage();
auto tImage = const_cast<ReluParam<GPU_CL>&>(param).getMidImage().GetCLImage(); auto tImage =
const_cast<ReluParam<GPU_CL>&>(param).getMidImage().GetCLImage();
clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &inputImage); clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &inputImage);
clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &tImage); clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &tImage);
clSetKernelArg(kernel_p1, 0, sizeof(cl_mem), &tImage); clSetKernelArg(kernel_p1, 0, sizeof(cl_mem), &tImage);
clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage); clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage);
const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p0, 3, NULL, // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p0, 3,
// work_size, NULL, 0, NULL, NULL); // NULL,
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3, NULL, // work_size, NULL, 0, NULL, NULL);
// work_size, NULL, 0, NULL, NULL); // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3,
// NULL,
// work_size, NULL, 0, NULL, NULL);
} }
template class ReluKernel<GPU_CL, float>; template class ReluKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册