提交 408407f4 作者: Ray Liu 提交者: GitHub

Merge pull request #1090 from codeWorm2015/opencl

update batch norm
......@@ -37,7 +37,7 @@ limitations under the License. */
#include "framework/cl/cl_image.h"
#endif
int debug_to = 2;
int debug_to = 4;
namespace paddle_mobile {
namespace framework {
......
......@@ -47,15 +47,20 @@ bool BatchNormKernel<GPU_CL, float>::Init(BatchNormParam<GPU_CL> *param) {
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
}
delete[](new_scale_ptr);
delete[](new_bias_ptr);
framework::CLImage *new_scale = new framework::CLImage();
new_scale->SetTensorData(new_scale_ptr, variance->dims());
new_scale->InitCLImage(this->cl_helper_.CLContext());
framework::CLImage *new_bias = new framework::CLImage();
new_bias->SetTensorData(new_bias_ptr, variance->dims());
new_bias->InitCLImage(this->cl_helper_.CLContext());
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
delete[](new_scale_ptr);
delete[](new_bias_ptr);
return true;
}
......
......@@ -3,8 +3,8 @@
__kernel void batchnorm(__private const int out_height,
__private const int out_width,
__read_only image2d_t input,
__read_only image2d_t new_scale,
__read_only image2d_t new_bias,
__read_only image2d_t new_scale_image,
__read_only image2d_t new_bias_image,
__write_only image2d_t output) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
......@@ -13,12 +13,12 @@ __kernel void batchnorm(__private const int out_height,
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
half4 new_scale = read_imageh(bn_scale, sampler, (int2)(out_c, 0));
half4 new_bias = read_imageh(bn_bias, sampler, (int2)(out_c, 0));
half4 new_scale = read_imageh(new_scale_image, sampler, (int2)(out_c, 0));
half4 new_bias = read_imageh(new_bias_image, sampler, (int2)(out_c, 0));
int pos_x = mad24(out_c, out_width, out_w);
half4 in = read_imageh(input, sampler, (int2)(pos_x, out_nh));
half4 out = mad(in, new_scale, new_bias);
write_imageh(output, (int2)(pos_x, nh), out);
write_imageh(output, (int2)(pos_x, out_nh), out);
}
......@@ -61,7 +61,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
size_t region[3] = {height, width, 1};
clEnqueueReadImage(commandQueue, cl_image, CL_TRUE, origin, region, 0, 0, out,
0, NULL, NULL);
for (int i = 0; i < numel; i++) DLOG << Half2Float(out[i]);
// for (int i = 0; i < numel; i++) DLOG << Half2Float(out[i]);
}
template class FeedKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册