提交 94e70a44 · 作者：liuruilong

update batch norm

上级 b41fcacc
...@@ -37,7 +37,7 @@ limitations under the License. */ ...@@ -37,7 +37,7 @@ limitations under the License. */
#include "framework/cl/cl_image.h" #include "framework/cl/cl_image.h"
#endif #endif
int debug_to = 2; int debug_to = 4;
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
......
...@@ -47,15 +47,20 @@ bool BatchNormKernel<GPU_CL, float>::Init(BatchNormParam<GPU_CL> *param) { ...@@ -47,15 +47,20 @@ bool BatchNormKernel<GPU_CL, float>::Init(BatchNormParam<GPU_CL> *param) {
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i]; new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
} }
delete[](new_scale_ptr);
delete[](new_bias_ptr);
framework::CLImage *new_scale = new framework::CLImage(); framework::CLImage *new_scale = new framework::CLImage();
new_scale->SetTensorData(new_scale_ptr, variance->dims());
new_scale->InitCLImage(this->cl_helper_.CLContext());
framework::CLImage *new_bias = new framework::CLImage(); framework::CLImage *new_bias = new framework::CLImage();
new_bias->SetTensorData(new_bias_ptr, variance->dims());
new_bias->InitCLImage(this->cl_helper_.CLContext());
param->SetNewScale(new_scale); param->SetNewScale(new_scale);
param->SetNewBias(new_bias); param->SetNewBias(new_bias);
delete[](new_scale_ptr);
delete[](new_bias_ptr);
return true; return true;
} }
......
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
__kernel void batchnorm(__private const int out_height, __kernel void batchnorm(__private const int out_height,
__private const int out_width, __private const int out_width,
__read_only image2d_t input, __read_only image2d_t input,
__read_only image2d_t new_scale, __read_only image2d_t new_scale_image,
__read_only image2d_t new_bias, __read_only image2d_t new_bias_image,
__write_only image2d_t output) { __write_only image2d_t output) {
const int out_c = get_global_id(0); const int out_c = get_global_id(0);
const int out_w = get_global_id(1); const int out_w = get_global_id(1);
...@@ -13,12 +13,12 @@ __kernel void batchnorm(__private const int out_height, ...@@ -13,12 +13,12 @@ __kernel void batchnorm(__private const int out_height,
const sampler_t sampler = const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
half4 new_scale = read_imageh(bn_scale, sampler, (int2)(out_c, 0)); half4 new_scale = read_imageh(new_scale_image, sampler, (int2)(out_c, 0));
half4 new_bias = read_imageh(bn_bias, sampler, (int2)(out_c, 0)); half4 new_bias = read_imageh(new_bias_image, sampler, (int2)(out_c, 0));
int pos_x = mad24(out_c, out_width, out_w); int pos_x = mad24(out_c, out_width, out_w);
half4 in = read_imageh(input, sampler, (int2)(pos_x, out_nh)); half4 in = read_imageh(input, sampler, (int2)(pos_x, out_nh));
half4 out = mad(in, new_scale, new_bias); half4 out = mad(in, new_scale, new_bias);
write_imageh(output, (int2)(pos_x, nh), out); write_imageh(output, (int2)(pos_x, out_nh), out);
} }
...@@ -61,7 +61,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) { ...@@ -61,7 +61,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
size_t region[3] = {height, width, 1}; size_t region[3] = {height, width, 1};
clEnqueueReadImage(commandQueue, cl_image, CL_TRUE, origin, region, 0, 0, out, clEnqueueReadImage(commandQueue, cl_image, CL_TRUE, origin, region, 0, 0, out,
0, NULL, NULL); 0, NULL, NULL);
for (int i = 0; i < numel; i++) DLOG << Half2Float(out[i]); // for (int i = 0; i < numel; i++) DLOG << Half2Float(out[i]);
} }
template class FeedKernel<GPU_CL, float>; template class FeedKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册