diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp index e9596debe7e7a34827c0651e880b3427741df64b..ab9d4f788aa1eb8db8cc38b797c9f097ed260dac 100644 --- a/src/framework/operator.cpp +++ b/src/framework/operator.cpp @@ -57,7 +57,9 @@ void OperatorBase::CheckAllInputOutputSet() const {} template void OperatorBase::Run() { + DLOG << " ----- Begin run impl --- " << type_ << " ----- "; RunImpl(); + DLOG << " ----- End run impl --- " << type_ << " ----- "; #ifdef PADDLE_MOBILE_DEBUG DLOG << "-------------" << type_ << "----------------------------"; vector input_keys = GetInputKeys(); diff --git a/src/operators/kernel/cl/cl_kernel/conv_add_bn_relu_kernel.cl b/src/operators/kernel/cl/cl_kernel/conv_add_bn_relu_kernel.cl index 7b98369af1b3850d5a8fe76062c18e1270a35966..074280a23522efbf3220de8fde396d24e2165d30 100644 --- a/src/operators/kernel/cl/cl_kernel/conv_add_bn_relu_kernel.cl +++ b/src/operators/kernel/cl/cl_kernel/conv_add_bn_relu_kernel.cl @@ -14,7 +14,6 @@ limitations under the License. */ #pragma OPENCL EXTENSION cl_khr_fp16 : enable - #define BIASE #define BATCH_NORM @@ -54,21 +53,24 @@ __kernel void conv_3x3(__private const int global_size_dim0, ouput_pos_in_one_block.x = out_w; ouput_pos_in_one_block.y = out_nh; + + const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | + CLK_ADDRESS_CLAMP | + CLK_FILTER_NEAREST; + int2 in_pos_in_one_block; in_pos_in_one_block.x = ouput_pos_in_one_block.x * stride + offset; in_pos_in_one_block.y = ouput_pos_in_one_block.y * stride + offset; #ifdef BIASE - half4 output = read_imageh(bias, sampler, int2(out_c, 0)); + half4 output = read_imageh(bias, sampler, (int2)(out_c, 0)); #else half4 output = 0.0f; #endif half4 input[9]; - const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | - CLK_ADDRESS_CLAMP | - CLK_FILTER_NEAREST; + for (int i = 0; i < input_c; ++i) { int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x, in_pos_in_one_block.y); @@ -139,7 +141,7 @@ __kernel void conv_3x3(__private const int global_size_dim0, } #ifdef BATCH_NORM - output = output * read_imageh(new_scale, sampler, int2(out_c, 0)) + read_imageh(new_biase, sampler, int2(out_c, 0)) + output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)); #endif #ifdef RELU @@ -250,7 +252,7 @@ __kernel void depth_conv_3x3(__private const int global_size_dim0, } #ifdef BATCH_NORM - output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)) + output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)); #endif #ifdef RELU @@ -321,7 +323,7 @@ __kernel void conv_1x1(__private const int global_size_dim0, } #ifdef BATCH_NORM - output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)) + output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)); #endif #ifdef RELU diff --git a/src/operators/kernel/cl/cl_kernel/conv_add_kernel.cl b/src/operators/kernel/cl/cl_kernel/conv_add_kernel.cl index 20041c80a599b0b1b4ffcc55e8931dc012809206..34a687dbb7d9d6424f57f85e94591f3b46e38a1d 100644 --- a/src/operators/kernel/cl/cl_kernel/conv_add_kernel.cl +++ b/src/operators/kernel/cl/cl_kernel/conv_add_kernel.cl @@ -56,17 +56,19 @@ __kernel void conv_3x3(__private const int global_size_dim0, in_pos_in_one_block.x = ouput_pos_in_one_block.x * stride + offset; in_pos_in_one_block.y = ouput_pos_in_one_block.y * stride + offset; + const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | + CLK_ADDRESS_CLAMP | + CLK_FILTER_NEAREST; + #ifdef BIASE - half4 output = read_imageh(bias, sampler, int2(out_c, 0)); + half4 output = read_imageh(bias, sampler, (int2)(out_c, 0)); #else half4 output = 0.0f; #endif half4 input[9]; - const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | - CLK_ADDRESS_CLAMP | - CLK_FILTER_NEAREST; + for (int i = 0; i < input_c; ++i) { int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x, in_pos_in_one_block.y); @@ -137,7 +139,7 @@ __kernel void conv_3x3(__private const int global_size_dim0, } #ifdef BATCH_NORM - output = output * read_imageh(new_scale, sampler, int2(out_c, 0)) + read_imageh(new_biase, sampler, int2(out_c, 0)) + output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)); #endif #ifdef RELU @@ -248,7 +250,7 @@ __kernel void depth_conv_3x3(__private const int global_size_dim0, } #ifdef BATCH_NORM - output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)) + output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)); #endif #ifdef RELU @@ -319,7 +321,7 @@ __kernel void conv_1x1(__private const int global_size_dim0, } #ifdef BATCH_NORM - output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)) + output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0)); #endif #ifdef RELU diff --git a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp index 20c148d7766f5a3fe8e952766a6ed4188e6487df..7ce60e6d1e9a687a3f6623ff8dd8e07576c02daf 100644 --- a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp @@ -117,7 +117,7 @@ void ConvAddBNReluKernel::Compute( auto biase = param.Bias()->GetCLImage(); auto new_scale = param.NewScale()->GetCLImage(); auto new_bias = param.NewBias()->GetCLImage(); - auto output = param.Output(); + auto output = param.Output()->GetCLImage(); int stride = param.Strides()[0]; int offset = param.Offset(); int input_c = param.Input()->CBlock(); diff --git a/test/net/test_mobilenet_GPU.cpp b/test/net/test_mobilenet_GPU.cpp index f69334daf2f24bdd4b41ee58e7236051d1459809..a5a78f7f8f6fe93ca7412f0ec007c291b26417af 100644 --- a/test/net/test_mobilenet_GPU.cpp +++ b/test/net/test_mobilenet_GPU.cpp @@ -23,7 +23,7 @@ int main() { // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model", // std::string(g_mobilenet_detect) + "/params", true); - auto isok = paddle_mobile.Load(g_mobilenet, false); + auto isok = paddle_mobile.Load(g_mobilenet, true); if (isok) { auto time2 = paddle_mobile::time(); std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms"