diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 75177941fc31906bd3110c51ffd32f3ae549f1d0..a6f900a4d144fa06dbbe8f4f8312bed5923b8b33 100755
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -9,6 +9,8 @@ stages:
   - build_mobile
 
 check:prebuilt:
+  tags:
+    - lite
   stage: ci
   script:
     #- pip3 install pre-commit
@@ -24,17 +26,21 @@ check:prebuilt:
     - /root/.cache
 
 build:server:
+  tags:
+    - lite
   image: $SERVER_LITE_DOCKER_IMAGE
   stage: build_server
   cache:
     key: server_thirdparty
     paths:
       - build/third_party
+      - /root/.ccache
   script:
-    #- export http_proxy=http://172.19.57.45:3128
-    #- export https_proxy=http://172.19.57.45:3128
-    - export http_proxy=http://agent.baidu.com:8118
-    - export https_proxy=http://agent.baidu.com:8118
+    - apt install ccache
+    - export http_proxy=http://172.19.57.45:3128
+    - export https_proxy=http://172.19.57.45:3128
+    #- export http_proxy=http://agent.baidu.com:8118
+    #- export https_proxy=http://agent.baidu.com:8118
     - mkdir -p build
     - cd build
     - ../paddle/fluid/lite/tools/build.sh cmake_x86
@@ -49,6 +55,8 @@ build:server:
     - check:prebuilt
 
 build:mobile:
+  tags:
+    - lite
   stage: build_mobile
   image: $MOBILE_LITE_DOCKER_IMAGE
   cache:
@@ -56,7 +64,9 @@ build:mobile:
     paths:
       - $MOBILE_LITE_CACHE0
       - $MOBILE_LITE_CACHE1
+      - /root/.ccache
   script:
+    - apt install ccache
     - export http_proxy=http://172.19.57.45:3128
     - export https_proxy=http://172.19.57.45:3128
     - ./paddle/fluid/lite/tools/build.sh build_test_arm
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ef4a4c351e4b701f481b5b23076ea3535fa7231..312bdb7f1ae11576abf6f5ec222bae72bcd67bb5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -166,6 +166,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
   #include(external/zlib)       # download, build, install gtest
   include(external/protobuf)    # download, build, install protobuf
   include(external/eigen)       # download eigen3
+  include(ccache)               # set ccache for compilation
 
   include(generic)              # simplify cmake module
   include(configure)            # add paddle env configuration
diff --git a/paddle/fluid/lite/api/cxx_api_bin.cc b/paddle/fluid/lite/api/cxx_api_bin.cc
index dd8b5843df37ce423677171f670bbc3918b05c6b..dec0b65eb2791b45bdf3fa54715af97a844342fc 100644
--- a/paddle/fluid/lite/api/cxx_api_bin.cc
+++ b/paddle/fluid/lite/api/cxx_api_bin.cc
@@ -13,17 +13,25 @@
 // limitations under the License.
#include "paddle/fluid/lite/api/cxx_api.h" - -#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK +#include #include "paddle/fluid/lite/core/mir/passes.h" -#endif - #include "paddle/fluid/lite/core/op_registry.h" - namespace paddle { namespace lite { -void Run(const char* model_dir) { +using Time = decltype(std::chrono::high_resolution_clock::now()); +Time time() { return std::chrono::high_resolution_clock::now(); } +double time_diff(Time t1, Time t2) { + typedef std::chrono::microseconds ms; + auto diff = t2 - t1; + ms counter = std::chrono::duration_cast(diff); + return counter.count() / 1000.0; +} + +void Run(const char* model_dir, int repeat) { +#ifdef LITE_WITH_ARM + DeviceInfo::Init(); +#endif lite::ExecutorLite predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}}); @@ -32,13 +40,19 @@ void Run(const char* model_dir) { valid_places); auto* input_tensor = predictor.GetInput(0); - input_tensor->Resize(DDim(std::vector({3, 224, 224}))); + input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); auto* data = input_tensor->mutable_data(); - for (int i = 0; i < 3 * 224 * 224; i++) { - data[i] = i; + for (int i = 0; i < input_tensor->dims().production(); i++) { + data[i] = 1; } - predictor.Run(); + for (int i = 0; i < 10; i++) predictor.Run(); + + auto time1 = time(); + for (int i = 0; i < repeat; i++) predictor.Run(); + auto time2 = time(); + std::cout << " predict cost: " << time_diff(time1, time2) / repeat << "ms" + << std::endl; auto* out = predictor.GetOutput(0); LOG(INFO) << out << " memory size " << out->data_size(); @@ -53,7 +67,7 @@ void Run(const char* model_dir) { int main(int argc, char** argv) { CHECK_EQ(argc, 2) << "usage: ./cmd "; - paddle::lite::Run(argv[1]); + paddle::lite::Run(argv[1], 1); return 0; } @@ -66,7 +80,7 @@ USE_LITE_OP(fetch); USE_LITE_OP(io_copy); USE_LITE_OP(conv2d); -// USE_LITE_OP(batch_norm); +USE_LITE_OP(batch_norm); USE_LITE_OP(relu); USE_LITE_OP(depthwise_conv2d); USE_LITE_OP(pool2d); @@ -85,7 +99,7 @@ USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def); -// USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/arm/math/elementwise.cc b/paddle/fluid/lite/arm/math/elementwise.cc index 68140a5d7dbccc9fa0028e9cde3e9d074275a7ee..2a74e7ee4ec4be51b420b1fa2d2a1be7c3f148fb 100644 --- a/paddle/fluid/lite/arm/math/elementwise.cc +++ b/paddle/fluid/lite/arm/math/elementwise.cc @@ -41,15 +41,15 @@ void elementwise_add(const float* dinx, const float* diny, float* dout, float32x4_t diny2 = vld1q_f32(diny_ptr + 8); float32x4_t diny3 = vld1q_f32(diny_ptr + 12); - float32x4_t vsum0 = vaddq_f32(dinx0, diny0); - float32x4_t vsum1 = vaddq_f32(dinx1, diny1); - float32x4_t vsum2 = vaddq_f32(dinx2, diny2); - float32x4_t vsum3 = vaddq_f32(dinx3, diny3); + dinx0 = vaddq_f32(dinx0, diny0); + dinx1 = vaddq_f32(dinx1, diny1); + dinx2 = vaddq_f32(dinx2, diny2); + dinx3 = vaddq_f32(dinx3, diny3); - vst1q_f32(dout_ptr, vsum0); - vst1q_f32(dout_ptr + 4, vsum1); - vst1q_f32(dout_ptr + 8, vsum2); - vst1q_f32(dout_ptr + 12, vsum3); + vst1q_f32(dout_ptr, dinx0); + vst1q_f32(dout_ptr + 4, dinx1); + vst1q_f32(dout_ptr + 8, dinx2); + vst1q_f32(dout_ptr + 12, dinx3); } if (remain > 0) { 
       const float* dinx_ptr = dinx + (cnt << 4);
@@ -64,6 +64,69 @@ void elementwise_add<float>(const float* dinx, const float* diny, float* dout,
   }
 }
 
+template <>
+void elementwise_add_axis<float>(const float* dinx, const float* diny,
+                                 float* dout, int batch, int channels,
+                                 int num) {
+#pragma omp parallel for collapse(2)
+  for (int i = 0; i < batch; ++i) {
+    for (int j = 0; j < channels; ++j) {
+      int offset = (i * channels + j) * num;
+      const float* din_ptr = dinx + offset;
+      const float diny_data = diny[j];
+      float* dout_ptr = dout + offset;
+
+      int cnt = num >> 4;
+      int remain = num % 16;
+      float32x4_t rb = vdupq_n_f32(diny_data);
+      for (int k = 0; k < cnt; ++k) {
+        float32x4_t din0 = vld1q_f32(din_ptr);
+        float32x4_t din1 = vld1q_f32(din_ptr + 4);
+        float32x4_t din2 = vld1q_f32(din_ptr + 8);
+        float32x4_t din3 = vld1q_f32(din_ptr + 12);
+
+        din0 = vaddq_f32(din0, rb);
+        din1 = vaddq_f32(din1, rb);
+        din2 = vaddq_f32(din2, rb);
+        din3 = vaddq_f32(din3, rb);
+
+        vst1q_f32(dout_ptr, din0);
+        vst1q_f32(dout_ptr + 4, din1);
+        vst1q_f32(dout_ptr + 8, din2);
+        vst1q_f32(dout_ptr + 12, din3);
+        din_ptr += 16;
+        dout_ptr += 16;
+      }
+      if (remain >= 8) {
+        float32x4_t din0 = vld1q_f32(din_ptr);
+        float32x4_t din1 = vld1q_f32(din_ptr + 4);
+        din0 = vaddq_f32(din0, rb);
+        din1 = vaddq_f32(din1, rb);
+        vst1q_f32(dout_ptr, din0);
+        vst1q_f32(dout_ptr + 4, din1);
+        din_ptr += 8;
+        dout_ptr += 8;
+        remain -= 8;
+      }
+      if (remain >= 4) {
+        float32x4_t din0 = vld1q_f32(din_ptr);
+        din0 = vaddq_f32(din0, rb);
+        vst1q_f32(dout_ptr, din0);
+        din_ptr += 4;
+        dout_ptr += 4;
+        remain -= 4;
+      }
+      if (remain > 0) {
+        for (int p = 0; p < remain; p++) {
+          *dout_ptr = *din_ptr + diny_data;
+          dout_ptr++;
+          din_ptr++;
+        }
+      }
+    }
+  }
+}
+
 }  // namespace math
 }  // namespace arm
 }  // namespace lite
diff --git a/paddle/fluid/lite/arm/math/elementwise.h b/paddle/fluid/lite/arm/math/elementwise.h
index cf4c8e46b0703a888bc9ac9a4a395d4e57ba886d..ca8f87895fcea80f9a1a178a0bf43b34c44182bb 100644
--- a/paddle/fluid/lite/arm/math/elementwise.h
+++ b/paddle/fluid/lite/arm/math/elementwise.h
@@ -22,6 +22,10 @@ namespace math {
 template <typename T>
 void elementwise_add(const T* dinx, const T* diny, T* dout, int num);
 
+template <typename T>
+void elementwise_add_axis(const T* dinx, const T* diny, T* dout, int batch,
+                          int channels, int num);
+
 }  // namespace math
 }  // namespace arm
 }  // namespace lite
diff --git a/paddle/fluid/lite/core/mir/passes.h b/paddle/fluid/lite/core/mir/passes.h
index 6e329a192277a9f0a76afa0ed54018cc3f12d7b7..b2ee259d5c673689201d933a6248b77dd704e20e 100644
--- a/paddle/fluid/lite/core/mir/passes.h
+++ b/paddle/fluid/lite/core/mir/passes.h
@@ -21,6 +21,7 @@ namespace mir {}  // namespace mir
 }  // namespace lite
 }  // namespace paddle
 
+#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 USE_MIR_PASS(demo);
 USE_MIR_PASS(lite_fc_fuse_pass);
 USE_MIR_PASS(lite_conv_elementwise_add_act_fuse_pass);
@@ -30,6 +31,7 @@ USE_MIR_PASS(type_target_transform_pass);
 USE_MIR_PASS(generate_program_pass);
 USE_MIR_PASS(io_copy_kernel_pick_pass);
 USE_MIR_PASS(argument_type_display_pass);
+#endif
 USE_MIR_PASS(runtime_context_assign_pass);
 USE_MIR_PASS(lite_conv_bn_fuse_pass);
 USE_MIR_PASS(graph_visualze);
diff --git a/paddle/fluid/lite/core/naive_test_model.py b/paddle/fluid/lite/core/naive_test_model.py
index 832661e5ee86f2759acfeb4a6a410cce6050ad53..f89a5e115fa805bab818cabeab1d63cac00158d0 100644
--- a/paddle/fluid/lite/core/naive_test_model.py
+++ b/paddle/fluid/lite/core/naive_test_model.py
@@ -18,10 +18,10 @@ import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.backward import append_backward
 
-a = fluid.layers.data(name="a", shape=[100], dtype='float32')
-label = fluid.layers.data(name="label", shape=[100], dtype='float32')
+a = fluid.layers.data(name="a", shape=[2], dtype='float32')
+label = fluid.layers.data(name="label", shape=[10], dtype='float32')
 
-a1 = fluid.layers.fc(input=a, size=500, act=None, bias_attr=False)
+a1 = fluid.layers.fc(input=a, size=3, act=None, bias_attr=False)
 
 cost = fluid.layers.square_error_cost(a1, label)
 avg_cost = fluid.layers.mean(cost)
@@ -36,7 +36,7 @@ exe.run(fluid.default_startup_program())
 with open('startup_program.pb', 'wb') as f:
     f.write(fluid.default_startup_program().desc.serialize_to_string())
 
-data_1 = np.array(numpy.random.random([100, 100]), dtype='float32')
+#data_1 = np.array(numpy.random.random([100, 100]), dtype='float32')
 
 #fluid.default_main_program().desc.
@@ -50,7 +50,7 @@ with open('main_program.pb', 'wb') as f:
 
 #outs = exe.run(program=prog, feed={'a':data_1, }, fetch_list=[cost])
 
-sys.exit(0)
+#sys.exit(0)
 fluid.io.save_inference_model("./model2", [a.name], [a1], exe)
 
-print(numpy.array(outs))
+#print(numpy.array(outs))
diff --git a/paddle/fluid/lite/core/optimizer.h b/paddle/fluid/lite/core/optimizer.h
index 5e94d5d5d16bd506f863ead9ca65eeaaa122ba80..a3e0641b1c7a44809e2a8fdc1b34a49772f71085 100644
--- a/paddle/fluid/lite/core/optimizer.h
+++ b/paddle/fluid/lite/core/optimizer.h
@@ -51,8 +51,8 @@ class Optimizer {
           "lite_conv_bn_fuse_pass",                   //
           "lite_conv_elementwise_add_act_fuse_pass",  //
           "lite_fc_fuse_pass",                        //
-          "static_kernel_pick_pass",                  //
 #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+          "static_kernel_pick_pass",        //
           "variable_place_inference_pass",  //
           "argument_type_display_pass",     //
           "type_target_transform_pass",     //
diff --git a/paddle/fluid/lite/kernels/arm/conv_compute.cc b/paddle/fluid/lite/kernels/arm/conv_compute.cc
index 0b464a5df0b0c33e76d2a31db183a515fea7a015..a8a2ac790a3c045642277ef75367bbdd878f0d6d 100644
--- a/paddle/fluid/lite/kernels/arm/conv_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/conv_compute.cc
@@ -100,15 +100,15 @@ void ConvCompute::Run() {
 REGISTER_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW,
                      paddle::lite::kernels::arm::ConvCompute, def)
     .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
-    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
+    // .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
     .Finalize();
 
 REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW,
                      paddle::lite::kernels::arm::ConvCompute, def)
     .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
-    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
+    // .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
     .Finalize();
diff --git a/paddle/fluid/lite/kernels/arm/conv_compute_test.cc b/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
index e4d80265d7728fa0eeea97fd070a982a8888ec7e..f25a5cf07452f128681bb4367b7dfc8f7fb09c0d 100644
--- a/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
@@ -45,7 +45,7 @@ void conv_compute_ref(const operators::ConvParam& param) {
     bias_data = param.bias->mutable_data<float>();
   }
   bool flag_bias = bias_data != nullptr;
-  bool flag_relu = false;  // TODO(hong19860320) param.relu
+  bool flag_relu = param.fuse_relu;
 
   int num = input_dims[0];
   int chout = output_dims[1];
@@ -183,7 +183,8 @@ TEST(conv_arm, compute) {
   auto* filter_data = filter.mutable_data<float>();
   auto* output_data = output.mutable_data<float>();
   for (int i = 0; i < input.dims().production(); i++) {
-    input_data[i] = static_cast<float>(i % 128);
+    float sign = i % 3 == 0 ? -1.0f : 1.0f;
+    input_data[i] = sign * static_cast<float>(i % 128);
   }
   for (int i = 0; i < filter.dims().production(); i++) {
     filter_data[i] =
@@ -208,7 +209,7 @@ TEST(conv_arm, compute) {
               }
               param.bias = &bias;
             }
-            // TODO(hong19860320) param.relu = flag_relu;
+            param.fuse_relu = flag_relu;
             param.paddings = std::vector<int>({padding, padding});
             param.strides = std::vector<int>({stride, stride});
             param.dilations =
diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc b/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc
index 310cde17bbd2f235789250fa02f8e8f82f672ff0..e9d9f4927b7ee18b3e18efa69a00dcb1c813bf3b 100644
--- a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc
@@ -25,8 +25,31 @@ void ElementwiseAddCompute::Run() {
   const float* x_data = param.X->data<float>();
   const float* y_data = param.Y->data<float>();
   float* out_data = param.Out->mutable_data<float>();
-  int n = param.X->dims().production();
-  lite::arm::math::elementwise_add(x_data, y_data, out_data, n);
+  int axis = param.axis;
+  auto x_dims = param.X->dims();
+  auto y_dims = param.Y->dims();
+  if (axis < 0) {
+    axis = x_dims.size() - y_dims.size();
+  }
+  if (x_dims.size() == y_dims.size()) {
+    lite::arm::math::elementwise_add(x_data, y_data, out_data,
+                                     x_dims.production());
+  } else {
+    int batch = 1;
+    int channels = 1;
+    int num = 1;
+    for (int i = 0; i < axis; ++i) {
+      batch *= x_dims[i];
+    }
+    for (int i = 0; i < y_dims.size(); ++i) {
+      channels *= y_dims[i];
+    }
+    for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) {
+      num *= x_dims[i];
+    }
+    lite::arm::math::elementwise_add_axis(x_data, y_data, out_data, batch,
+                                          channels, num);
+  }
 }
 
 }  // namespace arm
diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc b/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc
index 7156d08ce77df9c93ec46c1c55fb3a11df44a308..20b998dc6cfa8a9606fcf0f716470366fdd60338 100644
--- a/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc
@@ -41,40 +41,97 @@ void elementwise_add_compute_ref(const operators::ElementwiseParam& param) {
   const dtype* x_data = param.X->data<dtype>();
   const dtype* y_data = param.Y->data<dtype>();
   dtype* out_data = param.Out->mutable_data<dtype>();
-  DDim dim = param.X->dims();
-  ASSERT_EQ(dim.data(), param.Out->dims().data());
-  for (int i = 0; i < dim.production(); i++) {
-    out_data[i] = x_data[i] + y_data[i];
+  auto x_dims = param.X->dims();
+  auto y_dims = param.Y->dims();
+  int axis = param.axis;
+  if (axis < 0) {
+    axis = x_dims.size() - y_dims.size();
+  }
+  int batch = 1;
+  int channels = 1;
+  int num = 1;
+  for (int i = 0; i < axis; ++i) {
+    batch *= x_dims[i];
+  }
+  for (int i = 0; i < y_dims.size(); ++i) {
+    channels *= y_dims[i];
+  }
+  for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) {
+    num *= x_dims[i];
+  }
+  for (int i = 0; i < batch; ++i) {
+    for (int j = 0; j < channels; ++j) {
+      int offset = (i * channels + j) * num;
+      const dtype* din_ptr = x_data + offset;
+      const dtype diny_data = y_data[j];
+      dtype* dout_ptr = out_data + offset;
+      for (int k = 0; k < num; ++k) {
+        *dout_ptr = *din_ptr + diny_data;
+        dout_ptr++;
+        din_ptr++;
+      }
+    }
   }
 }
 
 TEST(elementwise_add, compute) {
   ElementwiseAddCompute elementwise_add;
   operators::ElementwiseParam param;
+  lite::Tensor x, y, output, output_ref;
 
-  lite::Tensor x, y, out, out_ref;
-  x.Resize(DDim(std::vector<int64_t>({2, 3, 4, 5})));
-  y.Resize(DDim(std::vector<int64_t>({2, 3, 4, 5})));
-  out.Resize(DDim(std::vector<int64_t>({2, 3, 4, 5})));
-  out_ref.Resize(DDim(std::vector<int64_t>({2, 3, 4, 5})));
-  auto* x_data = x.mutable_data<float>();
-  auto* y_data = y.mutable_data<float>();
-  auto* out_data = out.mutable_data<float>();
-  auto* out_ref_data = out_ref.mutable_data<float>();
-  for (int i = 0; i < x.dims().production(); i++) {
-    x_data[i] = y_data[i] = i;
-  }
+  for (auto n : {1, 3, 4, 11}) {
+    for (auto c : {1, 3, 4, 11}) {
+      for (auto h : {1, 3, 4, 11}) {
+        for (auto w : {1, 3, 4, 11}) {
+          for (auto axis : {-1, 0, 1, 2, 3}) {
+            for (auto yd :
+                 {std::vector<int64_t>({n}), std::vector<int64_t>({c}),
+                  std::vector<int64_t>({h}), std::vector<int64_t>({w}),
+                  std::vector<int64_t>({n, c}), std::vector<int64_t>({c, h}),
+                  std::vector<int64_t>({h, w}),
+                  std::vector<int64_t>({n, c, h}),
+                  std::vector<int64_t>({c, h, w}),
+                  std::vector<int64_t>({n, c, h, w})}) {
+              auto x_dim = DDim(std::vector<int64_t>({n, c, h, w}));
+              auto y_dim = DDim(yd);
+              int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis;
 
-  param.X = &x;
-  param.Y = &y;
-  param.Out = &out;
-  elementwise_add.SetParam(param);
-  elementwise_add.Run();
+              if (axis_t + y_dim.size() > 4) continue;
+              bool flag = false;
+              for (int i = 0; i < y_dim.size(); i++) {
+                if (x_dim[i + axis_t] != y_dim[i]) flag = true;
+              }
+              if (flag) continue;
 
-  param.Out = &out_ref;
-  elementwise_add_compute_ref(param);
-  for (int i = 0; i < out.dims().production(); i++) {
-    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
+              x.Resize(x_dim);
+              y.Resize(y_dim);
+              output.Resize(x_dim);
+              output_ref.Resize(x_dim);
+              auto* x_data = x.mutable_data<float>();
+              auto* y_data = y.mutable_data<float>();
+              auto* output_data = output.mutable_data<float>();
+              auto* output_ref_data = output_ref.mutable_data<float>();
+              for (int i = 0; i < x_dim.production(); i++) {
+                x_data[i] = i;
+              }
+              for (int i = 0; i < y_dim.production(); i++) {
+                y_data[i] = i;
+              }
+              param.X = &x;
+              param.Y = &y;
+              param.axis = axis;
+              param.Out = &output;
+              elementwise_add.SetParam(param);
+              elementwise_add.Run();
+              param.Out = &output_ref;
+              elementwise_add_compute_ref(param);
+              for (int i = 0; i < output.dims().production(); i++) {
+                EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5);
+              }
+            }
+          }
+        }
+      }
+    }
   }
 }
diff --git a/paddle/fluid/lite/kernels/arm/pool_compute.cc b/paddle/fluid/lite/kernels/arm/pool_compute.cc
index 6a7716fae6bfc3aa52dad7c8b8192191e986b6f3..168b0e50c98bcf8eab324b627478a7790e665b82 100644
--- a/paddle/fluid/lite/kernels/arm/pool_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/pool_compute.cc
@@ -163,7 +163,7 @@ PrecisionType PoolCompute::precision() const { return PRECISION(kFloat); }
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_LITE_KERNEL(pool, kARM, kFloat, kNCHW,
+REGISTER_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW,
                      paddle::lite::kernels::arm::PoolCompute, def)
     .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
     .Finalize();
diff --git a/paddle/fluid/lite/kernels/arm/pool_compute_test.cc b/paddle/fluid/lite/kernels/arm/pool_compute_test.cc
index b024ccef9d526d56bcf52c1600940ff0804eaf1f..844976b963b547d7458f73bec4777281f26bc52a 100644
--- a/paddle/fluid/lite/kernels/arm/pool_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/pool_compute_test.cc
@@ -272,4 +272,4 @@ TEST(pool, retrive_op) {
 }  // namespace lite
 }  // namespace paddle
 
-USE_LITE_KERNEL(pool, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);
diff --git a/paddle/fluid/lite/kernels/arm/relu_compute.h b/paddle/fluid/lite/kernels/arm/relu_compute.h
index 29d17bf5918e112dfd065c9cc11910703ab5e92d..def3f02c5046c8f60fb5c6d518361ae8456253a4 100644
--- a/paddle/fluid/lite/kernels/arm/relu_compute.h
+++ b/paddle/fluid/lite/kernels/arm/relu_compute.h
@@ -45,4 +45,6 @@ class ReluCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 
 REGISTER_LITE_KERNEL(relu, kARM, kFloat, kNCHW,
                      paddle::lite::kernels::arm::ReluCompute, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
     .Finalize();
diff --git a/paddle/fluid/lite/operators/batch_norm_op_test.cc b/paddle/fluid/lite/operators/batch_norm_op_test.cc
index b91c367d92b721c1f96fd5fc92ec0b4f877408e4..9fb02759722e21dcd18276359edf3d84da766d04 100644
--- a/paddle/fluid/lite/operators/batch_norm_op_test.cc
+++ b/paddle/fluid/lite/operators/batch_norm_op_test.cc
@@ -46,7 +46,7 @@ TEST(batch_norm_op_lite, test) {
   desc.SetInput("Mean", {"mean"});
   desc.SetInput("Variance", {"variance"});
   desc.SetOutput("Y", {"y"});
-  desc.SetAttr("is_test", true);
+  desc.SetAttr("is_test", static_cast<int>(1));
   desc.SetAttr("use_global_stats", false);
   desc.SetAttr("epsilon", 1e-5f);
   desc.SetAttr("momentum", 0.9f);
@@ -101,7 +101,7 @@ TEST(batch_norm_op_lite, test_enable_is_test) {
   desc.SetOutput("VarianceOut", {"variance_out"});
   desc.SetOutput("SavedMean", {"saved_mean"});
   desc.SetOutput("SavedVariance", {"saved_variance"});
-  desc.SetAttr("is_test", false);
+  desc.SetAttr("is_test", static_cast<int>(0));
   desc.SetAttr("use_global_stats", false);
   desc.SetAttr("epsilon", 1e-5f);
   desc.SetAttr("momentum", 0.9f);
diff --git a/paddle/fluid/lite/operators/conv_op.h b/paddle/fluid/lite/operators/conv_op.h
index 3f974ea24890f3596d44fadeae5151a454dcf06d..2eeb399aecc951aeece22b0e78680b29577e4c59 100644
--- a/paddle/fluid/lite/operators/conv_op.h
+++ b/paddle/fluid/lite/operators/conv_op.h
@@ -56,23 +56,26 @@ class ConvOpLite : public OpLite {
     if (std::find(input_arg_names.begin(), input_arg_names.end(), "Bias") !=
         input_arg_names.end()) {
       auto bias_arguments = op_desc.Input("Bias");
-      if (bias_arguments.size() != 0) {
+      if (bias_arguments.size() > 0) {
         auto bias_var = scope->FindVar(bias_arguments.front());
         if (bias_var != nullptr) {
-          param_.bias = bias_var->GetMutable<lite::Tensor>();
+          param_.bias =
+              const_cast<lite::Tensor*>(&(bias_var->Get<lite::Tensor>()));
         }
       }
     }
 
     if (std::find(input_arg_names.begin(), input_arg_names.end(),
                   "ResidualData") != input_arg_names.end()) {
-      auto res_argument = op_desc.Input("ResidualData");
-      if (res_argument.size() != 0) {
-        auto residual_data_var = scope->FindVar(res_argument.front());
+      auto res_data_arguments = op_desc.Input("ResidualData");
+      if (res_data_arguments.size() > 0) {
+        auto residual_data_var = scope->FindVar(res_data_arguments.front());
         if (residual_data_var != nullptr) {
-          param_.residualData = residual_data_var->GetMutable<lite::Tensor>();
+          param_.residualData = const_cast<lite::Tensor*>(
+              &(residual_data_var->Get<lite::Tensor>()));
         }
       }
     }
+    param_.fuse_relu = op_desc.GetAttr<bool>("fuse_relu");
     return true;
   }
diff --git a/paddle/fluid/lite/operators/pool_op.h b/paddle/fluid/lite/operators/pool_op.h
index 2e9a02eec189599ba2fc23da8e7bcc9ebd0ea8b3..29946ed92a445dd7f43ae3f45362780f2912f17a 100644
--- a/paddle/fluid/lite/operators/pool_op.h
+++ b/paddle/fluid/lite/operators/pool_op.h
@@ -53,17 +53,25 @@ class PoolOpLite : public OpLite {
     param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
op_desc.GetAttr>("paddings"); - param_.exclusive = op_desc.GetAttr("exclusive"); - param_.adaptive = op_desc.GetAttr("adaptive"); - param_.ceil_mode = op_desc.GetAttr("ceil_mode"); - param_.use_quantizer = op_desc.GetAttr("use_quantizer"); + if (op_desc.HasAttr("exclusive")) { + param_.exclusive = op_desc.GetAttr("exclusive"); + } + if (op_desc.HasAttr("adaptive")) { + param_.adaptive = op_desc.GetAttr("adaptive"); + } + if (op_desc.HasAttr("ceil_mode")) { + param_.ceil_mode = op_desc.GetAttr("ceil_mode"); + } + if (op_desc.HasAttr("use_quantizer")) { + param_.use_quantizer = op_desc.GetAttr("use_quantizer"); + } // param_.data_format = op_desc.GetAttr("data_format"); return true; } void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } - std::string DebugString() const override { return "pool"; } + std::string DebugString() const override { return "pool2d"; } private: mutable PoolParam param_; diff --git a/paddle/fluid/lite/operators/pool_op_test.cc b/paddle/fluid/lite/operators/pool_op_test.cc index 9ab2865f1d04f2ca173b9d2f5f7d9e457f6754e8..e9616ede5a49671d70094edc45224fb4a5a7a927 100644 --- a/paddle/fluid/lite/operators/pool_op_test.cc +++ b/paddle/fluid/lite/operators/pool_op_test.cc @@ -38,7 +38,7 @@ TEST(pool_op_lite, test) { // prepare op desc cpp::OpDesc desc; - desc.SetType("pool"); + desc.SetType("pool2d"); desc.SetInput("X", {"x"}); desc.SetOutput("Out", {"output"}); @@ -69,7 +69,7 @@ TEST(pool_op_lite, test) { bool use_quantizer{false}; desc.SetAttr("use_quantizer", use_quantizer); - PoolOpLite pool("pool"); + PoolOpLite pool("pool2d"); pool.SetValidPlaces({Place{TARGET(kARM), PRECISION(kFloat)}}); pool.Attach(desc, &scope); auto kernels = pool.CreateKernels({Place{TARGET(kARM), PRECISION(kFloat)}}); @@ -86,5 +86,5 @@ TEST(pool_op_lite, test) { } // namespace paddle #ifdef LITE_WITH_ARM -USE_LITE_KERNEL(pool, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def); #endif diff --git a/paddle/fluid/lite/operators/split_op.cc b/paddle/fluid/lite/operators/split_op.cc index 0d5075b0971e4bd98de8aac9810bbe7514c1a562..58768276377edd9ea92356a808a6f46c3b5c6a80 100644 --- a/paddle/fluid/lite/operators/split_op.cc +++ b/paddle/fluid/lite/operators/split_op.cc @@ -37,7 +37,7 @@ bool SplitOp::InferShape() const { const auto §ions = param_.sections; const int outs_number = outs.size(); - std::vector outs_dims; + std::vector outs_dims; outs_dims.reserve(outs_number); if (num > 0) {