diff --git a/CMakeLists.txt b/CMakeLists.txt
index 316967de93cbd0fbda5d53e46c827338b9d0aabf..99032ada82dc6c0e085bc9d4e6b98ee204f63d33 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,8 +17,10 @@ elseif(FPGA)
     add_definitions(-DPADDLE_MOBILE_FPGA)
 endif()
 
+set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
 if (DEBUGING)
     set(CMAKE_BUILD_TYPE Debug)
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS}")
 else()
     set(CMAKE_BUILD_TYPE Release)
 endif ()
@@ -55,7 +57,6 @@ else ()
     add_definitions(-DX86)
 endif()
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
 set(CMAKE_VERBOSE_MAKEFILE ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build)
@@ -126,7 +127,7 @@ else ()
     add_definitions(-DCONV_OP)
     add_definitions(-DDEPTHWISECONV_OP)
     add_definitions(-DELEMENTWISEADD_OP)
-    add_definitions(-DFUSIONCONVADD_OP)
+    add_definitions(-DFUSION_CONVADD_OP)
     add_definitions(-DCONVADDRELU_OP)
     add_definitions(-DFUSION_FC_OP)
     add_definitions(-DLRN_OP)
diff --git a/src/framework/op_registry.h b/src/framework/op_registry.h
index f1f7a954e07bc403476d5ac987b94aa9c6700e29..8a7beae993be1a9f2a52fb48d4930754aba784e1 100644
--- a/src/framework/op_registry.h
+++ b/src/framework/op_registry.h
@@ -96,74 +96,43 @@ class OpRegistry {
   }
 };
 
-#ifdef PADDLE_MOBILE_CPU
-
-#define REGISTER_OPERATOR_CPU(op_type, op_class) \
-  template <typename Dtype, typename T> \
-  class _OpClass_##op_type##_cpu : public op_class<Dtype, T> { \
-   public: \
-    DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_cpu, op_class); \
-  }; \
-  static paddle_mobile::framework::OperatorRegistrar< \
-      paddle_mobile::CPU, _OpClass_##op_type##_cpu<paddle_mobile::CPU, float>> \
-      __op_registrar_##op_type##__cpu(#op_type); \
-  int TouchOpRegistrar_##op_type##_cpu() { \
-    __op_registrar_##op_type##__cpu.Touch(); \
-    return 0; \
+// Defines a device-specific subclass of `op_class`, a static registrar for it
+// under the name #op_type, and a TouchOpRegistrar_* hook for USE_OP to call.
+#define REGISTER_OPERATOR(op_type, op_class, device_name, device_type) \
+  template <typename Dtype, typename T> \
+  class _OpClass_##op_type##_##device_name : public op_class<Dtype, T> { \
+   public: \
+    DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_##device_name, op_class); \
+  }; \
+  static paddle_mobile::framework::OperatorRegistrar< \
+      device_type, _OpClass_##op_type##_##device_name<device_type, float>> \
+      __op_registrar_##op_type##_##device_name(#op_type); \
+  int TouchOpRegistrar_##op_type##_##device_name() { \
+    __op_registrar_##op_type##_##device_name.Touch(); \
+    return 0; \
   }
 
-#define USE_OP_CPU(op_type) \
-  extern int TouchOpRegistrar_##op_type##_cpu(); \
-  static int use_op_itself_##op_type##_ __attribute__((unused)) = \
-      TouchOpRegistrar_##op_type##_cpu()
-
-#endif
-
-#ifdef PADDLE_MOBILE_MALI_GPU
-#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
-  template <typename Dtype, typename T> \
-  class _OpClass_##op_type##_mali_gpu : public op_class<Dtype, T> { \
-   public: \
-    DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_mali_gpu, op_class); \
-  }; \
-  static paddle_mobile::framework::OperatorRegistrar< \
-      paddle_mobile::CPU, \
-      _OpClass_##op_type##_mali_gpu<paddle_mobile::CPU, float>> \
-      __op_registrar_##op_type##__mali_gpu(#op_type); \
-  int TouchOpRegistrar_##op_type##_mali_gpu() { \
-    __op_registrar_##op_type##__mali_gpu.Touch(); \
-    return 0; \
-  }
+// Per-device wrappers. No trailing ';' here: the call site supplies it.
+#define REGISTER_OPERATOR_CPU(op_type, op_class) \
+  REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU)
 
-#define USE_OP_MALI_GPU(op_type) \
-  extern int TouchOpRegistrar_##op_type##_mali_gpu(); \
-  static int use_op_itself_##op_type##_ __attribute__((unused)) = \
-      TouchOpRegistrar_##op_type##_mali_gpu()
-
-#endif
-
-#ifdef PADDLE_MOBILE_FPGA
-#define REGISTER_OPERATOR_FPGA(op_type, op_class) \
-  template <typename Dtype, typename T> \
-  class _OpClass_##op_type##_fpga : public op_class<Dtype, T> { \
-   public: \
-    DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_fpga, op_class); \
-  }; \
-  static paddle_mobile::framework::OperatorRegistrar< \
-      paddle_mobile::CPU, \
-      _OpClass_##op_type##_fpga<paddle_mobile::CPU, float>> \
-      __op_registrar_##op_type##__fpga(#op_type); \
-  int TouchOpRegistrar_##op_type##_fpga() { \
-    __op_registrar_##op_type##__fpga.Touch(); \
-    return 0; \
-  }
+#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
+  REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI)
+
+#define REGISTER_OPERATOR_FPGA(op_type, op_class) \
+  REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA)
+
+// References the matching TouchOpRegistrar_* hook from this translation unit.
+#define USE_OP(op_type, device_name) \
+  extern int TouchOpRegistrar_##op_type##_##device_name(); \
+  static int use_op_itself_##op_type##_##device_name __attribute__((unused)) = \
+      TouchOpRegistrar_##op_type##_##device_name()
+
+#define USE_OP_CPU(op_type) USE_OP(op_type, cpu)
 
-#define USE_OP_FPGA(op_type) \
-  extern int TouchOpRegistrar_##op_type##_fpga(); \
-  static int use_op_itself_##op_type##_ __attribute__((unused)) = \
-      TouchOpRegistrar_##op_type##_fpga()
+#define USE_OP_MALI_GPU(op_type) USE_OP(op_type, mali_gpu)
 
-#endif
+#define USE_OP_FPGA(op_type) USE_OP(op_type, fpga)
 
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index a5f9afebdd1c68d1858679a22d001d42a745c62d..a8f808519130140e76aab1ced34bbc4885314574 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -131,7 +131,6 @@ class Tensor {
     }
     PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor'snumel must >=0.")
     int64_t size = numel() * SizeOfType(type);
-    /* some versions of boost::variant don't have operator!= */
     if (holder_ == nullptr || holder_->size() < size + offset_) {
       holder_.reset(new PlaceholderImpl(size, type));
       offset_ = 0;
diff --git a/src/operators/fusion_conv_add.h b/src/operators/fusion_conv_add.h
index 315a7e93c498347f57178520d20bb810045ea8a7..dc35409b4666aafc7b19c23c02cf6003acdd7dc7 100644
--- a/src/operators/fusion_conv_add.h
+++ b/src/operators/fusion_conv_add.h
@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-
+#define FUSION_CONVADD_OP
 #ifdef FUSION_CONVADD_OP
 
 #pragma once
diff --git a/src/operators/kernel/arm/conv_add_kernel.cpp b/src/operators/kernel/arm/conv_add_kernel.cpp
index 37ce70071b93fde4bafd0592df6105182d2f09cb..24c68a090592dca70bc403861d0684d375955dbf 100644
--- a/src/operators/kernel/arm/conv_add_kernel.cpp
+++ b/src/operators/kernel/arm/conv_add_kernel.cpp
@@ -18,6 +18,37 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 
+// Expands the 1-D `bias` to shape `dDim` by broadcasting it along `axis`:
+// afterwards every element whose coordinate on `axis` is c holds the original
+// bias[c]. `bias` is resized in place.
+void expand_bias(Tensor &bias, int axis, const DDim &dDim) {
+  PADDLE_MOBILE_ENFORCE(bias.dims().size() == 1,
+                        "the bias tensor's dims size != 1")
+  const int axis_size = dDim[axis];
+  PADDLE_MOBILE_ENFORCE(bias.numel() == axis_size,
+                        "the bias tensor's size != dims[axis]")
+  // Copy the values out first: mutable_data() after Resize() may hand back
+  // different storage, so the old pointer must not be read past that point.
+  const float *bias_ptr = bias.data<float>();
+  const std::vector<float> bias_values(bias_ptr, bias_ptr + axis_size);
+  DDim outer_ddim = paddle_mobile::framework::slice_ddim(dDim, 0, axis + 1);
+  DDim inner_ddim =
+      paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size());
+  int outer_size = paddle_mobile::framework::product(outer_ddim);
+  int inner_size = paddle_mobile::framework::product(inner_ddim);
+  bias.Resize(dDim);
+  auto new_ptr = bias.mutable_data<float>();
+  for (int i = 0; i < outer_size; ++i) {
+    // i walks dims [0, axis] in row-major order, so its coordinate on `axis`
+    // is i % axis_size. (i * axis_size / outer_size is only correct when all
+    // dims before `axis` are 1, i.e. it breaks for batch size > 1.)
+    float v_bias = bias_values[i % axis_size];
+    for (int j = 0; j < inner_size; ++j) {
+      new_ptr[i * inner_size + j] = v_bias;
+    }
+  }
+}
+
 template <>
 void ConvAddKernel<CPU, float>::Compute(
     const FushionConvAddParam &param) const {
@@ -25,15 +56,16 @@ void ConvAddKernel<CPU, float>::Compute(
 
   const Tensor *input = param.Input();
   Tensor filter = *param.Filter();
+  Tensor bias = *param.Bias();
+  int axis = param.Axis();
   Tensor *output = param.Output();
-  output->mutable_data<float>();
+  expand_bias(bias, axis, output->dims());
+  output->ShareDataWith(bias);
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
   std::vector<int> paddings = param.Paddings();
   std::vector<int> dilations = param.Dilations();
 
-  // DLOG << " compute end get Attrs " << strides[0];
-
   const int batch_size = static_cast<int>(input->dims()[0]);
 
   std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
@@ -66,7 +98,6 @@ void ConvAddKernel<CPU, float>::Compute(
   framework::DDim filter_matrix_shape = {filter.dims()[0],
                                          filter.numel() / filter.dims()[0]};
   filter.Resize(filter_matrix_shape);
-  DLOG << " filter.dims() = " << filter.dims();
   framework::DDim output_matrix_shape = {
       output->dims()[1],
       output->numel() / (output->dims()[0] * output->dims()[1])};
@@ -105,7 +136,7 @@ void ConvAddKernel<CPU, float>::Compute(
       Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
       math::matmul<float>(filter_slice, false, col_matrix, false,
                           static_cast<float>(1), &out_slice,
-                          static_cast<float>(0));
+                          static_cast<float>(1));
     }
   }
 }
diff --git a/src/operators/kernel/conv_add_kernel.h b/src/operators/kernel/conv_add_kernel.h
index f1d3e1a14f40cb40f145de185bd43ae0612096b8..a6b8b3311bb5c7c6a7a809ddc82e070bff41c794 100644
--- a/src/operators/kernel/conv_add_kernel.h
+++ b/src/operators/kernel/conv_add_kernel.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #pragma once
 
 #include <vector>
+#include "framework/ddim.h"
 #include "framework/operator.h"
 #include "operators/math/im2col.h"
 #include "operators/math/math_function.h"
@@ -26,6 +27,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 
+using framework::DDim;
 using framework::OpKernelBase;
 
 template <typename DeviceType, typename T>
diff --git a/tools/build.sh b/tools/build.sh
index 580508c7ed17f4e2c76769e601521b6f4b25b5ba..4ac63315a94798d3aca63fb62aef511c4146cd3c 100755
--- a/tools/build.sh
+++ b/tools/build.sh
@@ -15,7 +15,6 @@ build_for_mac() {
     fi
     PLATFORM="x86"
     MODE="Release"
-    CXX_FLAGS="-std=c++11 -O3 -s"
     BUILD_DIR=../build/release/"${PLATFORM}"
     mkdir -p ${BUILD_DIR}/build
 
@@ -25,7 +24,6 @@ build_for_mac() {
     cmake .. \
         -B"${BUILD_DIR}" \
         -DCMAKE_BUILD_TYPE="${MODE}" \
-        -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
         -DIS_MAC=true
 
     cd ${BUILD_DIR}
@@ -46,11 +44,11 @@ build_for_android() {
     if [ "${PLATFORM}" = "arm-v7a" ]; then
         ABI="armeabi-v7a with NEON"
         ARM_PLATFORM="V7"
-        CXX_FLAGS="-O3 -std=c++11 -s -march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security"
+        CXX_FLAGS="-march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security"
     elif [ "${PLATFORM}" = "arm-v8a" ]; then
         ABI="arm64-v8a"
         ARM_PLATFORM="V8"
-        CXX_FLAGS="-O3 -std=c++11 -s -march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog"
+        CXX_FLAGS="-march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog"
     else
         echo "unknown platform!"
         exit -1
diff --git a/tools/scripts/push2android.sh b/tools/scripts/run_on_android.sh
similarity index 100%
rename from tools/scripts/push2android.sh
rename to tools/scripts/run_on_android.sh