diff --git a/CMakeLists.txt b/CMakeLists.txt index 02e4ebb3c79e699c7a383abec6ca7d3c6f0dc87d..83da232f9f56565028ca95876f7ecfbbf355a253 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,27 @@ cmake_minimum_required(VERSION 3.0) project(paddle-mobile) -option(DEBUGING "enable debug mode" OFF) +option(DEBUGING "enable debug mode" ON) option(USE_OPENMP "openmp support" OFF) -option(USE_EXCEPTION "use std exception" OFF) +option(USE_EXCEPTION "use std exception" ON) +option(LOG_PROFILE "log profile" ON) +# select the platform to build +option(CPU "cpu" ON) +option(MALI_GPU "mali gpu" OFF) +option(FPGA "fpga" OFF) + +if (CPU) + add_definitions(-DPADDLE_MOBILE_CPU) +elseif (MALI_GPU) + add_definitions(-DPADDLE_MOBILE_MALI_GPU) +elseif(FPGA) + add_definitions(-DPADDLE_MOBILE_FPGA) +endif() +set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}") if (DEBUGING) set(CMAKE_BUILD_TYPE Debug) + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS}") else() set(CMAKE_BUILD_TYPE Release) endif () @@ -24,12 +39,17 @@ else() endif() if (USE_EXCEPTION) + message(STATUS "use exception") add_definitions(-DENABLE_EXCEPTION) add_definitions(-fexceptions) else() add_definitions(-fno-exceptions) endif () +if (LOG_PROFILE) + add_definitions(-DPADDLE_MOBILE_PROFILE) +endif() + if(IS_MAC) add_definitions(-DX86) elseif(IS_IOS) @@ -42,7 +62,6 @@ else () add_definitions(-DX86) endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") set(CMAKE_VERBOSE_MAKEFILE ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build) @@ -74,6 +93,7 @@ if (googlenet) add_definitions(-DFUSION_FC_OP) add_definitions(-DPOOL_OP) add_definitions(-DRELU_OP) + add_definitions(-DFUSION_CONVADD_OP) elseif (mobilenet) add_definitions(-DCONV_OP) add_definitions(-DELEMENTWISEADD_OP) @@ -112,7 +132,7 @@ else () add_definitions(-DCONV_OP) add_definitions(-DDEPTHWISECONV_OP) add_definitions(-DELEMENTWISEADD_OP) - add_definitions(-DFUSIONCONVADD_OP) + add_definitions(-DFUSION_CONVADD_OP) 
add_definitions(-DCONVADDRELU_OP) add_definitions(-DFUSION_FC_OP) add_definitions(-DLRN_OP) @@ -127,8 +147,13 @@ else () add_definitions(-DTRANSPOSE_OP) endif() - -add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) +if (IS_IOS) + add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) +elseif(ANDROID) + add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) +else() + add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) +endif () if(DEBUGING) add_subdirectory(test) diff --git a/src/common/types.h b/src/common/types.h index 092c251552ef8007a4000f6b84dce1bbc4059b91..73252749be33312520c17511bbfaceadfde7bf7d 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -99,6 +99,7 @@ static std::unordered_map< std::string, std::pair, std::vector>> op_input_output_key = { {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}}, + {G_OP_TYPE_CONV_ADD, {{"Input"}, {"Out"}}}, {G_OP_TYPE_RELU, {{"X"}, {"Out"}}}, {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}}, {G_OP_TYPE_MUL, {{"X"}, {"Out"}}}, diff --git a/src/framework/op_registry.h b/src/framework/op_registry.h index 62398dcb15dc61ef2f778b738da0afd073b37908..8a7beae993be1a9f2a52fb48d4930754aba784e1 100644 --- a/src/framework/op_registry.h +++ b/src/framework/op_registry.h @@ -96,24 +96,39 @@ class OpRegistry { } }; -#define REGISTER_OPERATOR(op_type, op_class) \ - template \ - class _OpClass_##op_type##_ : public op_class { \ - public: \ - DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_, op_class); \ - }; \ - static paddle_mobile::framework::OperatorRegistrar< \ - paddle_mobile::CPU, _OpClass_##op_type##_> \ - __op_registrar_##op_type##__(#op_type); \ - int TouchOpRegistrar_##op_type() { \ - __op_registrar_##op_type##__.Touch(); \ - return 0; \ +#define REGISTER_OPERATOR(op_type, op_class, device_name, device_type) \ + template \ + class _OpClass_##op_type##_##device_name : public op_class { \ + public: \ + DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_##device_name, op_class); 
\ + }; \ + static paddle_mobile::framework::OperatorRegistrar< \ + device_type, _OpClass_##op_type##_##device_name> \ + __op_registrar_##op_type##_##device_name(#op_type); \ + int TouchOpRegistrar_##op_type##_##device_name() { \ + __op_registrar_##op_type##_##device_name.Touch(); \ + return 0; \ } -#define USE_OP(op_type) \ - extern int TouchOpRegistrar_##op_type(); \ - static int use_op_itself_##op_type##_ __attribute__((unused)) = \ - TouchOpRegistrar_##op_type() +#define REGISTER_OPERATOR_CPU(op_type, op_class) \ + REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU); + +#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \ + REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI); + +#define REGISTER_OPERATOR_FPGA(op_type, op_class) \ + REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA); + +#define USE_OP(op_type, device_name) \ + extern int TouchOpRegistrar_##op_type##_##device_name(); \ + static int use_op_itself_##op_type##_##device_name __attribute__((unused)) = \ + TouchOpRegistrar_##op_type##_##device_name() + +#define USE_OP_CPU(op_type) USE_OP(op_type, cpu); + +#define USE_OP_MALI_GPU(op_type) USE_OP(op_type, mali_gpu); + +#define USE_OP_FPGA(op_type) USE_OP(op_type, fpga); } // namespace framework } // namespace paddle_mobile diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp index f798d7ade200208c1b199aee0410ed4c297ed7fd..0e8d6f97809f7a204acc74a8c5427d88a63949aa 100644 --- a/src/framework/operator.cpp +++ b/src/framework/operator.cpp @@ -58,7 +58,12 @@ void OperatorBase::Run() const { } template class OperatorBase; +template class OperatorBase; +template class OperatorBase; + template class OperatorWithKernel; +template class OperatorWithKernel; +template class OperatorWithKernel; } // namespace framework } // namespace paddle_mobile diff --git a/src/framework/operator.h b/src/framework/operator.h index d9b74e8887944774aefadf5ead0a74b1f7eac79d..2ea7626711c4161bbbedd5e26cdc895c27cdcd83 100644 
--- a/src/framework/operator.h +++ b/src/framework/operator.h @@ -153,6 +153,7 @@ class FusionOpMatcher { std::string BeginType() { return node_.Type(); } + // virtual bool Fusion(); protected: Node node_; std::string type_; diff --git a/src/framework/tensor.h b/src/framework/tensor.h index a5f9afebdd1c68d1858679a22d001d42a745c62d..a8f808519130140e76aab1ced34bbc4885314574 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -131,7 +131,6 @@ class Tensor { } PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor'snumel must >=0.") int64_t size = numel() * SizeOfType(type); - /* some versions of boost::variant don't have operator!= */ if (holder_ == nullptr || holder_->size() < size + offset_) { holder_.reset(new PlaceholderImpl(size, type)); offset_ = 0; diff --git a/src/io/io.cpp b/src/io/io.cpp index 9c34378d99e52e8e2919944a9319e8cc97d6b074..7afb44bb45ab333a2bbda4fee533be995d73a630 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -14,6 +14,10 @@ limitations under the License. */ #include "io.h" #include +#ifdef PADDLE_MOBILE_PROFILE +#include +#include +#endif #include "common/enforce.h" #include "common/log.h" @@ -336,10 +340,34 @@ std::shared_ptr Executor::Predict( feed_tensor->ShareDataWith(t); std::shared_ptr to_predict_block = to_predict_program_->Block(0); +#ifdef PADDLE_MOBILE_PROFILE + std::map _profile; +#endif for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { auto op = ops_of_block_[*to_predict_block.get()][j]; +#ifdef PADDLE_MOBILE_PROFILE + _profile[op->Type()] = clock(); +#endif op->Run(); +#ifdef PADDLE_MOBILE_PROFILE + _profile[op->Type()] = clock() - _profile[op->Type()]; +#endif } +#ifdef PADDLE_MOBILE_PROFILE + { + DLOG << "========================[ profile ]=========================="; + clock_t _ptotal = 0; + for (auto const &p : _profile) { + _ptotal += p.second; + } + for (auto const &p : _profile) { + DLOG << p.first << std::string(16 - p.first.size(), ' ') << "\t" + << (float)p.second << "\t\t" + << 
(float)p.second / (float)_ptotal * 100.0; + } + DLOG << "========================[ ]=========================="; + } +#endif auto ops = ops_of_block_[*to_predict_program_->Block(0)]; auto last_op = ops.rbegin(); auto output_map = (*last_op)->Outputs(); diff --git a/src/operators/batchnorm_op.cpp b/src/operators/batchnorm_op.cpp index 0de5111d0625b45efd51c5afac989391631a3bed..e36cb24b7c46039463b76635536f0af4c6407824 100644 --- a/src/operators/batchnorm_op.cpp +++ b/src/operators/batchnorm_op.cpp @@ -31,7 +31,13 @@ template class BatchNormOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(batch_norm); -REGISTER_OPERATOR(batch_norm, ops::BatchNormOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(batch_norm); +REGISTER_OPERATOR_CPU(batch_norm, ops::BatchNormOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/box_coder_op.cpp b/src/operators/box_coder_op.cpp index 22d006a258ca0cd18b63dc72aed6a02405ff6e81..8dc5da43d0e5594830c48ab2934b0df1875c6a54 100644 --- a/src/operators/box_coder_op.cpp +++ b/src/operators/box_coder_op.cpp @@ -52,7 +52,13 @@ template class BoxCoderOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(box_coder); -REGISTER_OPERATOR(box_coder, ops::BoxCoderOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(box_coder); +REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/concat_op.cpp b/src/operators/concat_op.cpp index 26f5e7d4e48ee2c3402a821b49757b1b0914828a..b9eacde60ee25a91769317bd441058b4afb3f296 100644 --- a/src/operators/concat_op.cpp +++ b/src/operators/concat_op.cpp @@ -62,7 +62,13 @@ template class ConcatOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(concat); -REGISTER_OPERATOR(concat, ops::ConcatOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(concat); +REGISTER_OPERATOR_CPU(concat, 
ops::ConcatOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/conv_op.cpp b/src/operators/conv_op.cpp index c8ec33333f596a6c10491cfdb826f1dc54d69c6f..4be442f6169f65be9cffb2710f4f3aae40e9c905 100644 --- a/src/operators/conv_op.cpp +++ b/src/operators/conv_op.cpp @@ -53,7 +53,17 @@ template class ConvOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(conv2d); -REGISTER_OPERATOR(conv2d, ops::ConvOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(conv2d); +REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +USE_OP_MALI_GPU(conv2d); +REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp); +#endif +#ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(conv2d); +REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp); +#endif #endif diff --git a/src/operators/depthwise_conv_op.cpp b/src/operators/depthwise_conv_op.cpp index 87c9746b4dfa1e74fcf3733656b9b3b27a8740fb..55198fd1160dd1c62cea12879c80539b71690822 100644 --- a/src/operators/depthwise_conv_op.cpp +++ b/src/operators/depthwise_conv_op.cpp @@ -54,7 +54,13 @@ template class DepthwiseConvOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(depthwise_conv2d); -REGISTER_OPERATOR(depthwise_conv2d, ops::DepthwiseConvOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(depthwise_conv2d); +REGISTER_OPERATOR_CPU(depthwise_conv2d, ops::DepthwiseConvOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/elementwise_add_op.cpp b/src/operators/elementwise_add_op.cpp index ff2cd2598814cf9a270090213c0524c165c66ced..5333dcfdb6602e7be235c4faa3651a86502bc8a4 100644 --- a/src/operators/elementwise_add_op.cpp +++ b/src/operators/elementwise_add_op.cpp @@ -29,7 +29,13 @@ template class ElementwiseAddOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(elementwise_add); -REGISTER_OPERATOR(elementwise_add, ops::ElementwiseAddOp); 
+#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(elementwise_add); +REGISTER_OPERATOR_CPU(elementwise_add, ops::ElementwiseAddOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index 25a82894ea96420e94d9d2e4d70809930a954642..9079dbb0b3d83b2b28a046ae3d78025a24fc4958 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -43,8 +43,14 @@ class FeedOp : public framework::OperatorBase { }; namespace ops = paddle_mobile::operators; -USE_OP(feed); -REGISTER_OPERATOR(feed, ops::FeedOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(feed); +REGISTER_OPERATOR_CPU(feed, ops::FeedOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/fetch_op.h b/src/operators/fetch_op.h index 31e17f2b562567de1b4194098995f6ee4cd3caa3..a65338f7f4262de1b74d7a18525f6c3b9551243a 100644 --- a/src/operators/fetch_op.h +++ b/src/operators/fetch_op.h @@ -43,8 +43,14 @@ class FetchOp : public framework::OperatorBase { }; namespace ops = paddle_mobile::operators; -USE_OP(fetch); -REGISTER_OPERATOR(fetch, ops::FetchOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(fetch); +REGISTER_OPERATOR_CPU(fetch, ops::FetchOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/fusion_conv_add.cpp b/src/operators/fusion_conv_add.cpp index fe380bddca585e434418513d5152c1df0426e80d..21e13d87aff263db39e5a6105d49b9e6bdb9e97b 100644 --- a/src/operators/fusion_conv_add.cpp +++ b/src/operators/fusion_conv_add.cpp @@ -12,20 +12,49 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#ifdef FUSIONCONVADD_OP +#ifdef FUSION_CONVADD_OP #include "operators/fusion_conv_add.h" + namespace paddle_mobile { namespace operators { template -void FushionConvAddOp::InferShape() const {} +void FushionConvAddOp::InferShape() const { + auto in_dims = param_.Input()->dims(); + auto filter_dims = param_.Filter()->dims(); + const std::vector &strides = param_.Strides(); + std::vector paddings = param_.Paddings(); + int groups = param_.Groups(); + std::vector dilations = param_.Dilations(); + + PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() && + dilations.size() == paddings.size() && + paddings.size() == strides.size()), + "ConvParam is not suitable"); + + std::vector output_shape({in_dims[0], filter_dims[0]}); + for (size_t i = 0; i < strides.size(); ++i) { + output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], + dilations[i], paddings[i], + strides[i])); + } + + framework::DDim ddim = framework::make_ddim(output_shape); + param_.Output()->Resize(ddim); +} template class FushionConvAddOp; } // namespace operators } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(conv_add); -REGISTER_OPERATOR(conv_add, ops::FushionConvAddOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(conv_add); +REGISTER_OPERATOR_CPU(conv_add, ops::FushionConvAddOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/fusion_conv_add.h b/src/operators/fusion_conv_add.h index 1e6a84b1dc157e0b58273bc5ff379ad079ed2860..dc35409b4666aafc7b19c23c02cf6003acdd7dc7 100644 --- a/src/operators/fusion_conv_add.h +++ b/src/operators/fusion_conv_add.h @@ -11,16 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ - -#ifdef FUSIONCONVADD_OP +#define FUSION_CONVADD_OP +#ifdef FUSION_CONVADD_OP #pragma once #include #include - #include "framework/operator.h" #include "framework/program/program-optimize/fusion_op_register.h" +#include "op_param.h" +#include "operators/kernel/conv_add_kernel.h" namespace paddle_mobile { namespace operators { @@ -53,18 +54,37 @@ class FushionConvAddOp : public framework::OperatorWithKernel { const framework::AttributeMap &attrs, std::shared_ptr scope) : framework::OperatorWithKernel(type, inputs, outputs, attrs, - scope) {} + scope), + param_(inputs, outputs, attrs, *scope) {} - void RunImpl() const {} + void RunImpl() const { + operators::ConvAddKernel kernel; + kernel.Compute(param_); + this->ClearVariables({"Filter", "Input", "Y"}); + } using framework::OperatorWithKernel::OperatorWithKernel; void InferShape() const override; protected: - // FushionFcParam param_; + FushionConvAddParam param_; }; -// static framework::FusionOpRegistrar fc_registrar(new FusionConvAddMatcher()); +inline int ConvOutputSize(int input_size, int filter_size, int dilation, + int padding, int stride) { + const int dkernel = dilation * (filter_size - 1) + 1; + int output_size = (input_size + 2 * padding - dkernel) / stride + 1; + return output_size; +} + +#ifdef PADDLE_MOBILE_CPU +static framework::FusionOpRegistrar convadd_registrar( + new FusionConvAddMatcher()); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h index 4825a01be95f31d11418fe114700aaaa248e0d7e..eb8dd205b45ed5f149f1eeeb5ed730a1c3f8b788 100644 --- a/src/operators/fusion_conv_add_relu_op.h +++ b/src/operators/fusion_conv_add_relu_op.h @@ -46,8 +46,14 @@ class ConvAddReluOp { private: }; -// static framework::FusionOpRegistrar fc_registrar( -// new FushionConvAddReluOpMatcher()); +#ifdef PADDLE_MOBILE_CPU +// static 
framework::FusionOpRegistrar fusion_conv_add_relu_registrar( +// new FushionConvAddReluOpMatcher()); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/fusion_fc_op.cpp b/src/operators/fusion_fc_op.cpp index 8f639e212a1a922fb1a943d2582dd692e1bfabee..c85de862027fb21bcbde77dbc9cba22e7e2811d9 100644 --- a/src/operators/fusion_fc_op.cpp +++ b/src/operators/fusion_fc_op.cpp @@ -54,7 +54,13 @@ template class FushionFcOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(fc); -REGISTER_OPERATOR(fc, ops::FushionFcOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(fc); +REGISTER_OPERATOR_CPU(fc, ops::FushionFcOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h index e5c5c04f6938499c1c39ca2e5120ef3084b9c1ad..839ef07b244e84675cd186f267493eb29095d7e8 100644 --- a/src/operators/fusion_fc_op.h +++ b/src/operators/fusion_fc_op.h @@ -37,8 +37,6 @@ class FusionFcMatcher : public framework::FusionOpMatcher { void FolderNodes( framework::Node *node, std::vector> *removed_nodes) { - vector> origin_descs = - node->OpDescs(node_.Depth()); node->Folder(node_.Depth(), Type(), {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}, removed_nodes); } @@ -69,7 +67,14 @@ class FushionFcOp : public framework::OperatorWithKernel { FushionFcParam param_; }; -// static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); +#ifdef PADDLE_MOBILE_CPU +static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/arm/conv_add_kernel.cpp b/src/operators/kernel/arm/conv_add_kernel.cpp new file mode 100644 
index 0000000000000000000000000000000000000000..24c68a090592dca70bc403861d0684d375955dbf --- /dev/null +++ b/src/operators/kernel/arm/conv_add_kernel.cpp @@ -0,0 +1,138 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef FUSION_CONVADD_OP + +#include "operators/kernel/conv_add_kernel.h" + +namespace paddle_mobile { +namespace operators { + +void expand_bias(Tensor &bias, int axis, const DDim &dDim) { + auto bias_ptr = bias.data(); + const DDim bias_ddim = bias.dims(); + PADDLE_MOBILE_ENFORCE(bias.dims().size() == 1, + "the bias tensor's dims size != 1") + DDim outer_ddim = paddle_mobile::framework::slice_ddim(dDim, 0, axis + 1); + DDim inner_ddim = + paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size()); + int outer_size = paddle_mobile::framework::product(outer_ddim); + int inner_size = paddle_mobile::framework::product(inner_ddim); + bias.Resize(dDim); + auto new_ptr = bias.mutable_data(); + int axis_size = dDim[axis]; + for (int i = 0; i < outer_size; ++i) { + float v_bias = bias_ptr[i * axis_size / outer_size]; + for (int j = 0; j < inner_size; ++j) { + new_ptr[i * inner_size + j] = v_bias; + } + } +} + +template <> +void ConvAddKernel::Compute( + const FushionConvAddParam ¶m) const { + DLOG << param; + + const Tensor *input = param.Input(); + Tensor filter = *param.Filter(); + Tensor bias = *param.Bias(); + int axis = param.Axis(); + Tensor *output = param.Output(); + expand_bias(bias, axis, 
output->dims()); + output->ShareDataWith(bias); + int groups = param.Groups(); + std::vector strides = param.Strides(); + std::vector paddings = param.Paddings(); + std::vector dilations = param.Dilations(); + + const int batch_size = static_cast(input->dims()[0]); + + std::vector filter_shape_vec(framework::vectorize(filter.dims())); + + std::vector output_shape_vec(framework::vectorize(output->dims())); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, data_dim + 1); + + bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations); + Tensor col; + Tensor col_matrix; + if (is_expand) { + col.mutable_data(col_shape); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } + + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); + + framework::DDim filter_matrix_shape = {filter.dims()[0], + filter.numel() / filter.dims()[0]}; + filter.Resize(filter_matrix_shape); + framework::DDim output_matrix_shape = { + output->dims()[1], + output->numel() / (output->dims()[0] * output->dims()[1])}; + + // convolution operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output->dims()[1]) / groups; + + math::Vol2ColFunctor vol2col; + math::Im2ColFunctor im2col; + + for (int i = 0; i < batch_size; i++) { + Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); + Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); + + for (int g = 0; g < groups; g++) { + Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * 
in_step); + + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (data_dim == 2U) { + // im2col + im2col(in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); + } else if (data_dim == 3U) { + // vol2col + vol2col(in_slice, dilations, strides, paddings, &col); + } + + // gemm + Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); + Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); + math::matmul(filter_slice, false, col_matrix, false, + static_cast(1), &out_slice, + static_cast(1)); + } + } +} +template class ConvAddKernel; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/arm/relu_kernel.cpp b/src/operators/kernel/arm/relu_kernel.cpp index 854fa1d185ddb002aa37a10ade0683d841af8793..e7e0941a4d0bf48d86525cc52ee33301cdcbf67e 100644 --- a/src/operators/kernel/arm/relu_kernel.cpp +++ b/src/operators/kernel/arm/relu_kernel.cpp @@ -37,13 +37,71 @@ void ReluKernel::Compute(const ReluParam ¶m) const { auto *out = param.Out(); auto *out_ptr = out->mutable_data(); - ReluFunctor func_; - math::Transform trans; - trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_); + int numel = input_x->numel(); + if (numel > 32) { + asm volatile( + "pld [%[input_x_ptr], #0] \n\t" + "vmov.f32 q8, #0.0 \n\t" + "subs %[num], %[num], #32 \n\t" + "blt end_num_%= \n\t" + "loop_num_%=: \n\t" + "pld [%[input_x_ptr], #1024] \n\t" - // for (int i = 0; i < input_x->numel(); i++) { - // out_ptr[i] = input_x_ptr[i] > 0 ? input_x_ptr[i] : 0; - // } + "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t" + "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t" + "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t" + "vld1.32 {q6, q7}, [%[input_x_ptr]]! 
\n\t" + + "vmax.f32 q0, q0, q8 \n\t" + "vmax.f32 q1, q1, q8 \n\t" + "vmax.f32 q2, q2, q8 \n\t" + "vmax.f32 q3, q3, q8 \n\t" + "vmax.f32 q4, q4, q8 \n\t" + "vmax.f32 q5, q5, q8 \n\t" + "vmax.f32 q6, q6, q8 \n\t" + "vmax.f32 q7, q7, q8 \n\t" + + "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t" + "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t" + "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t" + "vst1.32 {q6, q7}, [%[out_ptr]]! \n\t" + + "subs %[num], %[num], #32 \n\t" + "bge loop_num_%= \n\t" + "end_num_%=: \n\t" + "cmp %[num], #0 \n\t" + "bge end_%= \n\t" + "mov r6, #4 \n\t" + "mul r5, %[num], r6 \n\t" + "add %[input_x_ptr], %[input_x_ptr], r5 \n\t" + "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t" + "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t" + "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t" + "vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t" + "vmax.f32 q0, q0, q8 \n\t" + "vmax.f32 q1, q1, q8 \n\t" + "vmax.f32 q2, q2, q8 \n\t" + "vmax.f32 q3, q3, q8 \n\t" + "vmax.f32 q4, q4, q8 \n\t" + "vmax.f32 q5, q5, q8 \n\t" + "vmax.f32 q6, q6, q8 \n\t" + "vmax.f32 q7, q7, q8 \n\t" + "add %[out_ptr], %[out_ptr], r5 \n\t" + "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t" + "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t" + "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t" + "vst1.32 {q6, q7}, [%[out_ptr]]! \n\t" + "end_%=: \n\t" + : + : + [out_ptr] "r"(out_ptr), [input_x_ptr] "r"(input_x_ptr), [num] "r"(numel) + : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "r5", + "r6"); + } else { + ReluFunctor func_; + math::Transform trans; + trans(input_x_ptr, input_x_ptr + numel, out_ptr, func_); + } } } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/conv_add_kernel.h b/src/operators/kernel/conv_add_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..a6b8b3311bb5c7c6a7a809ddc82e070bff41c794 --- /dev/null +++ b/src/operators/kernel/conv_add_kernel.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVADD_OP + +#pragma once + +#include +#include "framework/ddim.h" +#include "framework/operator.h" +#include "operators/math/im2col.h" +#include "operators/math/math_function.h" +#include "operators/math/vol2col.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +using framework::DDim; +using framework::OpKernelBase; + +template +class ConvAddKernel : public OpKernelBase { + public: + void Compute(const FushionConvAddParam ¶m) const; +}; + +inline bool IsExpand(const std::vector &filter_dim, + const std::vector &strides, + const std::vector &paddings, + const std::vector &dilations) { + bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; + for (size_t j = 0; j < strides.size(); ++j) { + filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); + strides_1 = strides_1 && (strides[j] == 1); + padding_0 = padding_0 && (paddings[j] == 0); + dilation_1 = dilation_1 && (dilations[j] == 1); + } + + return !(filter_1 && strides_1 && padding_0 && dilation_1); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/fpga/conv_kernel.cpp index 21badb0d8eaf125a6e46bf3283adca90a175b984..d2c08992a89f0837de318c876fbab2892ee34e89 100644 --- a/src/operators/kernel/fpga/conv_kernel.cpp +++ b/src/operators/kernel/fpga/conv_kernel.cpp @@ -14,15 +14,16 @@ limitations under the 
License. */ #ifdef CONV_OP +#include "operators/kernel/conv_kernel.h" + namespace paddle_mobile { namespace operators { -// template<> -// void ConvKernel::Compute(const ConvParam ¶m) const -// {} -// -// template class ConvKernel; -} +template <> +void ConvKernel::Compute(const ConvParam ¶m) const {} +template class ConvKernel; + +} // namespace operators } // namespace paddle_mobile #endif diff --git a/src/operators/lrn_op.cpp b/src/operators/lrn_op.cpp index f072b22b063c6eb28cb5c0a183b51e6071c82bd3..d159cdf21b4d0f9ab85d24112ecb2a66729a0236 100644 --- a/src/operators/lrn_op.cpp +++ b/src/operators/lrn_op.cpp @@ -29,7 +29,13 @@ template class LrnOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(lrn); -REGISTER_OPERATOR(lrn, ops::LrnOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(lrn); +REGISTER_OPERATOR_CPU(lrn, ops::LrnOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/mul_op.cpp b/src/operators/mul_op.cpp index 2bd2e0694470518a0220ee020e689e358d70d702..d33bcbfdddba06947c9d04711c39cb619ada536e 100644 --- a/src/operators/mul_op.cpp +++ b/src/operators/mul_op.cpp @@ -55,7 +55,13 @@ template class MulOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(mul); -REGISTER_OPERATOR(mul, ops::MulOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(mul); +REGISTER_OPERATOR_CPU(mul, ops::MulOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/multiclass_nms_op.cpp b/src/operators/multiclass_nms_op.cpp index 1e4c3f8c34020eeeec2e59cb499b7e00c95edb38..e8b5f195feff617a5de55fe1a0b12a0e0cd70ce9 100644 --- a/src/operators/multiclass_nms_op.cpp +++ b/src/operators/multiclass_nms_op.cpp @@ -39,7 +39,13 @@ template class MultiClassNMSOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(multiclass_nms); -REGISTER_OPERATOR(multiclass_nms, ops::MultiClassNMSOp); +#ifdef 
PADDLE_MOBILE_CPU +USE_OP_CPU(multiclass_nms); +REGISTER_OPERATOR_CPU(multiclass_nms, ops::MultiClassNMSOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/op_param.cpp b/src/operators/op_param.cpp index 3045ce4d087bad48927fd3054ef7c2941587b5e2..1ec652ab8fcb675b8447a867a3a97bbb29a52ddf 100644 --- a/src/operators/op_param.cpp +++ b/src/operators/op_param.cpp @@ -39,5 +39,31 @@ Print &operator<<(Print &printer, const ConvParam &conv_param) { } #endif +#ifdef FUSION_CONVADD_OP + +Print &operator<<(Print &printer, const FushionConvAddParam &conv_param) { + printer << "parameter of conv_add: " + << "\n"; + printer << " stride: " + << " (" << conv_param.Strides()[0] << conv_param.Strides()[1] << ") " + << "\n"; + printer << " paddings: " + << " (" << conv_param.Paddings()[0] << conv_param.Paddings()[1] + << ") " + << "\n"; + printer << " dilations: " + << " (" << conv_param.Dilations()[0] << conv_param.Dilations()[1] + << ") " + << "\n"; + printer << " groups: " << conv_param.Groups() << "\n"; + printer << " input dims: " << conv_param.Input()->dims() << "\n"; + printer << " filter dims: " << conv_param.Filter()->dims() << "\n"; + printer << " bias dims: " << conv_param.Bias()->dims() << "\n"; + printer << " output dims: " << conv_param.Output()->dims(); + return printer; +} + +#endif + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 75f6f5ee2215bd5785c791ab8b1b99adf6fa98ae..bfefaeaa25260ddc80ac336a9769088bf5a06034 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -165,6 +165,8 @@ class OpParam { template static T *GetVarValue(const string &key, const VariableNameMap &var_map, const Scope &scope) { + PADDLE_MOBILE_ENFORCE(var_map.count(key) > 0, + "%s is not contained in var_map", key.c_str()) auto var_vec = var_map.at(key); if (!var_vec.empty()) { auto var = scope.FindVar(var_vec[0]); @@ -787,5 +789,54 @@ 
class FushionFcParam : public OpParam { }; #endif +#ifdef FUSION_CONVADD_OP +class FushionConvAddParam : public OpParam { + public: + FushionConvAddParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + const Scope &scope) { + bias_ = InputYFrom(inputs, scope); + axis_ = GetAttr("axis", attrs); + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + } + Tensor *Bias() const { return bias_; } + + const int &Axis() const { return axis_; } + + const Tensor *Input() const { return input_; } + + const Tensor *Filter() const { return filter_; } + + Tensor *Output() const { return output_; } + + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + + private: + Tensor *bias_; + int axis_; + Tensor *input_; + Tensor *output_; + Tensor *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; +}; + +Print &operator<<(Print &printer, const FushionConvAddParam &conv_param); +#endif + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/pool_op.cpp b/src/operators/pool_op.cpp index 45a709d20794e9fdfad15f0a3ec499d3f32d5bf9..79b06174af736f693e05fe87985bef1a02886435 100644 --- a/src/operators/pool_op.cpp +++ b/src/operators/pool_op.cpp @@ -59,7 +59,13 @@ template class PoolOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(pool2d); -REGISTER_OPERATOR(pool2d, ops::PoolOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(pool2d); +REGISTER_OPERATOR_CPU(pool2d, ops::PoolOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git 
a/src/operators/prior_box_op.cpp b/src/operators/prior_box_op.cpp index 22f9326b00f41a96de2f6ce3d79f8cbee98fd9f4..f3ae6e5231efd604862e9c7b4dbafd71c71d6f54 100644 --- a/src/operators/prior_box_op.cpp +++ b/src/operators/prior_box_op.cpp @@ -49,7 +49,13 @@ template class PriorBoxOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(prior_box); -REGISTER_OPERATOR(prior_box, ops::PriorBoxOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(prior_box); +REGISTER_OPERATOR_CPU(prior_box, ops::PriorBoxOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/relu_op.cpp b/src/operators/relu_op.cpp index 3beac260935ce2daf8a5b9f1e6b9be178034ac8d..066772e3bee32b3296d7fb9bebf615cc57702871 100644 --- a/src/operators/relu_op.cpp +++ b/src/operators/relu_op.cpp @@ -33,7 +33,13 @@ template class ReluOp; * 都是需要和model中类型对应起来的 * */ namespace ops = paddle_mobile::operators; -USE_OP(relu); -REGISTER_OPERATOR(relu, ops::ReluOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(relu); +REGISTER_OPERATOR_CPU(relu, ops::ReluOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/reshape_op.cpp b/src/operators/reshape_op.cpp index 44d3de2203cc01f6a6acd6810f4e676f6efb6bbd..5d0aa49a26b6c0b2f78b5fcb4b3bd144edaa313c 100644 --- a/src/operators/reshape_op.cpp +++ b/src/operators/reshape_op.cpp @@ -32,7 +32,13 @@ template class ReshapeOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(reshape); -REGISTER_OPERATOR(reshape, ops::ReshapeOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(reshape); +REGISTER_OPERATOR_CPU(reshape, ops::ReshapeOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/sigmoid_op.cpp b/src/operators/sigmoid_op.cpp index 8be9309d1047a1d892c0c0151375a8baa01cbca3..641b6f29f2f1eaff7304b8e70b12284575a2e246 100644 --- a/src/operators/sigmoid_op.cpp +++ 
b/src/operators/sigmoid_op.cpp @@ -27,7 +27,13 @@ template class SigmoidOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(sigmoid); -REGISTER_OPERATOR(sigmoid, ops::SigmoidOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(sigmoid); +REGISTER_OPERATOR_CPU(sigmoid, ops::SigmoidOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/softmax_op.cpp b/src/operators/softmax_op.cpp index 5973647bfd1624fc4bb71b8112c5d7f8bf9665cd..8d5f669466d5725d877afe9db2adb9441fe488ca 100644 --- a/src/operators/softmax_op.cpp +++ b/src/operators/softmax_op.cpp @@ -27,7 +27,13 @@ template class SoftmaxOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(softmax); -REGISTER_OPERATOR(softmax, ops::SoftmaxOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(softmax); +REGISTER_OPERATOR_CPU(softmax, ops::SoftmaxOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/src/operators/transpose_op.cpp b/src/operators/transpose_op.cpp index 3abebc77ba6733bf1bfa73846ec1a077a494de2f..02a3b16e8d62d094a83c329f52957b4b8b87d805 100644 --- a/src/operators/transpose_op.cpp +++ b/src/operators/transpose_op.cpp @@ -52,7 +52,13 @@ template class TransposeOp; } // namespace paddle_mobile namespace ops = paddle_mobile::operators; -USE_OP(transpose); -REGISTER_OPERATOR(transpose, ops::TransposeOp); +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(transpose); +REGISTER_OPERATOR_CPU(transpose, ops::TransposeOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +#endif #endif diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp index ab4fd2fe0d1eaaa58fabc38fbf512a0b860c36f0..d25a9eb7ce83876ca339adf8aff1a027b70ac611 100644 --- a/test/net/test_googlenet.cpp +++ b/test/net/test_googlenet.cpp @@ -20,9 +20,9 @@ int main() { paddle_mobile::Loader loader; bool optimize = false; auto time1 = time(); - // auto program = 
loader.Load(g_googlenet, optimize); - auto program = loader.Load(g_googlenet_combine + "/model", - g_googlenet_combine + "/params", optimize); + auto program = loader.Load(g_googlenet, optimize); + // auto program = loader.Load(g_googlenet_combine + "/model", + // g_googlenet_combine + "/params", optimize); auto time2 = time(); DLOG << "load cost :" << time_diff(time1, time2) << "ms\n"; paddle_mobile::Executor executor(program, 1, optimize); diff --git a/tools/build.sh b/tools/build.sh index aa59bd3d2834fc83db7bed24e7ee4ac7ea132294..4ac63315a94798d3aca63fb62aef511c4146cd3c 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -15,7 +15,6 @@ build_for_mac() { fi PLATFORM="x86" MODE="Release" - CXX_FLAGS="-std=c++11 -O3 -s" BUILD_DIR=../build/release/"${PLATFORM}" mkdir -p ${BUILD_DIR}/build @@ -25,7 +24,6 @@ build_for_mac() { cmake .. \ -B"${BUILD_DIR}" \ -DCMAKE_BUILD_TYPE="${MODE}" \ - -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ -DIS_MAC=true cd ${BUILD_DIR} @@ -46,11 +44,11 @@ build_for_android() { if [ "${PLATFORM}" = "arm-v7a" ]; then ABI="armeabi-v7a with NEON" ARM_PLATFORM="V7" - CXX_FLAGS="-O3 -std=c++11 -s -march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security" + CXX_FLAGS="-march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security" elif [ "${PLATFORM}" = "arm-v8a" ]; then ABI="arm64-v8a" ARM_PLATFORM="V8" - CXX_FLAGS="-O3 -std=c++11 -s -march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog" + CXX_FLAGS="-march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog" else echo "unknown platform!" 
exit -1 @@ -98,7 +96,7 @@ build_for_ios() { BUILD_DIR=../build/release/"${PLATFORM}" TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake" C_FLAGS="-fobjc-abi-version=2 -fobjc-arc -isysroot ${CMAKE_OSX_SYSROOT}" - CXX_FLAGS="-fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT}" + CXX_FLAGS="-fobjc-abi-version=2 -fobjc-arc -std=gnu++14 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT}" mkdir -p "${BUILD_DIR}" if [ $# -eq 1 ]; then NET=$1 diff --git a/tools/ios-cmake/ios.toolchain.cmake b/tools/ios-cmake/ios.toolchain.cmake index 5d34c892e146da89a286188f8493d16530844505..a8735adc8d853a5825a23f1ddf129d0a95199275 100644 --- a/tools/ios-cmake/ios.toolchain.cmake +++ b/tools/ios-cmake/ios.toolchain.cmake @@ -1,392 +1,210 @@ -# This file is part of the ios-cmake project. It was retrieved from -# https://github.com/cristeab/ios-cmake.git, which is a fork of -# https://code.google.com/p/ios-cmake/. Which in turn is based off of -# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which -# are included with CMake 2.8.4 -# -# The ios-cmake project is licensed under the new BSD license. -# -# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software, -# Kitware, Inc., Insight Software Consortium. All rights reserved. -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# This file is based off of the Platform/Darwin.cmake and -# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 -# It has been altered for iOS development. -# -# Updated by Alex Stewart (alexs.mac@gmail.com) -# -# ***************************************************************************** -# Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com) -# under the BSD-Clause-3 licence -# ***************************************************************************** -# -# INFORMATION / HELP -# -# The following variables control the behaviour of this toolchain: -# -# IOS_PLATFORM: OS (default) or SIMULATOR or SIMULATOR64 or TVOS or SIMULATOR_TVOS -# OS = Build for iPhoneOS. -# SIMULATOR = Build for x86 i386 iPhone Simulator. -# SIMULATOR64 = Build for x86_64 iPhone Simulator. -# TVOS = Build for AppleTVOS. -# SIMULATOR_TVOS = Build for x86_64 AppleTV Simulator. -# CMAKE_OSX_SYSROOT: Path to the iOS SDK to use. By default this is -# automatically determined from IOS_PLATFORM and xcodebuild, but -# can also be manually specified (although this should not be required). 
-# CMAKE_IOS_DEVELOPER_ROOT: Path to the Developer directory for the iOS platform -# being compiled for. By default this is automatically determined from -# CMAKE_OSX_SYSROOT, but can also be manually specified (although this should -# not be required). -# ENABLE_BITCODE: (1|0) Enables or disables bitcode support. Default 1 (true) -# ENABLE_ARC: (1|0) Enables or disables ARC support. Default 1 (true, ARC enabled by default) -# IOS_ARCH: (armv7 armv7s arm64 i386 x86_64) If specified, will override the default architectures for the given IOS_PLATFORM -# OS = armv7 armv7s arm64 -# SIMULATOR = i386 -# SIMULATOR64 = x86_64 -# TVOS = arm64 -# SIMULATOR_TVOS = x86_64 +# This file is based off of the Platform/Darwin.cmake and Platform/UnixPaths.cmake +# files which are included with CMake 2.8.4 +# It has been altered for iOS development + +# Options: # -# This toolchain defines the following variables for use externally: +# IOS_PLATFORM = OS (default) or SIMULATOR or SIMULATOR64 +# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders +# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch. +# SIMULATOR - used to build for the Simulator platforms, which have an x86 arch. # -# XCODE_VERSION: Version number (not including Build version) of Xcode detected. -# IOS_SDK_VERSION: Version of iOS SDK being used. -# CMAKE_OSX_ARCHITECTURES: Architectures being compiled for (generated from -# IOS_PLATFORM). +# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder +# By default this location is automatically chosen based on the IOS_PLATFORM value above.
+# If set manually, it will override the default location and force the use of a particular Developer Platform # -# This toolchain defines the following macros for use externally: +# CMAKE_IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder +# By default this location is automatically chosen based on the CMAKE_IOS_DEVELOPER_ROOT value. +# In this case it will always be the most up-to-date SDK found in the CMAKE_IOS_DEVELOPER_ROOT path. +# If set manually, this will force the use of a specific SDK version + +# Macros: # -# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT) -# A convenience macro for setting xcode specific properties on targets. -# Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel -# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all"). +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE) +# A convenience macro for setting xcode specific properties on targets +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1") # # find_host_package (PROGRAM ARGS) -# A macro used to find executable programs on the host system, not within the -# iOS environment. Thanks to the android-cmake project for providing the -# command. - -# Fix for PThread library not in path -set(CMAKE_THREAD_LIBS_INIT "-lpthread") -set(CMAKE_HAVE_THREADS_LIBRARY 1) -set(CMAKE_USE_WIN32_THREADS_INIT 0) -set(CMAKE_USE_PTHREADS_INIT 1) - -# Get the Xcode version being used. -execute_process(COMMAND xcodebuild -version - OUTPUT_VARIABLE XCODE_VERSION - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION "${XCODE_VERSION}") -string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION "${XCODE_VERSION}") -message(STATUS "Building with Xcode version: ${XCODE_VERSION}") -# Default to building for iPhoneOS if not specified otherwise, and we cannot -# determine the platform from the CMAKE_OSX_ARCHITECTURES variable.
The use -# of CMAKE_OSX_ARCHITECTURES is such that try_compile() projects can correctly -# determine the value of IOS_PLATFORM from the root project, as -# CMAKE_OSX_ARCHITECTURES is propagated to them by CMake. -if (NOT DEFINED IOS_PLATFORM) - if (CMAKE_OSX_ARCHITECTURES) - if (CMAKE_OSX_ARCHITECTURES MATCHES ".*arm.*") - set(IOS_PLATFORM "OS") - elseif (CMAKE_OSX_ARCHITECTURES MATCHES "i386") - set(IOS_PLATFORM "SIMULATOR") - elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") - set(IOS_PLATFORM "SIMULATOR64") - endif() - endif() - if (NOT IOS_PLATFORM) - set(IOS_PLATFORM "OS") - endif() -endif() -set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING - "Type of iOS platform for which to build.") -# Determine the platform name and architectures for use in xcodebuild commands -# from the specified IOS_PLATFORM name. -if (IOS_PLATFORM STREQUAL "OS") - set(XCODE_IOS_PLATFORM iphoneos) - if(NOT IOS_ARCH) - set(IOS_ARCH armv7 armv7s arm64) - endif() -elseif (IOS_PLATFORM STREQUAL "SIMULATOR") - set(XCODE_IOS_PLATFORM iphonesimulator) - if(NOT IOS_ARCH) - set(IOS_ARCH i386) - endif() -elseif(IOS_PLATFORM STREQUAL "SIMULATOR64") - set(XCODE_IOS_PLATFORM iphonesimulator) - if(NOT IOS_ARCH) - set(IOS_ARCH x86_64) - endif() -elseif (IOS_PLATFORM STREQUAL "TVOS") - set(XCODE_IOS_PLATFORM appletvos) - if(NOT IOS_ARCH) - set(IOS_ARCH arm64) - endif() -elseif (IOS_PLATFORM STREQUAL "SIMULATOR_TVOS") - set(XCODE_IOS_PLATFORM appletvsimulator) - if(NOT IOS_ARCH) - set(IOS_ARCH x86_64) - endif() -else() - message(FATAL_ERROR "Invalid IOS_PLATFORM: ${IOS_PLATFORM}") -endif() -message(STATUS "Configuring iOS build for platform: ${IOS_PLATFORM}, " - "architecture(s): ${IOS_ARCH}") -# If user did not specify the SDK root to use, then query xcodebuild for it. 
-if (NOT CMAKE_OSX_SYSROOT) - execute_process(COMMAND xcodebuild -version -sdk ${XCODE_IOS_PLATFORM} Path - OUTPUT_VARIABLE CMAKE_OSX_SYSROOT - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT} for platform: ${IOS_PLATFORM}") -endif() -if (NOT EXISTS ${CMAKE_OSX_SYSROOT}) - message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} " - "does not exist.") -endif() -# Specify minimum version of deployment target. -if (NOT DEFINED IOS_DEPLOYMENT_TARGET) - # Unless specified, SDK version 8.0 is used by default as minimum target version. - set(IOS_DEPLOYMENT_TARGET "8.0" - CACHE STRING "Minimum iOS version to build for." ) - message(STATUS "Using the default min-version since IOS_DEPLOYMENT_TARGET not provided!") -endif() -# Use bitcode or not -if (NOT DEFINED ENABLE_BITCODE) - # Unless specified, enable bitcode support by default - set(ENABLE_BITCODE TRUE CACHE BOOL "Wheter or not to enable bitcode") - message(STATUS "Enabling bitcode support by default. ENABLE_BITCODE not provided!") -endif() -# Use ARC or not -if (NOT DEFINED ENABLE_ARC) - # Unless specified, enable ARC support by default - set(ENABLE_ARC TRUE CACHE BOOL "Wheter or not to enable ARC") - message(STATUS "Enabling ARC support by default. ENABLE_ARC not provided!") -endif() -# Get the SDK version information. -execute_process(COMMAND xcodebuild -sdk ${CMAKE_OSX_SYSROOT} -version SDKVersion - OUTPUT_VARIABLE IOS_SDK_VERSION - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -# Find the Developer root for the specific iOS platform being compiled for -# from CMAKE_OSX_SYSROOT. Should be ../../ from SDK specified in -# CMAKE_OSX_SYSROOT. There does not appear to be a direct way to obtain -# this information from xcrun or xcodebuild. 
-if (NOT CMAKE_IOS_DEVELOPER_ROOT) - get_filename_component(IOS_PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT} PATH) - get_filename_component(CMAKE_IOS_DEVELOPER_ROOT ${IOS_PLATFORM_SDK_DIR} PATH) -endif() -if (NOT EXISTS ${CMAKE_IOS_DEVELOPER_ROOT}) - message(FATAL_ERROR "Invalid CMAKE_IOS_DEVELOPER_ROOT: " - "${CMAKE_IOS_DEVELOPER_ROOT} does not exist.") -endif() -# Find the C & C++ compilers for the specified SDK. -if (NOT CMAKE_C_COMPILER) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang - OUTPUT_VARIABLE CMAKE_C_COMPILER - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}") -endif() -if (NOT CMAKE_CXX_COMPILER) - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang++ - OUTPUT_VARIABLE CMAKE_CXX_COMPILER - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}") -endif() -# Find (Apple's) libtool. -execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find libtool - OUTPUT_VARIABLE IOS_LIBTOOL - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -message(STATUS "Using libtool: ${IOS_LIBTOOL}") -# Configure libtool to be used instead of ar + ranlib to build static libraries. -# This is required on Xcode 7+, but should also work on previous versions of -# Xcode. -set(CMAKE_C_CREATE_STATIC_LIBRARY - "${IOS_LIBTOOL} -static -o ") -set(CMAKE_CXX_CREATE_STATIC_LIBRARY - "${IOS_LIBTOOL} -static -o ") -# Get the version of Darwin (OS X) of the host. -execute_process(COMMAND uname -r - OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -# Standard settings. -set(CMAKE_SYSTEM_NAME Darwin CACHE INTERNAL "") -set(CMAKE_SYSTEM_VERSION ${IOS_SDK_VERSION} CACHE INTERNAL "") -set(UNIX TRUE CACHE BOOL "") -set(APPLE TRUE CACHE BOOL "") -set(IOS TRUE CACHE BOOL "") +# A macro used to find executable programs on the host system, not within the iOS environment. 
+# Thanks to the android-cmake project for providing the command + +# Standard settings +set (CMAKE_SYSTEM_NAME Darwin) +set (CMAKE_SYSTEM_VERSION 1) +set (UNIX True) +set (APPLE True) +set (IOS True) + +# Required as of cmake 2.8.10 +set (CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) + +# Determine the cmake host system version so we know where to find the iOS SDKs +find_program (CMAKE_UNAME uname /bin /usr/bin /usr/local/bin) +if (CMAKE_UNAME) + exec_program(uname ARGS -r OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION) + string (REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" DARWIN_MAJOR_VERSION "${CMAKE_HOST_SYSTEM_VERSION}") +endif (CMAKE_UNAME) + +# Force the compilers to gcc for iOS +#include (CMakeForceCompiler) +#CMAKE_C_COMPILER (/usr/bin/gcc) +#CMAKE_CXX_COMPILER (/usr/bin/g++) +set(CMAKE_C_COMPILER /usr/bin/gcc) +set(CMAKE_CXX_COMPILER /usr/bin/g++) set(CMAKE_AR ar CACHE FILEPATH "" FORCE) -set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) -# Force unset of OS X-specific deployment target (otherwise autopopulated), -# required as of cmake 2.8.10. -set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING - "Must be empty for iOS builds." FORCE) -# Set the architectures for which to build. -set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE STRING "Build architecture for iOS") -# Skip the platform compiler checks for cross compiling. -set(CMAKE_CXX_COMPILER_FORCED TRUE) -set(CMAKE_CXX_COMPILER_WORKS TRUE) -set(CMAKE_C_COMPILER_FORCED TRUE) -set(CMAKE_C_COMPILER_WORKS TRUE) -# All iOS/Darwin specific settings - some may be redundant. 
-set(CMAKE_SHARED_LIBRARY_PREFIX "lib") -set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") -set(CMAKE_SHARED_MODULE_PREFIX "lib") -set(CMAKE_SHARED_MODULE_SUFFIX ".so") -set(CMAKE_MODULE_EXISTS 1) -set(CMAKE_DL_LIBS "") -set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") -set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") -set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") -set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") -message(STATUS "Building for minimum iOS version: ${IOS_DEPLOYMENT_TARGET}" - " (SDK version: ${IOS_SDK_VERSION})") -# Note that only Xcode 7+ supports the newer more specific: -# -m${XCODE_IOS_PLATFORM}-version-min flags, older versions of Xcode use: -# -m(ios/ios-simulator)-version-min instead. -if (IOS_PLATFORM STREQUAL "OS") - if (XCODE_VERSION VERSION_LESS 7.0) - set(XCODE_IOS_PLATFORM_VERSION_FLAGS - "-mios-version-min=${IOS_DEPLOYMENT_TARGET}") - else() - # Xcode 7.0+ uses flags we can build directly from XCODE_IOS_PLATFORM. - set(XCODE_IOS_PLATFORM_VERSION_FLAGS - "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}") - endif() -elseif (IOS_PLATFORM STREQUAL "TVOS") - set(XCODE_IOS_PLATFORM_VERSION_FLAGS - "-mtvos-version-min=${IOS_DEPLOYMENT_TARGET}") -elseif (IOS_PLATFORM STREQUAL "SIMULATOR_TVOS") - set(XCODE_IOS_PLATFORM_VERSION_FLAGS - "-mtvos-simulator-version-min=${IOS_DEPLOYMENT_TARGET}") -else() - # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min. 
- set(XCODE_IOS_PLATFORM_VERSION_FLAGS - "-mios-simulator-version-min=${IOS_DEPLOYMENT_TARGET}") -endif() -message(STATUS "Version flags set to: ${XCODE_IOS_PLATFORM_VERSION_FLAGS}") - -if (ENABLE_BITCODE) - set(BITCODE "-fembed-bitcode") - message(STATUS "Enabling bitcode support.") -else() - set(BITCODE "") - message(STATUS "Disabling bitcode support.") -endif() - -if (ENABLE_ARC) - set(FOBJC_ARC "-fobjc-arc") - message(STATUS "Enabling ARC support.") -else() - set(FOBJC_ARC "-fno-objc-arc") - message(STATUS "Disabling ARC support.") -endif() - -set(CMAKE_C_FLAGS -"${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${BITCODE} -fobjc-abi-version=2 ${FOBJC_ARC} ${C_FLAGS}") -# Hidden visibilty is required for C++ on iOS. -set(CMAKE_CXX_FLAGS -"${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${BITCODE} -fvisibility=hidden -fvisibility-inlines-hidden -fobjc-abi-version=2 ${FOBJC_ARC} ${CXX_FLAGS}") -set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -DNDEBUG -Os -fomit-frame-pointer -ffast-math ${BITCODE} ${CXX_FLAGS_MINSIZEREL}") -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -DNDEBUG -O2 -g -fomit-frame-pointer -ffast-math ${BITCODE} ${CXX_FLAGS_RELWITHDEBINFO}") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DNDEBUG -O3 -fomit-frame-pointer -ffast-math ${BITCODE} ${CXX_FLAGS_RELEASE}") -set(CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${C_LINK_FLAGS}") -set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CXX_LINK_FLAGS}") - -# In order to ensure that the updated compiler flags are used in try_compile() -# tests, we have to forcibly set them in the CMake cache, not merely set them -# in the local scope. 
-list(APPEND VARS_TO_FORCE_IN_CACHE - CMAKE_C_FLAGS - CMAKE_CXX_FLAGS - CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS_MINSIZEREL - CMAKE_CXX_FLAGS_RELEASE - CMAKE_C_LINK_FLAGS - CMAKE_CXX_LINK_FLAGS) -foreach(VAR_TO_FORCE ${VARS_TO_FORCE_IN_CACHE}) - set(${VAR_TO_FORCE} "${${VAR_TO_FORCE}}" CACHE STRING "" FORCE) -endforeach() - -set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) -set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") -set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") -set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") -set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") -set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") -# Hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old -# build tree (where install_name_tool was hardcoded) and where -# CMAKE_INSTALL_NAME_TOOL isn't in the cache and still cmake didn't fail in -# CMakeFindBinUtils.cmake (because it isn't rerun) hardcode -# CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did -# before, Alex. 
+ +# Skip the platform compiler checks for cross compiling +set (CMAKE_CXX_COMPILER_WORKS TRUE) +set (CMAKE_C_COMPILER_WORKS TRUE) + +# All iOS/Darwin specific settings - some may be redundant +set (CMAKE_SHARED_LIBRARY_PREFIX "lib") +set (CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") +set (CMAKE_SHARED_MODULE_PREFIX "lib") +set (CMAKE_SHARED_MODULE_SUFFIX ".so") +set (CMAKE_MODULE_EXISTS 1) +set (CMAKE_DL_LIBS "") + +set (CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set (CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set (CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set (CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +# Hidden visibility is required for cxx on iOS +set (CMAKE_C_FLAGS_INIT "") +set (CMAKE_CXX_FLAGS_INIT "-fvisibility=hidden -fvisibility-inlines-hidden") + +set (CMAKE_C_LINK_FLAGS "-Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") +set (CMAKE_CXX_LINK_FLAGS "-Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") + +set (CMAKE_PLATFORM_HAS_INSTALLNAME 1) +set (CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") +set (CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") +set (CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set (CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set (CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") + +# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree +# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache +# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun) +# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex if (NOT DEFINED CMAKE_INSTALL_NAME_TOOL) find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool) endif (NOT DEFINED CMAKE_INSTALL_NAME_TOOL) -# Set the find root to the iOS developer roots and to user defined
paths. -set(CMAKE_FIND_ROOT_PATH ${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_OSX_SYSROOT} - ${CMAKE_PREFIX_PATH} CACHE string "iOS find search path root" FORCE) -# Default to searching for frameworks first. -set(CMAKE_FIND_FRAMEWORK FIRST) -# Set up the default search directories for frameworks. -set(CMAKE_SYSTEM_FRAMEWORK_PATH - ${CMAKE_OSX_SYSROOT}/System/Library/Frameworks - ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks - ${CMAKE_OSX_SYSROOT}/Developer/Library/Frameworks) -# Only search the specified iOS SDK, not the remainder of the host filesystem. -set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) -# This little macro lets you set any XCode specific property. -macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION) - set(XCODE_RELVERSION_I "${XCODE_RELVERSION}") - if (XCODE_RELVERSION_I STREQUAL "All") - set_property(TARGET ${TARGET} PROPERTY - XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}") - else() - set_property(TARGET ${TARGET} PROPERTY - XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}") - endif() -endmacro(set_xcode_property) -# This macro lets you find executable programs on the host system. 
# ---------------------------------------------------------------------------
# Platform selection
#
# IOS_PLATFORM may be pre-set by the caller to one of:
#   OS           - builds against iPhoneOS.platform (armv7 armv7s arm64)
#   SIMULATOR    - builds against iPhoneSimulator.platform (i386)
#   SIMULATOR64  - builds against iPhoneSimulator.platform (x86_64)
# It defaults to OS when not defined.
# ---------------------------------------------------------------------------
if (NOT DEFINED IOS_PLATFORM)
  set (IOS_PLATFORM "OS")
endif ()
set (IOS_PLATFORM "${IOS_PLATFORM}" CACHE STRING "Type of iOS Platform")

# BUILD_ARM64 defaults to true and is published as a cache entry.
# NOTE(review): nothing in this section reads BUILD_ARM64 -- the device
# architecture list below always contains arm64. Confirm whether a consumer
# elsewhere depends on it.
if (NOT DEFINED BUILD_ARM64)
  set (BUILD_ARM64 true)
endif ()
set (BUILD_ARM64 "${BUILD_ARM64}" CACHE STRING "Build arm64 arch or not")

# Map the platform selection onto the Xcode platform directory.
# The expansions are quoted on purpose: this script defines a variable named
# SIMULATOR, and an unquoted `${IOS_PLATFORM}` that expands to SIMULATOR would
# be dereferenced by if() to that variable's value instead of being compared
# as a string. Quoting also keeps if() well-formed when the value is empty.
if ("${IOS_PLATFORM}" STREQUAL "OS")
  set (IOS_PLATFORM_LOCATION "iPhoneOS.platform")

  # This causes the installers to properly locate the output libraries
  set (CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos")
elseif ("${IOS_PLATFORM}" STREQUAL "SIMULATOR"
        OR "${IOS_PLATFORM}" STREQUAL "SIMULATOR64")
  # Both simulator flavours share the same platform directory; they differ
  # only in the architecture chosen further down.
  set (SIMULATOR true)
  set (IOS_PLATFORM_LOCATION "iPhoneSimulator.platform")

  # This causes the installers to properly locate the output libraries
  set (CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator")
else ()
  message (FATAL_ERROR "Unsupported IOS_PLATFORM value selected. Please choose OS, SIMULATOR or SIMULATOR64")
endif ()

# ---------------------------------------------------------------------------
# Developer root
#
# Used unless CMAKE_IOS_DEVELOPER_ROOT is specified manually. Xcode 4.3
# changed the installation location, so the directory reported by
# xcode-select is tried first and the legacy /Developer location second.
# ---------------------------------------------------------------------------
execute_process (
  COMMAND /usr/bin/xcode-select -print-path
  OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE)  # drop the trailing newline so the path composes cleanly
set (XCODE_POST_43_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
set (XCODE_PRE_43_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
if (NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT)
  if (EXISTS "${XCODE_POST_43_ROOT}")
    set (CMAKE_IOS_DEVELOPER_ROOT "${XCODE_POST_43_ROOT}")
  elseif (EXISTS "${XCODE_PRE_43_ROOT}")
    set (CMAKE_IOS_DEVELOPER_ROOT "${XCODE_PRE_43_ROOT}")
  endif ()
endif ()
set (CMAKE_IOS_DEVELOPER_ROOT "${CMAKE_IOS_DEVELOPER_ROOT}" CACHE PATH "Location of iOS Platform")

# ---------------------------------------------------------------------------
# SDK root
#
# Unless CMAKE_IOS_SDK_ROOT is specified manually, take the last entry of the
# sorted SDK directory listing.
# NOTE(review): list(SORT) orders entries as plain strings, so with several
# SDKs installed the pick may not be the numerically newest one -- confirm if
# multiple SDK versions can coexist on the build hosts.
# ---------------------------------------------------------------------------
if (NOT DEFINED CMAKE_IOS_SDK_ROOT)
  file (GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*")
  if (NOT _CMAKE_IOS_SDKS)
    message (FATAL_ERROR "No iOS SDK's found in default search path ${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK.")
  endif ()
  list (SORT _CMAKE_IOS_SDKS)
  list (REVERSE _CMAKE_IOS_SDKS)
  list (GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT)
  message (STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}")
endif ()
set (CMAKE_IOS_SDK_ROOT "${CMAKE_IOS_SDK_ROOT}" CACHE PATH "Location of the selected iOS SDK")

# Set the sysroot default to the selected SDK
set (CMAKE_OSX_SYSROOT "${CMAKE_IOS_SDK_ROOT}" CACHE PATH "Sysroot used for iOS support")

# ---------------------------------------------------------------------------
# Architectures
# ---------------------------------------------------------------------------
if ("${IOS_PLATFORM}" STREQUAL "OS")
  set (IOS_ARCH armv7 armv7s arm64)
elseif ("${IOS_PLATFORM}" STREQUAL "SIMULATOR")
  set (IOS_ARCH i386)
elseif ("${IOS_PLATFORM}" STREQUAL "SIMULATOR64")
  set (IOS_ARCH x86_64)
endif ()

set (CMAKE_OSX_ARCHITECTURES "${IOS_ARCH}" CACHE STRING "Build architecture for iOS")

# ---------------------------------------------------------------------------
# Search paths
# ---------------------------------------------------------------------------
# Set the find root to the iOS developer roots and to user defined paths.
# The expansions stay unquoted so that an empty CMAKE_PREFIX_PATH contributes
# no list element.
set (CMAKE_FIND_ROOT_PATH
  ${CMAKE_IOS_DEVELOPER_ROOT}
  ${CMAKE_IOS_SDK_ROOT}
  ${CMAKE_PREFIX_PATH}
  CACHE STRING "iOS find search path root")

# default to searching for frameworks first
set (CMAKE_FIND_FRAMEWORK FIRST)

# set up the default search directories for frameworks
set (CMAKE_SYSTEM_FRAMEWORK_PATH
  ${CMAKE_IOS_SDK_ROOT}/System/Library/Frameworks
  ${CMAKE_IOS_SDK_ROOT}/System/Library/PrivateFrameworks
  ${CMAKE_IOS_SDK_ROOT}/Developer/Library/Frameworks
  )

# only search the iOS sdks, not the remainder of the host filesystem
set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)


# set_xcode_property(<target> <property> <value>)
#
# Sets the XCODE_ATTRIBUTE_<property> target property of <target> to <value>.
macro (set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE)
  set_property (TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE})
endmacro ()


# find_host_package(<find_package arguments...>)
#
# Forwards its arguments to find_package() while the three
# CMAKE_FIND_ROOT_PATH_MODE_* variables are set to NEVER and IOS is FALSE.
# Afterwards the modes are set to ONLY and IOS to TRUE unconditionally; any
# values they held before the call are not remembered.
# This is a macro rather than a function so that everything find_package()
# sets lands in the caller's scope.
macro (find_host_package)
  set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
  set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER)
  set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER)
  set (IOS FALSE)

  find_package(${ARGN})

  set (IOS TRUE)
  set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
  set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
  set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
endmacro ()