diff --git a/CMakeLists.txt b/CMakeLists.txt
index fd846b19b73c330eeeeb0afe68bf13f29a5fc248..a7c7b5449551e27608f6d594e32e9786c2d2f6db 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -47,29 +47,14 @@ include(simd)
 ################################ Exposed Configurations #######################################
 lite_option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
 lite_option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ON IF ${AVX_FOUND})
-lite_option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
 lite_option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
 lite_option(WITH_MKL "Compile PaddlePaddle with MKL support." ON IF ${AVX_FOUND})
 lite_option(WITH_ARM_DOTPROD "Compile PaddlePaddle with ARM dot production" ON)
 lite_option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
-# TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
-if(ANDROID OR IOS OR ARMLINUX)
-    set(WITH_GPU OFF CACHE STRING
-        "Disable GPU when cross-compiling for Android and iOS" FORCE)
-    set(WITH_DSO OFF CACHE STRING
-        "Disable DSO when cross-compiling for Android and iOS" FORCE)
-    set(WITH_AVX OFF CACHE STRING
-        "Disable AVX when cross-compiling for Android and iOS" FORCE)
-    set(WITH_PYTHON OFF CACHE STRING
-        "Disable PYTHON when cross-compiling for Android and iOS" FORCE)
-    set(WITH_RDMA OFF CACHE STRING
-        "Disable RDMA when cross-compiling for Android and iOS" FORCE)
-    set(WITH_MKL OFF CACHE STRING
-        "Disable MKL when cross-compiling for Android and iOS" FORCE)
-endif()
 
 # for lite, both server and mobile framework.
 lite_option(LITE_WITH_JAVA "Enable Java JNI lib in lite mode" OFF)
+lite_option(LITE_WITH_PYTHON "Enable Python api lib in lite mode" OFF)
 lite_option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
 lite_option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
 lite_option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
@@ -86,6 +71,22 @@ lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
 # publish options
 lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
+# TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
+if(ANDROID OR IOS OR ARMLINUX)
+    set(WITH_GPU OFF CACHE STRING
+        "Disable GPU when cross-compiling for Android and iOS" FORCE)
+    set(WITH_DSO OFF CACHE STRING
+        "Disable DSO when cross-compiling for Android and iOS" FORCE)
+    set(WITH_AVX OFF CACHE STRING
+        "Disable AVX when cross-compiling for Android and iOS" FORCE)
+    set(LITE_WITH_PYTHON OFF CACHE STRING
+        "Disable PYTHON when cross-compiling for Android and iOS" FORCE)
+    set(WITH_RDMA OFF CACHE STRING
+        "Disable RDMA when cross-compiling for Android and iOS" FORCE)
+    set(WITH_MKL OFF CACHE STRING
+        "Disable MKL when cross-compiling for Android and iOS" FORCE)
+endif()
+
 set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
   "A path setting third party libraries download & build directories.")
@@ -110,6 +111,12 @@ include_directories("${PADDLE_SOURCE_DIR}")
 set(LITE_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}")
 include_directories("${LITE_GENERATED_INCLUDE_DIR}")
 
+if (LITE_WITH_PYTHON)
+  include(external/python)    # download, build, install python
+  include(external/pybind11)  # download, build, install pybind11
+endif()
+
+
 # for mobile
 if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
   message(STATUS "Building the mobile framework")
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index 79b20a4ada9b9bc760af00e0945610b1b2f7581f..d571925b20616fe724583ec06343a4084269c858 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -5,7 +5,7 @@ endif()
 set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
 set(paddle_known_gpu_archs7 "30 35 50 52")
 set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
-set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
+set(paddle_known_gpu_archs9 "30 35 50 52 60 61 62 70")
 set(paddle_known_gpu_archs10 "30 35 50 52 60 61 62 70 75")
 
 ######################################################################################
diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..df8562dff531bc7effbc3978a97fcaabacdce02b
--- /dev/null
+++ b/cmake/external/pybind11.cmake
@@ -0,0 +1,46 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if(NOT LITE_WITH_PYTHON)
+    return()
+endif()
+
+include(ExternalProject)
+
+set(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind)
+
+include_directories(${PYBIND_SOURCE_DIR}/src/extern_pybind/include)
+
+ExternalProject_Add(
+    extern_pybind
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    GIT_REPOSITORY    "https://github.com/pybind/pybind11.git"
+    GIT_TAG           "v2.2.4"
+    PREFIX            ${PYBIND_SOURCE_DIR}
+    UPDATE_COMMAND    ""
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND     ""
+    INSTALL_COMMAND   ""
+    TEST_COMMAND      ""
+)
+
+if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
+    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c)
+    file(WRITE ${dummyfile} "const char * dummy_pybind = \"${dummyfile}\";")
+    add_library(pybind STATIC ${dummyfile})
+else()
+    add_library(pybind INTERFACE)
+endif()
+
+add_dependencies(pybind extern_pybind)
diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..ae99f4df9a3676ae8f5b2c4c01305ead9b7a8254
--- /dev/null
+++ b/cmake/external/python.cmake
@@ -0,0 +1,83 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+IF(NOT LITE_WITH_PYTHON)
+    return()
+ENDIF()
+
+INCLUDE(python_module)
+
+FIND_PACKAGE(PythonInterp ${PY_VERSION} REQUIRED)
+FIND_PACKAGE(PythonLibs ${PY_VERSION} REQUIRED)
+
+if(WIN32)
+    execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+"from distutils import sysconfig as s;import sys;import struct;
+print(sys.prefix);
+print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
+"
+        RESULT_VARIABLE _PYTHON_SUCCESS
+        OUTPUT_VARIABLE _PYTHON_VALUES
+        ERROR_VARIABLE _PYTHON_ERROR_VALUE)
+
+    if(NOT _PYTHON_SUCCESS MATCHES 0)
+        set(PYTHONLIBS_FOUND FALSE)
+        return()
+    endif()
+
+    # Convert the process output into a list
+    string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
+    string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
+    list(GET _PYTHON_VALUES 0 PYTHON_PREFIX)
+    list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX)
+
+    # Make sure all directory separators are '/'
+    string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
+
+    set(PYTHON_LIBRARY
+        "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
+
+    # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
+    # original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
+    if(NOT EXISTS "${PYTHON_LIBRARY}")
+        get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
+        set(PYTHON_LIBRARY
+            "${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
+    endif()
+
+    # raise an error if the python libs are still not found.
+    if(NOT EXISTS "${PYTHON_LIBRARY}")
+        message(FATAL_ERROR "Python libraries not found")
+    endif()
+    SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
+endif(WIN32)
+
+# Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE.
+ADD_LIBRARY(python SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES})
+
+SET(py_env "")
+IF(PYTHONINTERP_FOUND)
+    find_python_module(pip REQUIRED)
+    find_python_module(numpy REQUIRED)
+    #find_python_module(wheel REQUIRED)
+    #find_python_module(google.protobuf REQUIRED)
+    FIND_PACKAGE(NumPy REQUIRED)
+    #IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
+    #    MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
+    #        "please use pip to upgrade protobuf. pip install -U protobuf")
+    #ENDIF()
+ENDIF(PYTHONINTERP_FOUND)
+INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
+INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
diff --git a/cmake/python_module.cmake b/cmake/python_module.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..1412b7f7f20600acf95a4a899f5e6529c3b67a35
--- /dev/null
+++ b/cmake/python_module.cmake
@@ -0,0 +1,43 @@
+# Find if a Python module is installed
+# Found at http://www.cmake.org/pipermail/cmake/2011-January/041666.html
+# To use do: find_python_module(PyQt4 REQUIRED)
+function(find_python_module module)
+    string(TOUPPER ${module} module_upper)
+    if(NOT PY_${module_upper})
+        if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED")
+            set(${module}_FIND_REQUIRED TRUE)
+        else()
+            set(${module}_FIND_REQUIRED FALSE)
+        endif()
+        # A module's location is usually a directory, but for binary modules
+        # it's a .so file.
+        execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+            "import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))"
+            RESULT_VARIABLE _${module}_status
+            OUTPUT_VARIABLE _${module}_location
+            ERROR_QUIET
+            OUTPUT_STRIP_TRAILING_WHITESPACE)
+        if(NOT _${module}_status)
+            set(PY_${module_upper} ${_${module}_location} CACHE STRING
+                "Location of Python module ${module}")
+        endif(NOT _${module}_status)
+    endif(NOT PY_${module_upper})
+    find_package_handle_standard_args(PY_${module} DEFAULT_MSG PY_${module_upper})
+    if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED)
+        message(FATAL_ERROR "python module ${module} is not found")
+    endif()
+
+    execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+        "import sys, ${module}; sys.stdout.write(${module}.__version__)"
+        OUTPUT_VARIABLE _${module}_version
+        RESULT_VARIABLE _${module}_status
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(NOT _${module}_status)
+        set(PY_${module_upper}_VERSION ${_${module}_version} CACHE STRING
+            "Version of Python module ${module}")
+    endif(NOT _${module}_status)
+
+    set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE)
+    set(PY_${module_upper}_VERSION ${PY_${module_upper}_VERSION} PARENT_SCOPE)
+endfunction(find_python_module)
diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt
index 80273cb69d73f98fcd0c53ad6631acb00b897899..5e0fa705a92c7b86e101996f49a4469a0a447e93 100644
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -222,6 +222,10 @@ if (LITE_WITH_JAVA AND LITE_WITH_ARM)
     add_subdirectory(android)
 endif()
 
+if (LITE_WITH_PYTHON)
+    add_subdirectory(python)
+endif()
+
 if (LITE_ON_TINY_PUBLISH)
     return()
 endif()
diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc
index 1060602e12f5821a1c2f110d01a87d5fc6902704..5e2c3e59a311b9a9a678e4fe686a419e8a045350 100644
--- a/lite/api/cxx_api.cc
+++ b/lite/api/cxx_api.cc
@@ -53,13 +53,9 @@ lite::Tensor *Predictor::GetInput(size_t offset) {
 }
 
 // get inputs names
-const std::vector<std::string> &Predictor::GetInputNames() {
-  return input_names_;
-}
+std::vector<std::string> Predictor::GetInputNames() { return input_names_; }
 // get output names
-const std::vector<std::string> &Predictor::GetOutputNames() {
-  return output_names_;
-}
+std::vector<std::string> Predictor::GetOutputNames() { return output_names_; }
 // append the names of inputs and outputs into input_names_ and output_names_
 void Predictor::PrepareFeedFetch() {
   auto current_block = program_desc_.GetBlock(0);
diff --git a/lite/api/cxx_api.h b/lite/api/cxx_api.h
index 7226f4767ddf91c2e8d9864e4bc7a7665845179a..5a4a6919d7d2386819b05724e0f275e90a0fa119 100644
--- a/lite/api/cxx_api.h
+++ b/lite/api/cxx_api.h
@@ -74,8 +74,8 @@ class LITE_API Predictor {
   // get input by name.
   lite::Tensor* GetInputByName(const std::string& name);
   // get input names and get output names.
-  const std::vector<std::string>& GetInputNames();
-  const std::vector<std::string>& GetOutputNames();
+  std::vector<std::string> GetInputNames();
+  std::vector<std::string> GetOutputNames();
   void PrepareFeedFetch();
 
   // Get offset-th col of fetch results.
@@ -111,6 +111,40 @@ class LITE_API Predictor {
   std::vector<std::string> output_names_;
 };
 
+class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
+ public:
+  CxxPaddleApiImpl() {}
+
+  /// Create a new predictor from a config.
+  void Init(const lite_api::CxxConfig& config);
+
+  std::unique_ptr<lite_api::Tensor> GetInput(int i) override;
+
+  std::unique_ptr<const lite_api::Tensor> GetOutput(int i) const override;
+
+  void Run() override;
+
+  std::string GetVersion() const override;
+
+  // get input names and get output names
+  std::vector<std::string> GetInputNames() override;
+  std::vector<std::string> GetOutputNames() override;
+
+  std::unique_ptr<const lite_api::Tensor> GetTensor(
+      const std::string& name) const override;
+
+  // Get InputTensor by name
+  std::unique_ptr<lite_api::Tensor> GetInputByName(
+      const std::string& name) override;
+
+  void SaveOptimizedModel(const std::string& model_dir,
+                          lite_api::LiteModelType model_type =
+                              lite_api::LiteModelType::kProtobuf) override;
+
+ private:
+  Predictor raw_predictor_;
+};
+
 /*
  * An executor for training.
  *
diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc
index 62984ea476a901828367d74874291080667df3d8..3e216a40b50b8bcb3bdfdc2c0fd9aefc415764c0 100644
--- a/lite/api/cxx_api_impl.cc
+++ b/lite/api/cxx_api_impl.cc
@@ -21,42 +21,6 @@
 namespace paddle {
 namespace lite {
 
-class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
- public:
-  CxxPaddleApiImpl();
-
-  /// Create a new predictor from a config.
-  void Init(const lite_api::CxxConfig &config);
-
-  std::unique_ptr<lite_api::Tensor> GetInput(int i) override;
-
-  std::unique_ptr<const lite_api::Tensor> GetOutput(int i) const override;
-
-  void Run() override;
-
-  std::string GetVersion() const override;
-
-  // get inputs names and get outputs names
-  const std::vector<std::string> &GetInputNames() override;
-  const std::vector<std::string> &GetOutputNames() override;
-
-  std::unique_ptr<const lite_api::Tensor> GetTensor(
-      const std::string &name) const override;
-
-  // Get InputTebsor by name
-  std::unique_ptr<lite_api::Tensor> GetInputByName(
-      const std::string &name) override;
-
-  void SaveOptimizedModel(const std::string &model_dir,
-                          lite_api::LiteModelType model_type =
-                              lite_api::LiteModelType::kProtobuf) override;
-
- private:
-  Predictor raw_predictor_;
-};
-
-CxxPaddleApiImpl::CxxPaddleApiImpl() {}
-
 void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
 #ifdef LITE_WITH_CUDA
   Env::Init();
@@ -76,11 +40,11 @@ std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetOutput(
   return std::unique_ptr<const lite_api::Tensor>(new lite_api::Tensor(x));
 }
 
-const std::vector<std::string> &CxxPaddleApiImpl::GetInputNames() {
+std::vector<std::string> CxxPaddleApiImpl::GetInputNames() {
   return raw_predictor_.GetInputNames();
 }
 
-const std::vector<std::string> &CxxPaddleApiImpl::GetOutputNames() {
+std::vector<std::string> CxxPaddleApiImpl::GetOutputNames() {
   return raw_predictor_.GetOutputNames();
 }
 
diff --git a/lite/api/light_api.cc b/lite/api/light_api.cc
index d28081c5152024606eb2e453aae1c7ca9eb7cd07..a0c4b7e5e375d9d004de63345ba5013ee6c252b9 100644
--- a/lite/api/light_api.cc
+++ b/lite/api/light_api.cc
@@ -81,11 +81,11 @@ const Tensor* LightPredictor::GetOutput(size_t offset) {
   return out_var->GetMutable<lite::Tensor>();
 }
 // get inputs names
-const std::vector<std::string>& LightPredictor::GetInputNames() {
+std::vector<std::string> LightPredictor::GetInputNames() {
   return input_names_;
 }
 // get output names
-const std::vector<std::string>& LightPredictor::GetOutputNames() {
+std::vector<std::string> LightPredictor::GetOutputNames() {
   return output_names_;
 }
 // append the names of inputs and outputs into input_names_ and output_names_
diff --git a/lite/api/light_api.h b/lite/api/light_api.h
index 9d69cce441f86e563ad3ed0501514ab1fe79d98e..13ef72d92cfd83954188516eb297ad23b31994df 100644
--- a/lite/api/light_api.h
+++ b/lite/api/light_api.h
@@ -64,8 +64,8 @@ class LITE_API LightPredictor {
   }
 
   // get input names and get output names.
-  const std::vector<std::string>& GetInputNames();
-  const std::vector<std::string>& GetOutputNames();
+  std::vector<std::string> GetInputNames();
+  std::vector<std::string> GetOutputNames();
   void PrepareFeedFetch();
 
  private:
@@ -86,5 +86,31 @@ class LITE_API LightPredictor {
   std::vector<std::string> output_names_;
 };
 
+class LightPredictorImpl : public lite_api::PaddlePredictor {
+ public:
+  LightPredictorImpl() = default;
+
+  std::unique_ptr<lite_api::Tensor> GetInput(int i) override;
+
+  std::unique_ptr<const lite_api::Tensor> GetOutput(int i) const override;
+
+  void Run() override;
+
+  std::string GetVersion() const override;
+  std::vector<std::string> GetInputNames() override;
+  std::vector<std::string> GetOutputNames() override;
+
+  std::unique_ptr<const lite_api::Tensor> GetTensor(
+      const std::string& name) const override;
+  // Get InputTensor by name
+  std::unique_ptr<lite_api::Tensor> GetInputByName(
+      const std::string& name) override;
+
+  void Init(const lite_api::MobileConfig& config);
+
+ private:
+  std::unique_ptr<LightPredictor> raw_predictor_;
+};
+
 }  // namespace lite
 }  // namespace paddle
diff --git a/lite/api/light_api_impl.cc b/lite/api/light_api_impl.cc
index 70ab8ac0c03b8dea84da5ef1d6ca9c64c4c9d102..90954187d2dbd211867232796dbe4ec556f9ba0c 100644
--- a/lite/api/light_api_impl.cc
+++ b/lite/api/light_api_impl.cc
@@ -19,77 +19,60 @@
 #include "lite/model_parser/model_parser.h"
 
 namespace paddle {
-namespace lite_api {
-
-class LightPredictorImpl : public PaddlePredictor {
- public:
-  LightPredictorImpl() = default;
-
-  std::unique_ptr<Tensor> GetInput(int i) override;
-
-  std::unique_ptr<const Tensor> GetOutput(int i) const override;
-
-  void Run() override;
-
-  std::string GetVersion() const override;
-  const std::vector<std::string>& GetInputNames() override;
-  const std::vector<std::string>& GetOutputNames() override;
+namespace lite {
 
-  std::unique_ptr<const Tensor> GetTensor(
-      const std::string& name) const override;
-  // Get InputTebsor by name
-  std::unique_ptr<Tensor> GetInputByName(const std::string& name) override;
-
-  void Init(const MobileConfig& config);
-
- private:
-  std::unique_ptr<lite::LightPredictor> raw_predictor_;
-};
-
-void LightPredictorImpl::Init(const MobileConfig& config) {
+void LightPredictorImpl::Init(const lite_api::MobileConfig& config) {
   // LightPredictor Only support NaiveBuffer backend in publish lib
-  raw_predictor_.reset(new lite::LightPredictor(config.model_dir(),
-                                                config.model_buffer(),
-                                                config.param_buffer(),
-                                                config.model_from_memory(),
-                                                LiteModelType::kNaiveBuffer));
+  raw_predictor_.reset(
+      new LightPredictor(config.model_dir(),
+                         config.model_buffer(),
+                         config.param_buffer(),
+                         config.model_from_memory(),
+                         lite_api::LiteModelType::kNaiveBuffer));
 }
 
-std::unique_ptr<Tensor> LightPredictorImpl::GetInput(int i) {
-  return std::unique_ptr<Tensor>(new Tensor(raw_predictor_->GetInput(i)));
+std::unique_ptr<lite_api::Tensor> LightPredictorImpl::GetInput(int i) {
+  return std::unique_ptr<lite_api::Tensor>(
+      new lite_api::Tensor(raw_predictor_->GetInput(i)));
 }
 
-std::unique_ptr<const Tensor> LightPredictorImpl::GetOutput(int i) const {
-  return std::unique_ptr<const Tensor>(new Tensor(raw_predictor_->GetOutput(i)));
+std::unique_ptr<const lite_api::Tensor> LightPredictorImpl::GetOutput(
+    int i) const {
+  return std::unique_ptr<const lite_api::Tensor>(
+      new lite_api::Tensor(raw_predictor_->GetOutput(i)));
 }
 
 void LightPredictorImpl::Run() { raw_predictor_->Run(); }
 
 std::string LightPredictorImpl::GetVersion() const { return lite::version(); }
 
-std::unique_ptr<const Tensor> LightPredictorImpl::GetTensor(
+std::unique_ptr<const lite_api::Tensor> LightPredictorImpl::GetTensor(
     const std::string& name) const {
-  return std::unique_ptr<const Tensor>(
-      new Tensor(raw_predictor_->GetTensor(name)));
+  return std::unique_ptr<const lite_api::Tensor>(
+      new lite_api::Tensor(raw_predictor_->GetTensor(name)));
 }
 
-std::unique_ptr<Tensor> LightPredictorImpl::GetInputByName(
+std::unique_ptr<lite_api::Tensor> LightPredictorImpl::GetInputByName(
     const std::string& name) {
-  return std::unique_ptr<Tensor>(
-      new Tensor(raw_predictor_->GetInputByName(name)));
+  return std::unique_ptr<lite_api::Tensor>(
+      new lite_api::Tensor(raw_predictor_->GetInputByName(name)));
 }
 
-const std::vector<std::string>& LightPredictorImpl::GetInputNames() {
+std::vector<std::string> LightPredictorImpl::GetInputNames() {
   return raw_predictor_->GetInputNames();
 }
 
-const std::vector<std::string>& LightPredictorImpl::GetOutputNames() {
+std::vector<std::string> LightPredictorImpl::GetOutputNames() {
   return raw_predictor_->GetOutputNames();
 }
 
+}  // namespace lite
+
+namespace lite_api {
+
 template <>
 std::shared_ptr<PaddlePredictor> CreatePaddlePredictor(
     const MobileConfig& config) {
-  auto x = std::make_shared<LightPredictorImpl>();
+  auto x = std::make_shared<lite::LightPredictorImpl>();
   x->Init(config);
   return x;
 }
diff --git a/lite/api/light_api_test.cc b/lite/api/light_api_test.cc
index d2bbc295ad4b68e7849d5d25f34e0b5117fc846d..7d322530f624c43737018d8ece98fb24d48bc16a 100644
--- a/lite/api/light_api_test.cc
+++ b/lite/api/light_api_test.cc
@@ -37,13 +37,13 @@ TEST(LightAPI, load) {
   }
 
   predictor.PrepareFeedFetch();
-  const std::vector<std::string>& inputs = predictor.GetInputNames();
+  const std::vector<std::string> inputs = predictor.GetInputNames();
   LOG(INFO) << "input size: " << inputs.size();
   for (int i = 0; i < inputs.size(); i++) {
     LOG(INFO) << "inputnames: " << inputs[i];
   }
-  const std::vector<std::string>& outputs = predictor.GetOutputNames();
+  const std::vector<std::string> outputs = predictor.GetOutputNames();
   for (int i = 0; i < outputs.size(); i++) {
     LOG(INFO) << "outputnames: " << outputs[i];
   }
diff --git a/lite/api/paddle_api.cc b/lite/api/paddle_api.cc
index 16ae5db7776aeea285906bfcb1d68ae30b68bf12..e87885c369999470bd0d1d8875cade797630388d 100644
--- a/lite/api/paddle_api.cc
+++ b/lite/api/paddle_api.cc
@@ -14,8 +14,13 @@
 
 #include "lite/api/paddle_api.h"
 #include "lite/core/device_info.h"
+#include "lite/core/target_wrapper.h"
 #include "lite/core/tensor.h"
 
+#ifdef LITE_WITH_CUDA
+#include "lite/backends/cuda/target_wrapper.h"
+#endif
+
 namespace paddle {
 namespace lite_api {
 
@@ -42,6 +47,11 @@ const int8_t *Tensor::data() const {
   return ctensor(raw_tensor_)->data<int8_t>();
 }
 
+template <>
+const int32_t *Tensor::data() const {
+  return ctensor(raw_tensor_)->data<int32_t>();
+}
+
 template <>
 int *Tensor::mutable_data(TargetType type) const {
   return tensor(raw_tensor_)->mutable_data<int>(type);
@@ -55,10 +65,81 @@ int8_t *Tensor::mutable_data(TargetType type) const {
   return tensor(raw_tensor_)->mutable_data<int8_t>(type);
 }
 
+template <typename T, TargetType type>
+void Tensor::CopyFromCpu(const T *src_data) {
+  T *data = tensor(raw_tensor_)->mutable_data<T>(type);
+  int64_t num = tensor(raw_tensor_)->numel();
+  CHECK(num > 0) << "You should call Resize interface first";
+  if (type == TargetType::kHost || type == TargetType::kARM) {
+    lite::TargetWrapperHost::MemcpySync(
+        data, src_data, num * sizeof(T), lite::IoDirection::HtoH);
+  } else if (type == TargetType::kCUDA) {
+#ifdef LITE_WITH_CUDA
+    lite::TargetWrapperCuda::MemcpySync(
+        data, src_data, num * sizeof(T), lite::IoDirection::HtoD);
+#else
+    LOG(FATAL) << "Please compile the lib with CUDA.";
+#endif
+  } else {
+    LOG(FATAL) << "The CopyFromCpu interface only supports kHost, kARM, kCUDA";
+  }
+}
+template <typename T>
+void Tensor::CopyToCpu(T *data) {
+  const T *src_data = tensor(raw_tensor_)->data<T>();
+  int64_t num = tensor(raw_tensor_)->numel();
+  CHECK(num > 0) << "You should call Resize interface first";
+  auto type = tensor(raw_tensor_)->target();
+  if (type == TargetType::kHost || type == TargetType::kARM) {
+    lite::TargetWrapperHost::MemcpySync(
+        data, src_data, num * sizeof(T), lite::IoDirection::HtoH);
+  } else if (type == TargetType::kCUDA) {
+#ifdef LITE_WITH_CUDA
+    lite::TargetWrapperCuda::MemcpySync(
+        data, src_data, num * sizeof(T), lite::IoDirection::DtoH);
+#else
+    LOG(FATAL) << "Please compile the lib with CUDA.";
+#endif
+  } else {
+    LOG(FATAL) << "The CopyToCpu interface only supports kHost, kARM, kCUDA";
+  }
+}
+
+template void Tensor::CopyFromCpu<int, TargetType::kHost>(const int *);
+template void Tensor::CopyFromCpu<float, TargetType::kHost>(const float *);
+template void Tensor::CopyFromCpu<int8_t, TargetType::kHost>(const int8_t *);
+
+template void Tensor::CopyFromCpu<int, TargetType::kARM>(const int *);
+template void Tensor::CopyFromCpu<float, TargetType::kARM>(const float *);
+template void Tensor::CopyFromCpu<int8_t, TargetType::kARM>(const int8_t *);
+template void Tensor::CopyFromCpu<int, TargetType::kCUDA>(const int *);
+template void Tensor::CopyFromCpu<float, TargetType::kCUDA>(const float *);
+template void Tensor::CopyFromCpu<int8_t, TargetType::kCUDA>(const int8_t *);
+
+template void Tensor::CopyToCpu(int8_t *);
+template void Tensor::CopyToCpu(float *);
+template void Tensor::CopyToCpu(int *);
+
 shape_t Tensor::shape() const {
   return ctensor(raw_tensor_)->dims().Vectorize();
 }
 
+TargetType Tensor::target() const {
+  auto type = ctensor(raw_tensor_)->target();
+  if (type == TargetType::kUnk) {
+    CHECK(false) << "This tensor was not initialized.";
+  }
+  return type;
+}
+
+PrecisionType Tensor::precision() const {
+  auto precision = ctensor(raw_tensor_)->precision();
+  if (precision == PrecisionType::kUnk) {
+    CHECK(false) << "This tensor was not initialized.";
+  }
+  return precision;
+}
+
 lod_t Tensor::lod() const { return ctensor(raw_tensor_)->lod(); }
 
 void Tensor::SetLoD(const lod_t &lod) { tensor(raw_tensor_)->set_lod(lod); }
diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h
index d7e3c014b0fe37a5f1da4210972349ac4124ed6b..3886c462ff3192ccb522b741debe6730e3e0e4fb 100644
--- a/lite/api/paddle_api.h
+++ b/lite/api/paddle_api.h
@@ -45,8 +45,15 @@ struct LITE_API Tensor {
   template <typename T>
   T* mutable_data(TargetType type = TargetType::kHost) const;
 
+  template <typename T, TargetType type = TargetType::kHost>
+  void CopyFromCpu(const T* data);
+
+  template <typename T>
+  void CopyToCpu(T* data);
   /// Shape of the tensor.
   shape_t shape() const;
+  TargetType target() const;
+  PrecisionType precision() const;
 
   // LoD of the tensor
   lod_t lod() const;
@@ -75,9 +82,9 @@ class LITE_API PaddlePredictor {
   virtual std::string GetVersion() const = 0;
 
   // Get input names
-  virtual const std::vector<std::string>& GetInputNames() = 0;
+  virtual std::vector<std::string> GetInputNames() = 0;
   // Get output names
-  virtual const std::vector<std::string>& GetOutputNames() = 0;
+  virtual std::vector<std::string> GetOutputNames() = 0;
   // Get Input by name
   virtual std::unique_ptr<Tensor> GetInputByName(const std::string& name) = 0;
 
diff --git a/lite/api/paddle_api_test.cc b/lite/api/paddle_api_test.cc
index 443a05d9927cfa461a306ce6c3c32ff6e5024631..f87c707ddbea0d4e78d195e4529892b321027e8f 100644
--- a/lite/api/paddle_api_test.cc
+++ b/lite/api/paddle_api_test.cc
@@ -37,12 +37,12 @@ TEST(CxxApi, run) {
 
   LOG(INFO) << "Version: " << predictor->GetVersion();
 
-  auto& inputs = predictor->GetInputNames();
+  auto inputs = predictor->GetInputNames();
   LOG(INFO) << "input size: " << inputs.size();
   for (int i = 0; i < inputs.size(); i++) {
     LOG(INFO) << "inputnames: " << inputs[i];
   }
-  auto& outputs = predictor->GetOutputNames();
+  auto outputs = predictor->GetOutputNames();
   for (int i = 0; i < outputs.size(); i++) {
     LOG(INFO) << "outputnames: " << outputs[i];
   }
@@ -76,12 +76,12 @@ TEST(LightApi, run) {
 
   auto predictor = lite_api::CreatePaddlePredictor(config);
 
-  auto& inputs = predictor->GetInputNames();
+  auto inputs = predictor->GetInputNames();
   LOG(INFO) << "input size: " << inputs.size();
   for (int i = 0; i < inputs.size(); i++) {
     LOG(INFO) << "inputnames: " << inputs.at(i);
   }
-  auto& outputs = predictor->GetOutputNames();
+  auto outputs = predictor->GetOutputNames();
   for (int i = 0; i < outputs.size(); i++) {
     LOG(INFO) << "outputnames: " << outputs.at(i);
   }
diff --git a/lite/api/python/CMakeLists.txt b/lite/api/python/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..43178a37c663bb09acb7c025e021cbc91bf0cc5d
--- /dev/null
+++ b/lite/api/python/CMakeLists.txt
@@ -0,0 +1,7 @@
+if (NOT LITE_WITH_PYTHON)
+    return()
+endif()
+
+
+add_subdirectory(pybind)
+#add_subdirectory(interface)
diff --git a/lite/api/python/pybind/CMakeLists.txt b/lite/api/python/pybind/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..178f167e6a1627d01df13b2e105e0af36b20601a
--- /dev/null
+++ b/lite/api/python/pybind/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(PYBIND_DEPS pybind python paddle_api_light paddle_api)
+if (NOT LITE_ON_TINY_PUBLISH)
+    set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full)
+endif()
+
+lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
diff --git a/lite/api/python/pybind/pybind.cc b/lite/api/python/pybind/pybind.cc
new file mode 100644
index 0000000000000000000000000000000000000000..00f083f54d811d0431a1f6b7e632b8d2c49c40e4
--- /dev/null
+++ b/lite/api/python/pybind/pybind.cc
@@ -0,0 +1,265 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/api/python/pybind/pybind.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef LITE_ON_TINY_PUBLISH
+#include "lite/api/cxx_api.h"
+#include "lite/api/paddle_use_passes.h"
+#endif
+
+#include "lite/api/light_api.h"
+#include "lite/api/paddle_api.h"
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/core/tensor.h"
+
+namespace py = pybind11;
+
+namespace paddle {
+namespace lite {
+namespace pybind {
+
+using lite_api::Tensor;
+using lite_api::CxxConfig;
+using lite_api::MobileConfig;
+using lite_api::PowerMode;
+using lite_api::TargetType;
+using lite_api::PrecisionType;
+using lite_api::DataLayoutType;
+using lite_api::Place;
+using lite::LightPredictorImpl;
+
+#ifndef LITE_ON_TINY_PUBLISH
+using lite::CxxPaddleApiImpl;
+static void BindLiteCxxPredictor(py::module *m);
+#endif
+static void BindLiteLightPredictor(py::module *m);
+static void BindLiteCxxConfig(py::module *m);
+static void BindLiteMobileConfig(py::module *m);
+static void BindLitePowerMode(py::module *m);
+static void BindLitePlace(py::module *m);
+static void BindLiteTensor(py::module *m);
+
+void BindLiteApi(py::module *m) {
+  BindLiteCxxConfig(m);
+  BindLiteMobileConfig(m);
+  BindLitePowerMode(m);
+  BindLitePlace(m);
+  BindLiteTensor(m);
+#ifndef LITE_ON_TINY_PUBLISH
+  BindLiteCxxPredictor(m);
+#endif
+  BindLiteLightPredictor(m);
+  // Global helper methods
+  m->def("create_paddle_predictor",
+         [](const CxxConfig &config) -> std::unique_ptr<CxxPaddleApiImpl> {
+           auto x = std::unique_ptr<CxxPaddleApiImpl>(new CxxPaddleApiImpl());
+           x->Init(config);
+           return std::move(x);
+         });
+  m->def("create_paddle_predictor",
+         [](const MobileConfig &config) -> std::unique_ptr<LightPredictorImpl> {
+           auto x =
+               std::unique_ptr<LightPredictorImpl>(new LightPredictorImpl());
+           x->Init(config);
+           return std::move(x);
+         });
+}
+
+void BindLiteCxxConfig(py::module *m) {
+  py::class_<CxxConfig> cxx_config(*m, "CxxConfig");
+
+  cxx_config.def(py::init<>())
+      .def("set_model_dir", &CxxConfig::set_model_dir)
+      .def("model_dir", &CxxConfig::model_dir)
+      .def("set_model_file", &CxxConfig::set_model_file)
+      .def("model_file", &CxxConfig::model_file)
+      .def("set_param_file", &CxxConfig::set_param_file)
+      .def("param_file", &CxxConfig::param_file)
+      .def("set_valid_places", &CxxConfig::set_valid_places)
+      .def("set_model_buffer", &CxxConfig::set_model_buffer)
+      .def("model_from_memory", &CxxConfig::model_from_memory);
+#ifdef LITE_WITH_ARM
+  cxx_config.def("set_threads", &CxxConfig::set_threads)
+      .def("threads", &CxxConfig::threads)
+      .def("set_power_mode", &CxxConfig::set_power_mode)
+      .def("power_mode", &CxxConfig::power_mode);
+#endif
+}
+
+// TODO(sangoly): Should MobileConfig be renamed to LightConfig ??
+void BindLiteMobileConfig(py::module *m) {
+  py::class_<MobileConfig> mobile_config(*m, "MobileConfig");
+
+  mobile_config.def(py::init<>())
+      .def("set_model_dir", &MobileConfig::set_model_dir)
+      .def("model_dir", &MobileConfig::model_dir)
+      .def("set_model_buffer", &MobileConfig::set_model_buffer)
+      .def("model_from_memory", &MobileConfig::model_from_memory);
+#ifdef LITE_WITH_ARM
+  mobile_config.def("set_threads", &MobileConfig::set_threads)
+      .def("threads", &MobileConfig::threads)
+      .def("set_power_mode", &MobileConfig::set_power_mode)
+      .def("power_mode", &MobileConfig::power_mode);
+#endif
+}
+
+void BindLitePowerMode(py::module *m) {
+  py::enum_<PowerMode>(*m, "PowerMode")
+      .value("LITE_POWER_HIGH", PowerMode::LITE_POWER_HIGH)
+      .value("LITE_POWER_LOW", PowerMode::LITE_POWER_LOW)
+      .value("LITE_POWER_FULL", PowerMode::LITE_POWER_FULL)
+      .value("LITE_POWER_NO_BIND", PowerMode::LITE_POWER_NO_BIND)
+      .value("LITE_POWER_RAND_HIGH", PowerMode::LITE_POWER_RAND_HIGH)
+      .value("LITE_POWER_RAND_LOW", PowerMode::LITE_POWER_RAND_LOW);
+}
+
+void BindLitePlace(py::module *m) {
+  // TargetType
+  py::enum_<TargetType>(*m, "TargetType")
+      .value("Host", TargetType::kHost)
+      .value("X86", TargetType::kX86)
+      .value("CUDA", TargetType::kCUDA)
+      .value("ARM", TargetType::kARM)
+      .value("OpenCL", TargetType::kOpenCL)
+      .value("FPGA", TargetType::kFPGA)
+      .value("NPU", TargetType::kNPU)
+      .value("Any", TargetType::kAny);
+
+  // PrecisionType
+  py::enum_<PrecisionType>(*m, "PrecisionType")
+      .value("FP16", PrecisionType::kFP16)
+      .value("FP32", PrecisionType::kFloat)
+      .value("INT8", PrecisionType::kInt8)
+      .value("INT16", PrecisionType::kInt16)
+      .value("INT32", PrecisionType::kInt32)
+      .value("INT64", PrecisionType::kInt64)
+      .value("BOOL", PrecisionType::kBool)
+      .value("Any", PrecisionType::kAny);
+
+  // DataLayoutType
+  py::enum_<DataLayoutType>(*m, "DataLayoutType")
+      .value("NCHW", DataLayoutType::kNCHW)
+      .value("NHWC", DataLayoutType::kNHWC)
+      .value("Any", DataLayoutType::kAny);
+
+  // Place
+  py::class_<Place>(*m, "Place")
+      .def(py::init<TargetType, PrecisionType, DataLayoutType, int16_t>(),
+           py::arg("target"),
+           py::arg("precision") = PrecisionType::kFloat,
+           py::arg("layout") = DataLayoutType::kNCHW,
+           py::arg("device") = 0)
+      .def("is_valid", &Place::is_valid);
+}
+
+void BindLiteTensor(py::module *m) {
+  auto data_size_func = [](const std::vector<int64_t> &shape) -> int64_t {
+    int64_t res = 1;
+    for (size_t i = 0; i < shape.size(); i++) {
+      res *= shape[i];
+    }
+    return res;
+  };
+
+  py::class_<Tensor> tensor(*m, "Tensor");
+
+  tensor.def("resize", &Tensor::Resize)
+      .def("shape", &Tensor::shape)
+      .def("target", &Tensor::target)
+      .def("precision", &Tensor::precision)
+      .def("lod", &Tensor::lod)
+      .def("set_lod", &Tensor::SetLoD);
+
+#define DO_GETTER_ONCE(data_type__, name__)                           \
+  tensor.def(#name__, [=](Tensor &self) -> std::vector<data_type__> { \
+    std::vector<data_type__> data;                                    \
+    auto shape = self.shape();                                        \
+    int64_t num = data_size_func(shape);                              \
+    data.resize(num);                                                 \
+    self.CopyToCpu(data.data());                                      \
+    return data;                                                      \
+  });
+
+#define DO_SETTER_ONCE(data_type__, name__)                               \
+  tensor.def(                                                             \
+      #name__,                                                            \
+      [](Tensor &self,                                                    \
+         const std::vector<data_type__> &data,                            \
+         TargetType type = TargetType::kHost) {                           \
+        if (type == TargetType::kHost || type == TargetType::kARM) {      \
+          self.CopyFromCpu<data_type__, TargetType::kHost>(data.data());  \
+        } else if (type == TargetType::kCUDA) {                           \
+          self.CopyFromCpu<data_type__, TargetType::kCUDA>(data.data());  \
+        }                                                                  \
+      },                                                                   \
+      py::arg("data"),                                                     \
+      py::arg("type") = TargetType::kHost);
+
+#define DATA_GETTER_SETTER_ONCE(data_type__, name__) \
+  DO_SETTER_ONCE(data_type__, set_##name__##_data)   \
+  DO_GETTER_ONCE(data_type__, name__##_data)
+
+  DATA_GETTER_SETTER_ONCE(int8_t, int8);
+  DATA_GETTER_SETTER_ONCE(int32_t, int32);
+  DATA_GETTER_SETTER_ONCE(float, float);
+#undef DO_GETTER_ONCE
+#undef DO_SETTER_ONCE
+#undef DATA_GETTER_SETTER_ONCE
+}
+
+#ifndef LITE_ON_TINY_PUBLISH
+void BindLiteCxxPredictor(py::module *m) {
+  py::class_<CxxPaddleApiImpl>(*m, "CxxPredictor")
+      .def(py::init<>())
+      .def("get_input", &CxxPaddleApiImpl::GetInput)
+      .def("get_output", &CxxPaddleApiImpl::GetOutput)
+      .def("run", &CxxPaddleApiImpl::Run)
+      .def("get_version", &CxxPaddleApiImpl::GetVersion)
+      .def("save_optimized_model",
+           [](CxxPaddleApiImpl &self, const std::string &output_dir) {
+             self.SaveOptimizedModel(output_dir,
+                                     lite_api::LiteModelType::kNaiveBuffer);
+           });
+}
+#endif
+
+void BindLiteLightPredictor(py::module *m) {
+  py::class_<LightPredictorImpl>(*m, "LightPredictor")
+      .def(py::init<>())
+      .def("get_input", &LightPredictorImpl::GetInput)
+      .def("get_output", &LightPredictorImpl::GetOutput)
+      .def("run", &LightPredictorImpl::Run)
+      .def("get_version", &LightPredictorImpl::GetVersion)
+      .def("save_optimized_model",
+           [](LightPredictorImpl &self, const std::string &output_dir) {
+             self.SaveOptimizedModel(output_dir,
+                                     lite_api::LiteModelType::kNaiveBuffer);
+           });
+}
+
+}  // namespace pybind
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/api/python/pybind/pybind.h b/lite/api/python/pybind/pybind.h
new file mode 100644
index 0000000000000000000000000000000000000000..ca05f24b32fd0b0418d9cf595fe6134b34fa725f
--- /dev/null
+++ b/lite/api/python/pybind/pybind.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+
+namespace paddle {
+namespace lite {
+namespace pybind {
+
+void BindLiteApi(pybind11::module *m);
+
+PYBIND11_MODULE(lite_core, m) {
+  m.doc() = "C++ core of Paddle-Lite";
+
+  BindLiteApi(&m);
+}
+
+}  // namespace pybind
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/backends/cuda/CMakeLists.txt b/lite/backends/cuda/CMakeLists.txt
index c0418f6b6a209bbfbdddf7a335d8a9fa3dd6246e..a6c3fcc66a789f159cd3a756ed893627b393e1fe 100644
--- a/lite/backends/cuda/CMakeLists.txt
+++ b/lite/backends/cuda/CMakeLists.txt
@@ -1,8 +1,10 @@
 if(NOT LITE_WITH_CUDA)
   return()
 endif()
+set(cuda_static_deps cudnn_static cublas_static curand_static
+    culibos_static cudart_static)
 
-nv_library(target_wrapper_cuda SRCS target_wrapper.cc)
-nv_library(cuda_blas SRCS blas.cc)
+nv_library(target_wrapper_cuda SRCS target_wrapper.cc DEPS ${cuda_static_deps})
+nv_library(cuda_blas SRCS blas.cc DEPS ${cuda_static_deps})
 
 add_subdirectory(math)
diff --git a/lite/kernels/cuda/CMakeLists.txt b/lite/kernels/cuda/CMakeLists.txt
index 67f55881ce4010d1179d9b6013aa560c56dd949e..a8b699f59b8e1f6d9a98e16cbab11f860a71447b 100644
--- a/lite/kernels/cuda/CMakeLists.txt
+++ b/lite/kernels/cuda/CMakeLists.txt
@@ -37,5 +37,4 @@ nv_test(pool_compute_cuda_test SRCS pool_compute_test.cc DEPS pool_compute_cuda)
 #nv_test(layout_cuda_test SRCS layout_compute_test.cc DEPS layout_compute_cuda)
 nv_test(mul_compute_cuda_test SRCS mul_compute_test.cc DEPS mul_compute_cuda)
 nv_test(dropout_compute_cuda_test SRCS dropout_compute_test.cc DEPS dropout_compute_cuda )
-nv_test(pool_compute_cuda_test SRCS pool_compute_test.cc DEPS pool_compute_cuda )
 nv_test(bilinear_interp_compute_cuda_test SRCS bilinear_interp_compute_test.cc DEPS bilinear_interp_compute_cuda)
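
Not part of the patch: a minimal usage sketch of the new Python bindings, assuming the tree is configured with -DLITE_WITH_PYTHON=ON, the built lite_pybind extension is importable as lite_core, and ./mobilenet_v1 is a hypothetical model directory. The names follow the bindings registered above (CxxConfig, Place, TargetType, create_paddle_predictor, resize, set_float_data, float_data).

```python
# Hedged sketch of calling the lite_core module added by this patch.
# Assumptions: the extension is on PYTHONPATH as lite_core and the model
# directory ./mobilenet_v1 exists; both are hypothetical.
from lite_core import CxxConfig, Place, TargetType, create_paddle_predictor

config = CxxConfig()
config.set_model_dir("./mobilenet_v1")
config.set_valid_places([Place(TargetType.X86)])  # choose kernels for x86

predictor = create_paddle_predictor(config)  # bound as CxxPredictor above

inp = predictor.get_input(0)
inp.resize([1, 3, 224, 224])
inp.set_float_data([0.0] * (1 * 3 * 224 * 224))  # copied in via CopyFromCpu

predictor.run()

out = predictor.get_output(0)
print(out.shape(), out.float_data()[:10])  # copied back via CopyToCpu
```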