diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ad69738eb2ac21d6ff2624f11d17a38410d5c1f..880fc8f05ca088f6b8bc37d6a878f5207ac2e877 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,12 @@ message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: " "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") message(STATUS "AR tools: ${CMAKE_AR}") +option(WITH_ARM_CPU "Cross compile PaddlePaddle to support ARM CPU" OFF) +if (WITH_ARM_CPU) + add_subdirectory(paddle/fluid/inference/lite) + return() +endif() + if(WIN32) set(CMAKE_SUPPRESS_REGENERATION ON) set(CMAKE_STATIC_LIBRARY_PREFIX lib) diff --git a/cmake/system.cmake b/cmake/system.cmake index 65db05bebe957d740e391847d980e211b0e9e750..5d837d9006e03c5bd44c33901a9450a3cd7e1d44 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -20,6 +20,10 @@ # for instance, protobuf libs path is /lib64 # on CentOS, but /lib on other systems. +if (WITH_ARM_CPU) + return() +endif() + IF(WIN32) SET(HOST_SYSTEM "win32") ELSE(WIN32) diff --git a/paddle/fluid/inference/lite/CMakeLists.txt b/paddle/fluid/inference/lite/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..da3f8cc04ba45c6f187a7543e5a9265821ed7660 --- /dev/null +++ b/paddle/fluid/inference/lite/CMakeLists.txt @@ -0,0 +1,43 @@ +add_definitions(-DUSE_ARM_PLACE) +set(CMAKE_CXX_FLAGS "-std=c++11 -pie -fPIE -Wno-attributes ${CMAKE_CXX_FLAGS}") +if (NOT (${CMAKE_CXX_COMPILER} MATCHES "clang\\+\\+$")) + set(CMAKE_CXX_FLAGS "-fopenmp ${CMAKE_CXX_FLAGS}") +endif() +if (ANDROID) + set(CMAKE_CXX_FLAGS "-llog ${CMAKE_CXX_FLAGS}") +endif() +if (IOS) + set(CMAKE_CXX_FLAGS "-fembed-bitcode ${CMAKE_CXX_FLAGS}") +endif() + +set(PADDLE_LITE_LIB paddle-lite) +set(PADDLE_LITE_SRCS api.cc api_anakin_engine.cc) + +set(PADDLE_LITE_PATH ${PADDLE_SOURCE_DIR}/paddle/fluid/inference/lite) +include_directories(${CMAKE_SOURCE_DIR}) +include_directories(${PADDLE_LITE_PATH} ${PADDLE_LITE_PATH}/output + ${PADDLE_LITE_PATH}/output/saber) + +if (BUILD_SHARED_LIBS) + add_library(${PADDLE_LITE_LIB} SHARED ${PADDLE_LITE_SRCS}) +else() + add_library(${PADDLE_LITE_LIB} STATIC ${PADDLE_LITE_SRCS}) +endif(BUILD_SHARED_LIBS) +#target_link_libraries(${PADDLE_LITE_LIB} ) + +#add_library(anakin SHARED IMPORTED) +#set_target_properties(anakin PROPERTIES IMPORTED_LOCATION +# ${PADDLE_LITE_PATH}/output/libanakin.so) +add_library(anakin STATIC IMPORTED) +set_target_properties(anakin PROPERTIES IMPORTED_LOCATION + ${PADDLE_LITE_PATH}/output/libanakin_static.a) +add_library(saber_common STATIC IMPORTED) +set_target_properties(saber_common PROPERTIES IMPORTED_LOCATION + ${PADDLE_LITE_PATH}/output/libanakin_saber_common.a) +add_library(protobuf STATIC IMPORTED) +set_target_properties(protobuf PROPERTIES IMPORTED_LOCATION + ${PADDLE_LITE_PATH}/output/protobuf/lib/libprotobuf.a) + +add_executable(test-benchmark benchmark/benchmark.cc) +target_link_libraries(test-benchmark paddle-lite "-Wl,--whole-archive" + saber_common anakin "-Wl,--no-whole-archive" protobuf) diff --git a/paddle/fluid/inference/lite/anakin_config.h b/paddle/fluid/inference/lite/anakin_config.h new file mode 100644 index 0000000000000000000000000000000000000000..31ca96538b92032bcdc27eef08913771bc5e87ea --- /dev/null +++ b/paddle/fluid/inference/lite/anakin_config.h @@ -0,0 +1,39 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +#include "paddle_api.h" // NOLINT +#include "utils/logger/logger.h" + +namespace paddle { +namespace contrib { +// Configurations for Anakin engine. +struct AnakinConfig : public PaddlePredictor::Config { + enum TargetType { ARM = 0, GPU }; + enum PrecisionType { FP32 = 0, FP16, INT8 }; + + std::string model_file; + int max_batch_size = 1; + int thread_num = 1; + TargetType target_type = ARM; + PrecisionType precision_type = FP32; +}; + +} // namespace contrib +} // namespace paddle diff --git a/paddle/fluid/inference/lite/api.cc b/paddle/fluid/inference/lite/api.cc new file mode 100644 index 0000000000000000000000000000000000000000..2843b09b7c7a645f85add424e7b14cfce49a8c79 --- /dev/null +++ b/paddle/fluid/inference/lite/api.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include "paddle/fluid/inference/lite/paddle_api.h" + +namespace paddle { + +int PaddleDtypeSize(PaddleDType dtype) { + switch (dtype) { + case PaddleDType::FLOAT32: + return sizeof(float); + case PaddleDType::INT64: + return sizeof(int64_t); + case PaddleDType::INT32: + return sizeof(int32_t); + default: + assert(false); + return -1; + } +} + +PaddleBuf::PaddleBuf(PaddleBuf &&other) + : data_(other.data_), + length_(other.length_), + memory_owned_(other.memory_owned_) { + other.memory_owned_ = false; + other.data_ = nullptr; + other.length_ = 0; +} + +PaddleBuf::PaddleBuf(const PaddleBuf &other) { *this = other; } + +PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) { + if (!other.memory_owned_) { + data_ = other.data_; + length_ = other.length_; + memory_owned_ = other.memory_owned_; + } else { + Resize(other.length()); + memcpy(data_, other.data(), other.length()); + length_ = other.length(); + memory_owned_ = true; + } + return *this; +} + +PaddleBuf &PaddleBuf::operator=(PaddleBuf &&other) { + // only the buffer with external memory can be copied + data_ = other.data_; + length_ = other.length_; + memory_owned_ = other.memory_owned_; + other.data_ = nullptr; + other.length_ = 0; + other.memory_owned_ = false; + return *this; +} + +void PaddleBuf::Resize(size_t length) { + // Only the owned memory can be reset, the external memory can't be changed. 
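+  // Growing an owned buffer frees the old block and allocates a fresh one, so
+  // the previous contents are not preserved.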
+ if (length_ >= length) return; + if (memory_owned_) { + Free(); + data_ = malloc(length); + length_ = length; + memory_owned_ = true; + } else { + // PADDLE_THROW("The memory is allocated externally, can not Resized"); + } +} + +void PaddleBuf::Reset(void *data, size_t length) { + Free(); + memory_owned_ = false; + data_ = data; + length_ = length; +} + +void PaddleBuf::Free() { + if (memory_owned_ && data_) { + // PADDLE_ENFORCE_GT(length_, 0UL); + free(static_cast(data_)); + data_ = nullptr; + length_ = 0; + } +} + +} // namespace paddle diff --git a/paddle/fluid/inference/lite/api_anakin_engine.cc b/paddle/fluid/inference/lite/api_anakin_engine.cc new file mode 100644 index 0000000000000000000000000000000000000000..4ac8f5081727b3a8c66ecd6a49aa2c00527e2c3e --- /dev/null +++ b/paddle/fluid/inference/lite/api_anakin_engine.cc @@ -0,0 +1,245 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/lite/api_anakin_engine.h" + +#include +#include +#include +#include +#include +#include + +#include "framework/core/net/net.h" +#include "framework/operators/ops.h" +#include "saber/funcs/timer.h" + +namespace paddle { + +using paddle::contrib::AnakinConfig; + +template +PaddleInferenceAnakinPredictor:: + PaddleInferenceAnakinPredictor(const contrib::AnakinConfig &config) { + anakin::saber::Env::env_init(); +#ifdef USE_ARM_PLACE + anakin::saber::Context ctx; + // set mode and thread number + anakin::saber::PowerMode mode = anakin::saber::SABER_POWER_HIGH; + ctx.set_run_mode(mode, config.thread_num); +// ctx.set_arch(anakin::A73); +// ctx.set_cache(32 * 1024, 512 * 1024, 0); +#endif + CHECK(Init(config)); +} + +template +bool PaddleInferenceAnakinPredictor::Init( + const contrib::AnakinConfig &config) { + if (!(graph_.load(config.model_file))) { + LOG(INFO) << "fail to load graph from " << config.model_file; + return false; + } + + auto inputs = graph_.get_ins(); + for (auto &input_str : inputs) { + graph_.ResetBatchSize(input_str, config.max_batch_size); + max_batch_size_ = config.max_batch_size; + } + // optimization for graph + if (!(graph_.Optimize())) { + return false; + } + // construct executer + if (executor_p_ == nullptr) { + executor_p_ = new anakin::Net(graph_, true); + } + return true; +} + +template +bool PaddleInferenceAnakinPredictor::Run( + const std::vector &inputs, + std::vector *output_data, int batch_size) { + for (const auto &input : inputs) { + if (input.dtype != PaddleDType::FLOAT32) { + LOG(INFO) << "Only support float type inputs. 
" << input.name + << "'s type is not float"; + return false; + } + auto d_tensor_in_p = executor_p_->get_in(input.name); + auto net_shape = d_tensor_in_p->shape(); + if (net_shape.size() != input.shape.size()) { + LOG(INFO) << " input " << input.name + << "'s shape size should be equal to that of net"; + return false; + } + int sum = 1; + for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; }); + if (sum > net_shape.count()) { + graph_.Reshape(input.name, input.shape); + delete executor_p_; + executor_p_ = new anakin::Net(graph_, true); + d_tensor_in_p = executor_p_->get_in(input.name); + } + + anakin::saber::Shape tmp_shape; + for (auto s : input.shape) { + tmp_shape.push_back(s); + } + d_tensor_in_p->reshape(tmp_shape); + + if (input.lod.size() > 0) { + if (input.lod.size() > 1) { + LOG(INFO) << " input lod first dim should <=1, but you set " + << input.lod.size(); + return false; + } + std::vector offset(input.lod[0].begin(), input.lod[0].end()); + d_tensor_in_p->set_seq_offset({offset}); + LOG(INFO) << "offset.size(): " << offset.size(); + for (int i = 0; i < offset.size(); i++) { + LOG(INFO) << offset[i]; + } + } + + void *d_data_p = d_tensor_in_p->mutable_data(); + if (std::is_same::value) { + memcpy(d_data_p, static_cast(input.data.data()), + d_tensor_in_p->valid_size() * sizeof(float)); + } + } + + if (output_data->empty()) { + LOG(INFO) << "At least one output should be set with tensors' names."; + return false; + } + // run prediction + executor_p_->prediction(); + + for (auto &output : *output_data) { + auto *tensor = executor_p_->get_out(output.name); + output.shape = tensor->valid_shape(); + if (output.data.length() < tensor->valid_size() * sizeof(float)) { + output.data.Resize(tensor->valid_size() * sizeof(float)); + } + + if (std::is_same::value) { + memcpy(output.data.data(), tensor->mutable_data(), + tensor->valid_size() * sizeof(float)); + } + } + return true; +} + +template +anakin::Net + &PaddleInferenceAnakinPredictor::get_executer() { + return *executor_p_; +} + +// the cloned new Predictor of anakin share the same net weights from original +// Predictor +template +std::unique_ptr +PaddleInferenceAnakinPredictor::Clone() { + LOG(INFO) << "Anakin Predictor::clone"; + std::unique_ptr cls( + new PaddleInferenceAnakinPredictor()); + // construct executer from other graph + auto anakin_predictor_p = + dynamic_cast *>( + cls.get()); + if (!anakin_predictor_p) { + LOG(INFO) << "fail to call Init"; + return nullptr; + } + anakin_predictor_p->get_executer().init(graph_); + + return std::move(cls); +} + +template class PaddleInferenceAnakinPredictor; +template class PaddleInferenceAnakinPredictor; + +// A factory to help create difference predictor. 
+template <> +std::unique_ptr +CreatePaddlePredictor( + const contrib::AnakinConfig &config) { + if (config.target_type != contrib::AnakinConfig::ARM) { + LOG(INFO) << "Anakin Predictor: Only ARM platform is supported currently."; + return nullptr; + } + + LOG(INFO) << "Anakin Predictor create."; + if (config.precision_type == contrib::AnakinConfig::FP32) { + LOG(INFO) << "Anakin Predictor create on [ FP32 ]."; + std::unique_ptr x( + new PaddleInferenceAnakinPredictor(config)); + return x; + } else if (config.precision_type == contrib::AnakinConfig::INT8) { + LOG(INFO) << "Anakin Predictor create on [ INT8 ]."; + std::unique_ptr x( + new PaddleInferenceAnakinPredictor(config)); + return x; + } else { + LOG(INFO) << "Anakin Predictor create on unsupported precision."; + return nullptr; + } +} + +#ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER +template +using executor_t = anakin::Net; + +template +void DisplayOpTimer(executor_t *net_executor, int epoch) { + std::vector op_time = net_executor->get_op_time(); + auto exec_funcs = net_executor->get_exec_funcs(); + auto op_param = net_executor->get_op_param(); + for (int i = 0; i < op_time.size(); i++) { + LOG(INFO) << "name: " << exec_funcs[i].name + << " op_type: " << exec_funcs[i].op_name + << " op_param: " << op_param[i] << " time " << op_time[i] / epoch; + } + std::map op_map; + for (int i = 0; i < op_time.size(); i++) { + auto it = op_map.find(op_param[i]); + if (it != op_map.end()) + op_map[op_param[i]] += op_time[i]; + else + op_map.insert(std::pair(op_param[i], op_time[i])); + } + for (auto it = op_map.begin(); it != op_map.end(); ++it) { + LOG(INFO) << it->first << " " << (it->second) / epoch << " ms"; + } +} +#endif + +template +PaddleInferenceAnakinPredictor::~PaddleInferenceAnakinPredictor() { +#ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER + DisplayOpTimer(executor_p_, max_batch_size_); +#endif + delete executor_p_; + executor_p_ = nullptr; +} + +} // namespace paddle diff --git a/paddle/fluid/inference/lite/api_anakin_engine.h b/paddle/fluid/inference/lite/api_anakin_engine.h new file mode 100644 index 0000000000000000000000000000000000000000..24fbd997c0fd9658e36831f4299e46a4fd9b399e --- /dev/null +++ b/paddle/fluid/inference/lite/api_anakin_engine.h @@ -0,0 +1,69 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * This file contains the implementation of inference API with Anakin engine + * embeded, this API can only support Anakin models. 
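+ * The predictor keeps an optimized Anakin graph and a net executor built from
+ * it; Clone() shares the model weights of the original predictor.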
+ */ + +#pragma once + +#include +#include +#include +#include "framework/core/net/net.h" +#include "framework/graph/graph.h" +#include "paddle/fluid/inference/lite/anakin_config.h" +#include "saber/core/shape.h" +#include "saber/saber_types.h" + +namespace paddle { + +using contrib::AnakinConfig; + +template +class PaddleInferenceAnakinPredictor : public PaddlePredictor { + public: + PaddleInferenceAnakinPredictor() {} + + explicit PaddleInferenceAnakinPredictor(const AnakinConfig& config); + + // NOTE Unlike the native engine, the buffers of anakin engine's output_data + // should be allocated first. + bool Run(const std::vector& inputs, + std::vector* output_data, + int batch_size = -1) override; + + std::vector GetInputNames() override { return graph_.get_ins(); } + + std::vector GetOutputNames() override { + return graph_.get_outs(); + } + + std::unique_ptr Clone() override; + + anakin::Net& get_executer(); + + ~PaddleInferenceAnakinPredictor() override; + + private: + bool Init(const AnakinConfig& config); + + anakin::graph::Graph graph_; + anakin::Net* executor_p_{nullptr}; + AnakinConfig config_; + int max_batch_size_{0}; +}; + +} // namespace paddle diff --git a/paddle/fluid/inference/lite/benchmark/benchmark.cc b/paddle/fluid/inference/lite/benchmark/benchmark.cc new file mode 100644 index 0000000000000000000000000000000000000000..b462881b652894754883131c18ba11dd79afab94 --- /dev/null +++ b/paddle/fluid/inference/lite/benchmark/benchmark.cc @@ -0,0 +1,149 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include +#include "paddle/fluid/inference/lite/anakin_config.h" +#include "paddle/fluid/inference/lite/paddle_api.h" + +namespace paddle { + +void PrintShape(const std::vector &shape) { + std::ostringstream os; + os << "Shape: "; + if (shape.size() > 0) { + os << shape[0]; + for (int i = 1; i < shape.size(); ++i) { + os << ", " << shape[i]; + } + } + LOG(INFO) << os.str(); +} + +int ShapeSize(const std::vector &shape) { + int size = 1; + for (int j = 0; j < shape.size(); ++j) { + size *= shape[j]; + } + return size; +} + +template +int InitTensorValFromFile(const std::string &file, PaddleTensor *tensor) { + int size = ShapeSize(tensor->shape); + void *tensor_data = tensor->data.data(); + std::ifstream in(file, std::ios::in | std::ios::binary); + in.read(reinterpret_cast(tensor_data), size * sizeof(T)); + in.close(); +} + +int SetupTensors(const std::vector> &shapes, + const std::vector &names, + std::vector *outputs) { + while (outputs->size() < shapes.size()) { + outputs->emplace_back(); + } + for (int i = 0; i < shapes.size(); ++i) { + int size = ShapeSize(shapes[i]); + outputs->at(i).name = names[i]; + outputs->at(i).shape = shapes[i]; + outputs->at(i).data.Resize(size * sizeof(float)); + outputs->at(i).dtype = FLOAT32; + } +} + +int test(const char *model, const char *image, const char *image_shape, + const int quant, const int times) { + contrib::AnakinConfig config; + config.model_file = std::string(model); + // config.model_file = "./mobilenetv1.anakin.bin"; + config.max_batch_size = 1; + config.precision_type = + (quant == 1) ? contrib::AnakinConfig::INT8 : contrib::AnakinConfig::FP32; + + LOG(INFO) << "quant: " << quant; + + std::unique_ptr predictor = + CreatePaddlePredictor( + config); + + LOG(INFO) << "create predictor success"; + std::vector in_names = predictor->GetInputNames(); + std::vector inputs, outpus; + std::vector> in_shapes; + std::vector dim{1, 3, 224, 224}; + sscanf(image_shape, "%d,%d,%d,%d", &dim[0], &dim[1], &dim[2], &dim[3]); + in_shapes.push_back(dim); + SetupTensors(in_shapes, in_names, &inputs); + PrintShape(dim); + + // InitTensorValFromFile("./test_image_1x3x224x224_float", &inputs[0]); + InitTensorValFromFile(std::string(image), &inputs[0]); + LOG(INFO) << "init tensor value success"; + + std::vector out_names = predictor->GetOutputNames(); + LOG(INFO) << "output size: " << out_names.size(); + outpus.resize(out_names.size()); + for (int i = 0; i < out_names.size(); ++i) { + outpus[i].name = out_names[i]; + } + + LOG(INFO) << "start run prediction"; + predictor->Run(inputs, &outpus); + + struct timespec ts_begin, ts_end; + clock_gettime(CLOCK_MONOTONIC, &ts_begin); + for (int i = 0; i < times; ++i) { + predictor->Run(inputs, &outpus); + } + clock_gettime(CLOCK_MONOTONIC, &ts_end); + uint64_t elapsed = (ts_end.tv_sec - ts_begin.tv_sec) * 1e3 + + (ts_end.tv_nsec - ts_begin.tv_nsec) / 1e6; + LOG(INFO) << "elapsed: " << (1.f * elapsed) / times << " ms"; + + LOG(INFO) << "finish prediction"; + + for (int i = 0; i < outpus.size(); ++i) { + int size = ShapeSize(outpus[i].shape); + // int stride = (size + 19) / 20; + int stride = 1; + int loop = size / stride; + float *output_data = static_cast(outpus[i].data.data()); + std::ostringstream os; + os << output_data[0]; + for (int j = 1; j < loop; ++j) { + os << ", " << output_data[j * stride]; + } + LOG(INFO) << os.str(); + } + return 0; +} + +} // namespace paddle + +int main(int argc, char *argv[]) { + if (argc < 6) { + LOG(INFO) << "Usage: ./benchmark [model] [image] 
[image-shape] [8bit] " + "[run-times]"; + LOG(INFO) << "Example:"; + LOG(INFO) << " ./benchmark ./mobilenetv1.model ./test_image.bin " + "1,3,224,224 0 10"; + return 1; + } + int quant_8bit = atoi(argv[4]); + int times = atoi(argv[5]); + return paddle::test(argv[1], argv[2], argv[3], quant_8bit, times); +} diff --git a/paddle/fluid/inference/lite/paddle_api.h b/paddle/fluid/inference/lite/paddle_api.h new file mode 100644 index 0000000000000000000000000000000000000000..87f40f09eb9bb552bd246cb39bbbd41abac1c9ac --- /dev/null +++ b/paddle/fluid/inference/lite/paddle_api.h @@ -0,0 +1,357 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +/*! \file paddle_api.h + */ + +/*! \mainpage Paddle Inference APIs + * \section intro_sec Introduction + * The Paddle inference library aims to offer an high performance inference SDK + * for Paddle users. + */ + +#include +#include +#include +#include + +/*! \namespace paddle + */ +namespace paddle { + +/** paddle data type. + */ +enum PaddleDType { + FLOAT32, + INT64, + INT32, + // TODO(Superjomn) support more data types if needed. +}; + +/** + * \brief Memory manager for `PaddleTensor`. + * + * The PaddleBuf holds a buffer for data input or output. The memory can be + * allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf + * should be reused for better performance. + * + * For user allocated memory, the following API can be used: + * - PaddleBuf(void* data, size_t length) to set an external memory by + * specifying the memory address and length. + * - Reset(void* data, size_t length) to reset the PaddleBuf with an external + *memory. + * ATTENTION, for user allocated memory, deallocation should be done by users + *externally after the program finished. The PaddleBuf won't do any allocation + *or deallocation. + * + * To have the PaddleBuf allocate and manage the memory: + * - PaddleBuf(size_t length) will allocate a memory of size `length`. + * - Resize(size_t length) resize the memory to no less than `length`, ATTENTION + * if the allocated memory is larger than `length`, nothing will done. + * + * Usage: + * + * Let PaddleBuf manage the memory internally. + * \code{cpp} + * const int num_elements = 128; + * PaddleBuf buf(num_elements * sizeof(float)); + * \endcode + * + * Or + * \code{cpp} + * PaddleBuf buf; + * buf.Resize(num_elements * sizeof(float)); + * \endcode + * Works the exactly the same. + * + * One can also make the `PaddleBuf` use the external memory. + * \code{cpp} + * PaddleBuf buf; + * void* external_memory = new float[num_elements]; + * buf.Reset(external_memory, num_elements*sizeof(float)); + * ... + * delete[] external_memory; // manage the memory lifetime outside. + * \endcode + */ +class PaddleBuf { + public: + /** PaddleBuf allocate memory internally, and manage it. 
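+   * The allocated memory is owned by this PaddleBuf and released when it is
+   * destructed.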
+ */ + explicit PaddleBuf(size_t length) + : data_(new char[length]), length_(length), memory_owned_(true) {} + /** Set external memory, the PaddleBuf won't manage it. + */ + PaddleBuf(void* data, size_t length) + : data_(data), length_(length), memory_owned_{false} {} + /** Copy only available when memory is managed externally. + */ + explicit PaddleBuf(const PaddleBuf&); + + /** Resize the memory. + */ + void Resize(size_t length); + /** Reset to external memory, with address and length set. + */ + void Reset(void* data, size_t length); + /** Tell whether the buffer is empty. + */ + bool empty() const { return length_ == 0; } + /** Get the data's memory address. + */ + void* data() const { return data_; } + /** Get the memory length. + */ + size_t length() const { return length_; } + + ~PaddleBuf() { Free(); } + PaddleBuf& operator=(const PaddleBuf&); + PaddleBuf& operator=(PaddleBuf&&); + PaddleBuf() = default; + PaddleBuf(PaddleBuf&& other); + + private: + void Free(); + void* data_{nullptr}; // pointer to the data memory. + size_t length_{0}; // number of memory bytes. + bool memory_owned_{true}; +}; + +/** Basic input and output data structure for PaddlePredictor. + */ +struct PaddleTensor { + PaddleTensor() = default; + std::string name; // variable name. + std::vector shape; + PaddleBuf data; // blob of data. + PaddleDType dtype; + std::vector> lod; // Tensor+LoD equals LoDTensor +}; + +enum class PaddlePlace { kUNK = -1, kCPU, kGPU }; + +/** Tensor without copy, currently only supports `AnalysisPredictor`. + */ +class ZeroCopyTensor { + public: + void Reshape(const std::vector& shape); + + /** Get the memory in CPU or GPU with specific data type, should Reshape first + * to tell the data size. + * Once can directly call this data to feed the data. + * This is for write the input tensor. + */ + template + T* mutable_data(PaddlePlace place); + /** Get the memory directly, will return the place and element size by + * pointer. + * This is for reading the output tensor. + */ + template + T* data(PaddlePlace* place, int* size) const; + + template + void copy_from_cpu(const T* data); + + template + void copy_to_cpu(T* data); + + std::vector shape() const; + + void SetLoD(const std::vector>& x); + std::vector> lod() const; + const std::string& name() const { return name_; } + void SetPlace(PaddlePlace place, int device = -1) { + place_ = place; + device_ = device; + } + + PaddleDType type() const; + + protected: + explicit ZeroCopyTensor(void* scope) : scope_{scope} {} + void SetName(const std::string& name) { name_ = name; } + void* FindTensor() const; + + private: + std::string name_; + bool input_or_output_; + friend class AnalysisPredictor; + void* scope_{nullptr}; + // The corresponding tensor pointer inside Paddle workspace is cached for + // performance. + mutable void* tensor_{nullptr}; + PaddlePlace place_; + PaddleDType dtype_; + int device_; +}; + +/** A simple Inference API for Paddle. + */ +class PaddlePredictor { + public: + struct Config; + PaddlePredictor() = default; + PaddlePredictor(const PaddlePredictor&) = delete; + PaddlePredictor& operator=(const PaddlePredictor&) = delete; + + /** Predict an record. + * The caller should be responsible for allocating and releasing the memory of + * `inputs`. `inputs` should be available until Run returns. Caller should be + * responsible for the output tensor's buffer, either allocated or passed from + * outside. 
+ */ + virtual bool Run(const std::vector& inputs, + std::vector* output_data, + int batch_size = -1) = 0; + + /** \brief Get input names of the model + */ + virtual std::vector GetInputNames() { return {}; } + + /** \brief Get output names of the model + */ + virtual std::vector GetOutputNames() { return {}; } + + /** \brief Get a mutable tensor directly. + * + * NOTE Only works in AnalysisPredictor. + * + * One can also use this to modify any temporary variable related tensors in + * the predictor. + * + */ + virtual std::unique_ptr GetInputTensor( + const std::string& name) { + return nullptr; + } + /** + * \brief Get an immutable tensor without copy. + * + * NOTE Only works in AnalysisPredictor. + * One can use this API to get any temporary tensors in the predictor and + * read it. + */ + virtual std::unique_ptr GetOutputTensor( + const std::string& name) { + return nullptr; + } + /** + * \brief Run the predictor with zero-copied inputs and outputs. + * + * NOTE Only works in AnalysisPredictor. + * + * This will save the IO copy for transfering inputs and outputs to predictor + * workspace and get some performance improvement. + * To use it, one should call the `AnalysisConfig.SwitchUseFeedFetchOp(true)` + * and then use the `GetInputTensor` and `GetOutputTensor` to directly write + * or read the input/output tensors. + */ + virtual bool ZeroCopyRun() { return false; } + + /** Clone a predictor that share the model weights, the Cloned predictor + * should be thread-safe. + */ + virtual std::unique_ptr Clone() = 0; + + /** Destroy the Predictor. + */ + virtual ~PaddlePredictor() = default; + + /** \brief Get the serialized model program that executes in inference phase. + * Its data type is ProgramDesc, which is a protobuf message. + */ + virtual std::string GetSerializedProgram() const { + assert(false); // Force raise error. + return "NotImplemented"; + } + + /** The common configs for all the predictors. + */ + struct Config { + std::string model_dir; /*!< path to the model directory. */ + }; +}; + +struct NativeConfig : public PaddlePredictor::Config { + // GPU related fields. + bool use_gpu{false}; + int device{0}; + float fraction_of_gpu_memory{ + -1.f}; /*!< Change to a float in (0,1] if needed. */ + + // Specify the exact path of program and parameter files. + std::string prog_file; + std::string param_file; + + /** Specify the variable's name of each input if input tensors don't follow + * the + * `feeds` and `fetches` of the phase `save_inference_model`. + */ + bool specify_input_name{false}; + + /** Set and get the number of cpu math library threads. + */ + void SetCpuMathLibraryNumThreads(int cpu_math_library_num_threads) { + cpu_math_library_num_threads_ = cpu_math_library_num_threads; + } + int cpu_math_library_num_threads() const { + return cpu_math_library_num_threads_; + } + + protected: + // number of cpu math library (such as MKL, OpenBlas) threads for each + // instance. + int cpu_math_library_num_threads_{1}; +}; + +/*! \fn std::unique_ptr CreatePaddlePredictor(const ConfigT& + * config); + * + * \brief A factory to help create different predictors. + * + * Usage: + * + * \code{.cpp} + * NativeConfig config; + * ... // change the configs. + * auto native_predictor = CreatePaddlePredictor(config); + * \endcode + * + * FOR EXTENSION DEVELOPER: + * Different predictors are designated by config type. Similar configs can be + * merged, but there shouldn't be a huge config containing different fields for + * more than one kind of predictors. 
+ */ +template +std::unique_ptr CreatePaddlePredictor(const ConfigT& config); + +/** NOTE The following APIs are too trivial, we will discard it in the following + * versions. + */ +enum class PaddleEngineKind { + kNative = 0, /*!< Use the native Fluid facility. */ + kAutoMixedTensorRT, /*!< Automatically mix Fluid with TensorRT. */ + kAnalysis, /*!< More optimization. */ + kAnakin /*!< Use Anakin for inference, not mature yet. */ +}; + +template +std::unique_ptr CreatePaddlePredictor(const ConfigT& config); + +int PaddleDtypeSize(PaddleDType dtype); + +std::string get_version(); + +} // namespace paddle