Commit e00d9a82 authored by Ruilong Liu, committed by GitHub

Merge pull request #701 from NHZlX/add_high_api_for_paddle_mobile

add a high-level API for paddle-mobile
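In short, the new high-level API builds a PaddleMobileConfig, obtains a PaddlePredictor through CreatePaddlePredictor, and runs inference via Run() on PaddleTensor inputs and outputs. A minimal usage sketch mirroring the test added in this PR (the model path, input size, and thread count are illustrative values, not fixed by the API):

#include <vector>
#include "io/paddle_inference_api.h"

using namespace paddle_mobile;

int main() {
  // Configure the predictor; model_dir and thread_num are example values.
  PaddleMobileConfig config;
  config.precision = PaddleMobileConfig::FP32;
  config.device = PaddleMobileConfig::kCPU;
  config.model_dir = "../models/mobilenet/";
  config.thread_num = 4;

  // Create a predictor for the paddle-mobile engine.
  auto predictor =
      CreatePaddlePredictor<PaddleMobileConfig,
                            PaddleEngineKind::kPaddleMobile>(config);

  // One 1x3x224x224 float input; the output buffer is left empty so Run()
  // allocates it.
  static float input[1 * 3 * 224 * 224] = {1.0f};
  PaddleTensor in;
  in.shape = {1, 3, 224, 224};
  in.data = PaddleBuf(input, sizeof(input));
  in.dtype = PaddleDType::FLOAT32;

  std::vector<PaddleTensor> inputs(1, in);
  std::vector<PaddleTensor> outputs(1);
  bool ok = predictor->Run(inputs, &outputs);
  return ok ? 0 : 1;
}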
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "io/paddle_inference_api.h"
namespace paddle_mobile {
int PaddleDtypeSize(PaddleDType dtype) {
switch (dtype) {
case PaddleDType::FLOAT32:
return sizeof(float);
case PaddleDType::INT64:
return sizeof(int64_t);
default:
assert(false);
return -1;
}
}
PaddleBuf::PaddleBuf(PaddleBuf&& other)
: data_(other.data_),
length_(other.length_),
memory_owned_(other.memory_owned_) {
other.memory_owned_ = false;
other.data_ = nullptr;
other.length_ = 0;
}
PaddleBuf::PaddleBuf(const PaddleBuf& other) { *this = other; }
PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
// only the buffer with external memory can be copied
if (!other.memory_owned_) {
data_ = other.data_;
length_ = other.length_;
memory_owned_ = other.memory_owned_;
} else {
Resize(other.length());
memcpy(data_, other.data(), other.length());
length_ = other.length();
memory_owned_ = true;
}
return *this;
}
void PaddleBuf::Resize(size_t length) {
// Only the owned memory can be reset, the external memory can't be changed.
if (length_ == length) return;
if (memory_owned_) {
Free();
}
data_ = new char[length];
length_ = length;
memory_owned_ = true;
}
void PaddleBuf::Reset(void* data, size_t length) {
Free();
memory_owned_ = false;
data_ = data;
length_ = length;
}
void PaddleBuf::Free() {
if (memory_owned_ && data_) {
assert(length_ > 0);
delete[] static_cast<char*>(data_);
data_ = nullptr;
length_ = 0;
}
}
} // namespace paddle_mobile
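As an aside (not part of the committed file), the ownership rules implemented above can be illustrated with a small sketch: copying a PaddleBuf that wraps external memory shares the pointer, copying an owned buffer deep-copies it, and Reset() re-points a buffer at external memory it will never free. The helper function name below is made up for illustration:

#include <cassert>
#include <cstring>
#include "io/paddle_inference_api.h"

void paddle_buf_ownership_demo() {
  char external[16] = {0};

  // Wraps external memory: not owned, so a copy shares the same pointer.
  paddle_mobile::PaddleBuf wrapper(external, sizeof(external));
  paddle_mobile::PaddleBuf shared(wrapper);
  assert(shared.data() == external);

  // Owns its memory: a copy allocates a new buffer and memcpys into it.
  paddle_mobile::PaddleBuf owned(16);
  memset(owned.data(), 0, owned.length());
  paddle_mobile::PaddleBuf deep(owned);
  assert(deep.data() != owned.data());

  // Reset() re-points the buffer at external memory; the destructor will
  // not delete it.
  owned.Reset(external, sizeof(external));
  assert(owned.length() == sizeof(external));
}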
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "io/api_paddle_mobile.h"
#include <vector>
#include "framework/tensor.h"
namespace paddle_mobile {
template <typename Dtype, Precision P>
PaddleMobilePredictor<Dtype, P>::PaddleMobilePredictor(
const PaddleMobileConfig &config) {
PADDLE_MOBILE_ENFORCE(Init(config) == true,
"paddle mobile predictor init failed!");
config_ = config;
}
template <typename Dtype, Precision P>
bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
paddle_mobile_.reset(new PaddleMobile<Dtype, P>());
if (!config.model_dir.empty()) {
paddle_mobile_->Load(config.model_dir, config.optimize,
config.quantification, config.batch_size);
} else if (!config.prog_file.empty() && !config.param_file.empty()) {
paddle_mobile_->Load(config.prog_file, config.param_file, config.optimize,
config.quantification, config.batch_size);
} else {
LOG(kLOG_ERROR) << "fail to load inference model!";
return false;
}
// If OpenMP is enabled, set the number of threads.
paddle_mobile_->SetThreadNum(config.thread_num);
return true;
}
template <typename Dtype, Precision P>
bool PaddleMobilePredictor<Dtype, P>::Run(
const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data, int batch_size) {
if (inputs.empty()) {
LOG(kLOG_ERROR) << "At least one output should be set with tensors' names.";
return false;
}
auto input = inputs[0];
if (input.shape.size() != 4) {
LOG(kLOG_ERROR) << "input shape not equal to 4!";
return false;
}
std::vector<int64_t> dims;
for (auto d : input.shape) {
dims.push_back(static_cast<int64_t>(d));
}
// Copy the input data into a paddle-mobile tensor.
framework::DDim ddim =
framework::make_ddim({dims[0], dims[1], dims[2], dims[3]});
framework::Tensor input_tensor;
input_tensor.Resize(ddim);
int input_length = framework::product(ddim);
typedef typename PrecisionTrait<P>::ptype PType;
auto input_ptr = input_tensor.mutable_data<PType>();
memcpy(input_ptr, static_cast<PType *>(input.data.data()),
input_length * sizeof(PType));
auto output_tensor = paddle_mobile_->Predict(input_tensor);
if (output_data->empty()) {
LOG(kLOG_ERROR) << "At least one output should be set with tensors' names.";
return false;
}
auto &output = (*output_data)[0];
int output_length = output_tensor->numel();
std::vector<int64_t> tensor_shape =
framework::vectorize(output_tensor->dims());
for (auto d : tensor_shape) {
output.shape.push_back(static_cast<int>(d));
}
if (output.data.length() < output_length * sizeof(PType)) {
output.data.Resize(output_length * sizeof(PType));
}
memcpy(output.data.data(), output_tensor->template data<PType>(),
output_length * sizeof(PType));
return true;
}
// A factory to help create different predictors.
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<PaddleMobileConfig, PaddleEngineKind::kPaddleMobile>(
const PaddleMobileConfig &config) {
std::unique_ptr<PaddlePredictor> x;
if (config.precision == PaddleMobileConfig::FP32) {
if (config.device == PaddleMobileConfig::kCPU) {
x.reset(new PaddleMobilePredictor<CPU, Precision::FP32>(config));
} else if (config.device == PaddleMobileConfig::kFPGA) {
x.reset(new PaddleMobilePredictor<FPGA, Precision::FP32>(config));
} else if (config.device == PaddleMobileConfig::kGPU_MALI) {
x.reset(new PaddleMobilePredictor<GPU_MALI, Precision::FP32>(config));
} else {
LOG(kLOG_ERROR) << "unsupport device type!";
return nullptr;
}
} else {
LOG(kLOG_ERROR) << "unsupport precision type!";
return nullptr;
}
return std::move(x);
}
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
 * This file contains the declaration of the inference API with the
 * paddle-mobile engine embedded; this API can only support paddle-mobile
 * models.
 */
#pragma once
#include <vector>
#include "io/paddle_inference_api.h"
// from paddle_mobile
#include "common/enforce.h"
#include "common/types.h"
#include "io/paddle_mobile.h"
namespace paddle_mobile {
template <typename Dtype = CPU, Precision P = Precision::FP32>
class PaddleMobilePredictor : public PaddlePredictor {
public:
PaddleMobilePredictor() {}
explicit PaddleMobilePredictor(const PaddleMobileConfig& config);
bool Run(const std::vector<PaddleTensor>& inputs,
std::vector<PaddleTensor>* output_data,
int batch_size = -1) override;
~PaddleMobilePredictor() override {}
private:
std::unique_ptr<PaddleMobile<Dtype, P>> paddle_mobile_;
bool Init(const PaddleMobileConfig& config);
PaddleMobileConfig config_;
};
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file contains the definition of a simple Inference API for Paddle.
*
* ATTENTION: It requires some C++ features; for lower-version C++ or C, we
* might release another API.
*/
#pragma once
#include <cassert>
#include <memory>
#include <string>
#include <vector>
namespace paddle_mobile {
enum PaddleDType {
FLOAT32,
INT64,
};
class PaddleBuf {
public:
PaddleBuf() = default;
PaddleBuf(PaddleBuf&& other);
// Copy only available when memory is managed externally.
explicit PaddleBuf(const PaddleBuf&);
PaddleBuf& operator=(const PaddleBuf&);
// Do not own the memory.
PaddleBuf(void* data, size_t length)
: data_(data), length_(length), memory_owned_{false} {}
// Own memory.
PaddleBuf(size_t length)
: data_(new char[length]), length_(length), memory_owned_(true) {}
// Resize to `length` bytes.
void Resize(size_t length);
// Reset to external memory.
void Reset(void* data, size_t length);
bool empty() const { return length_ == 0; }
void* data() const { return data_; }
size_t length() const { return length_; }
~PaddleBuf() { Free(); }
private:
void Free();
void* data_{nullptr}; // pointer to the data memory.
size_t length_{0}; // number of memory bytes.
bool memory_owned_{true};
};
struct PaddleTensor {
PaddleTensor() = default;
std::string name; // variable name.
std::vector<int> shape;
// TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
PaddleBuf data; // blob of data.
PaddleDType dtype;
};
enum class PaddleEngineKind {
kPaddleMobile,
// TODO(Superjomn) support the following engines later.
// kTensorRT, // Use TensorRT for inference.
// kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
// kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
};
/*
* A simple Inference API for Paddle. Currently this API can be used in
* non-sequence scenarios.
*/
class PaddlePredictor {
public:
struct Config;
PaddlePredictor() = default;
PaddlePredictor(const PaddlePredictor&) = delete;
PaddlePredictor& operator=(const PaddlePredictor&) = delete;
// Predict a record.
// The caller is responsible for allocating and releasing the memory of
// `inputs`; `inputs` must remain valid until Run returns. The caller is also
// responsible for the output tensors' buffers, which can be either
// pre-allocated or passed in from outside.
virtual bool Run(const std::vector<PaddleTensor>& inputs,
std::vector<PaddleTensor>* output_data,
int batch_size = -1) = 0;
// Destroy the Predictor.
virtual ~PaddlePredictor() = default;
// The common configs for all the predictors.
struct Config {
std::string model_dir; // path to the model directory.
};
};
struct PaddleMobileConfig : public PaddlePredictor::Config {
enum Precision { FP32 = 0 };
enum Device { kCPU = 0, kFPGA = 1, kGPU_MALI = 2 };
enum Precision precision;
enum Device device;
int batch_size = 1;
bool optimize = true;
bool quantification = false;
int thread_num = 1;
std::string prog_file;
std::string param_file;
};
// A factory to help create different predictors.
template <typename ConfigT,
PaddleEngineKind engine = PaddleEngineKind::kPaddleMobile>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
} // namespace paddle_mobile
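Regarding the note on Run() that output buffers may be passed in from outside: the PaddleMobilePredictor implementation above only calls Resize() on an output PaddleBuf when it is smaller than the produced result, so a caller can keep the output in its own storage. A minimal sketch under that assumption; the function name, feed tensors, and the 1000-float capacity are placeholders:

#include <vector>
#include "io/paddle_inference_api.h"

void RunWithCallerOwnedOutput(
    paddle_mobile::PaddlePredictor *predictor,
    const std::vector<paddle_mobile::PaddleTensor> &feeds) {
  // Assumed capacity for the output (e.g. 1000 classification scores).
  static float out_storage[1000];

  paddle_mobile::PaddleTensor out;
  out.dtype = paddle_mobile::PaddleDType::FLOAT32;
  out.data = paddle_mobile::PaddleBuf(out_storage, sizeof(out_storage));

  std::vector<paddle_mobile::PaddleTensor> outputs(1, out);
  // As long as the provided buffer is large enough, Run() writes the result
  // directly into out_storage instead of allocating a new buffer.
  predictor->Run(feeds, &outputs);
}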
@@ -100,6 +100,10 @@ else ()
ADD_EXECUTABLE(test-load framework/test_load.cpp)
target_link_libraries(test-load paddle-mobile)
ADD_EXECUTABLE(test-inference-api framework/test_inference_api.cpp)
target_link_libraries(test-inference-api paddle-mobile)
# gen test log
# gen test
ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp)
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "io/paddle_inference_api.h"
using namespace paddle_mobile;
PaddleMobileConfig GetConfig() {
PaddleMobileConfig config;
config.precision = PaddleMobileConfig::FP32;
config.device = PaddleMobileConfig::kCPU;
config.model_dir = "../models/mobilenet/";
config.thread_num = 4;
return config;
}
int main() {
PaddleMobileConfig config = GetConfig();
auto predictor =
CreatePaddlePredictor<PaddleMobileConfig,
PaddleEngineKind::kPaddleMobile>(config);
float data[1 * 3 * 224 * 224] = {1.0f};
PaddleTensor tensor;
tensor.shape = std::vector<int>({1, 3, 224, 224});
tensor.data = PaddleBuf(data, sizeof(data));
tensor.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
PaddleTensor tensor_out;
tensor_out.shape = std::vector<int>({});
tensor_out.data = PaddleBuf();
tensor_out.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> outputs(1, tensor_out);
// Run outside of assert() so the call is not compiled out under NDEBUG.
bool success = predictor->Run(paddle_tensor_feeds, &outputs);
assert(success);
float* data_o = static_cast<float*>(outputs[0].data.data());
for (size_t j = 0; j < outputs[0].data.length() / sizeof(float); ++j) {
std::cout << "output[" << j << "]: " << data_o[j] << std::endl;
}
return 0;
}