Commit 9729edac authored by hjchen2

Support feed multi inputs and fetch multi outputs

Parent: f20c9041
(The diff for one file has been collapsed and is not shown.)
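The hunks below move Executor, Loader and PaddleMobile from the <Device, Precision> template pair to <Device, T>, and replace the single-tensor Predict/PredictLod entry points with a feed/fetch style interface. The following is a minimal usage sketch of the new PaddleMobile API based only on the signatures added in this commit; the model path and the feed variable name "image" are hypothetical placeholders, not values taken from the diff.

// Hedged usage sketch of the multi-input / multi-output interface added here.
// "./mobilenet" and the feed name "image" are placeholders for illustration.
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
#include "io/paddle_mobile.h"

int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> paddle_mobile;
  paddle_mobile.SetThreadNum(4);
  // Load() now returns PMStatus; PMSuccess is the value returned by the
  // Load() implementations in this diff.
  if (paddle_mobile.Load("./mobilenet", /*optimize=*/true) !=
      paddle_mobile::PMSuccess) {
    return -1;
  }

  // Prepare one input tensor (shape is illustrative).
  std::vector<int64_t> dims{1, 3, 224, 224};
  paddle_mobile::framework::Tensor image;
  image.Resize(paddle_mobile::framework::make_ddim(dims));
  image.mutable_data<float>();

  // Option A: feed named inputs one by one, then run the whole program.
  paddle_mobile.Feed(image, "image");
  paddle_mobile.Predict();

  // Option B: hand all (name, tensor) pairs to a single Predict call.
  std::vector<std::pair<std::string, paddle_mobile::framework::Tensor>> inputs;
  inputs.emplace_back("image", image);
  paddle_mobile.Predict(inputs);

  // Fetch outputs by variable name; Fetch() without a name defaults to "fetch".
  auto out = paddle_mobile.Fetch();
  return out->numel() > 0 ? 0 : 1;
}

Calling Fetch("some_fetch_var") once per fetch variable is what "fetch multi outputs" in the commit title refers to.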
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include <map> #include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
#include "common/util.h" #include "common/util.h"
...@@ -28,41 +29,29 @@ limitations under the License. */ ...@@ -28,41 +29,29 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
template <typename Dtype = CPU, Precision P = Precision::FP32> template <typename Device, typename T = float>
class Executor { class Executor {
public: public:
typedef typename PrecisionTrait<P>::ptype Ptype; Executor(const Program<Device> &program, int batch_size = 1,
// exector constructor const bool use_optimize = true, const bool lod_mode = false);
// @param program program converted from proto program in PaddlePaddle
// @param use_optimize bool whether use operator fusion to speed up or not PMStatus Predict(const std::vector<std::pair<std::string, Tensor>> &inputs);
// @param loddable bool PMStatus Predict(
Executor(const framework::Program<Dtype> program, int batch_size = 1, const std::vector<std::pair<std::string, LoDTensor>> &inputs);
const bool use_optimize = true, const bool loddable = false);
std::vector<T> Predict(const std::vector<T> &input,
// predict with tensor input const std::vector<int64_t> &dims);
// @param t input tensor to do prediction PMStatus Predict();
// @return predicted tensor
std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t); void SetInput(const Tensor &input, const std::string &var_name);
void SetInput(const LoDTensor &input, const std::string &var_name);
// predict with lod tensor input
// @param t input lod tensor to do prediction std::shared_ptr<LoDTensor> GetOutput(const std::string &var_name);
// @return predicted lod tensor
std::shared_ptr<framework::LoDTensor> PredictLod(
const framework::LoDTensor &t);
// predict with vector input and dims
// @param input vector whose elements will be formed
// @param input lod tensor to do prediction
// @param dims vector whose elements will be formed
// @param input tensor shape
// @return vector which is flatted from predicted tensor
std::vector<Ptype> Predict(const std::vector<Ptype> &input,
const std::vector<int64_t> &dims);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
void InjectVariable(const framework::Tensor &t, std::string var_name); void InjectVariable(const Tensor &t, std::string var_name);
void FeedData(const framework::Tensor &t); void FeedData(const Tensor &t);
std::shared_ptr<framework::Tensor> FetchResult(int id = -1); std::shared_ptr<Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1); void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start); void Predict_From(int start);
void Predict_To(int end); void Predict_To(int end);
...@@ -70,26 +59,28 @@ class Executor { ...@@ -70,26 +59,28 @@ class Executor {
protected: protected:
Executor() = default; Executor() = default;
std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t,
int block_id); bool varInputMemory(const std::shared_ptr<VarDesc> &var_desc, Variable *var,
bool varInputMemory(const std::shared_ptr<framework::VarDesc> &var_desc, LoDTensor *tensor) const;
framework::Variable *var,
framework::LoDTensor *tensor) const;
void InitMemory(); void InitMemory();
void InitCombineMemory(); void InitCombineMemory();
void LoadMemory(void **data, void LoadMemory(void **data, const std::shared_ptr<VarDesc> var_desc,
const std::shared_ptr<framework::VarDesc> var_desc, LoDTensor *tensor);
framework::LoDTensor *tensor);
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
void LoadMemory(const framework::VarDesc var_desc, float *tensorInput, void LoadMemory(const VarDesc var_desc, float *tensorInput, char **data);
char **data);
#endif #endif
framework::Program<Dtype> program_;
int batch_size_ = 1; int batch_size_;
std::shared_ptr<framework::ProgramDesc> to_predict_program_; bool use_optimize_;
std::map<framework::BlockDesc, bool lod_mode_;
std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>> Program<Device> program_;
ops_of_block_; std::shared_ptr<ProgramDesc> program_desc_;
typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr;
std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
// operators list
std::vector<OperatorBasePtr> ops_list_;
#ifdef PADDLE_MOBILE_PROFILE #ifdef PADDLE_MOBILE_PROFILE
struct ProfInfo { struct ProfInfo {
int tid = 0; int tid = 0;
...@@ -97,8 +88,6 @@ class Executor { ...@@ -97,8 +88,6 @@ class Executor {
uint64_t runEnd = 0UL; uint64_t runEnd = 0UL;
}; };
#endif #endif
bool use_optimize_ = false;
bool loddable_ = false;
}; };
} // namespace framework } // namespace framework
......
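For context, the reworked Executor above is what PaddleMobile delegates to later in this diff: SetInput binds a tensor to a named feed variable, Predict() runs every operator recorded in ops_of_block_, and GetOutput reads a named fetch variable. A hedged sketch of driving it directly; the include paths and the "feed"/"fetch" variable names follow the conventions used elsewhere in this commit but are assumptions for any concrete model.

// Sketch only: mirrors how PaddleMobile::Feed/Predict/Fetch forward to the
// Executor in this commit. Header paths are assumed to match the repo layout.
#include <memory>
#include <string>
#include "framework/executor.h"
#include "framework/loader.h"

using paddle_mobile::framework::Executor;
using paddle_mobile::framework::Loader;
using paddle_mobile::framework::LoDTensor;

std::shared_ptr<LoDTensor> RunOnce(const std::string &model_dir,
                                   const LoDTensor &input) {
  Loader<paddle_mobile::CPU, float> loader;
  auto program = loader.Load(model_dir, /*optimize=*/true);

  Executor<paddle_mobile::CPU, float> executor(program, /*batch_size=*/1,
                                               /*use_optimize=*/true,
                                               /*lod_mode=*/true);
  executor.SetInput(input, "feed");    // bind one named input
  executor.Predict();                  // run all ops of the loaded program
  return executor.GetOutput("fetch");  // read one named output
}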
...@@ -23,14 +23,8 @@ limitations under the License. */ ...@@ -23,14 +23,8 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
/** template <typename Device, typename T>
* muteandresize tensor as originProgramDesc and scope in loadParams void Loader<Device, T>::InitMemoryFromProgram(
*
* @param originProgramDesc
* @param scope
*/
template <typename Dtype, Precision P>
void Loader<Dtype, P>::InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc, const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope) { const std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) { for (const auto &block : originProgramDesc.get()->Blocks()) {
...@@ -43,8 +37,6 @@ void Loader<Dtype, P>::InitMemoryFromProgram( ...@@ -43,8 +37,6 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
tensor->Resize(make_ddim(dim)); tensor->Resize(make_ddim(dim));
} else { } else {
auto dim = var_desc->Tensor_desc().Dims(); auto dim = var_desc->Tensor_desc().Dims();
// PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
// dim[0] = 1;
if (dim.size() == 0) { if (dim.size() == 0) {
auto tensor = var->GetMutable<LoDTensor>(); auto tensor = var->GetMutable<LoDTensor>();
framework::DDim dDim = {0}; framework::DDim dDim = {0};
...@@ -60,7 +52,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram( ...@@ -60,7 +52,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
} }
} }
} else { } else {
// TODO(codeWorm): some. // TODO(codeWorm)
} }
} }
} }
...@@ -68,7 +60,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram( ...@@ -68,7 +60,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
template <> template <>
void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram( void Loader<GPU_CL, float>::InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc, const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope) { const std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) { for (const auto &block : originProgramDesc.get()->Blocks()) {
...@@ -77,7 +69,6 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram( ...@@ -77,7 +69,6 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) { if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
if (var_desc->Persistable()) { if (var_desc->Persistable()) {
auto dim = var_desc->Tensor_desc().Dims(); auto dim = var_desc->Tensor_desc().Dims();
// auto tensor = var->GetMutable<LoDTensor>();
auto cl_image = var->GetMutable<framework::CLImage>(); auto cl_image = var->GetMutable<framework::CLImage>();
cl_image->Resize(make_ddim(dim)); cl_image->Resize(make_ddim(dim));
} else { } else {
...@@ -88,14 +79,13 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram( ...@@ -88,14 +79,13 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
cl_image->Resize(make_ddim(dim)); cl_image->Resize(make_ddim(dim));
} }
} else { } else {
// TODO(codeWorm): some. // TODO(codeWorm)
} }
} }
} }
} }
template <> template <>
const Program<GPU_CL, Precision::FP32> const Program<GPU_CL, float> Loader<GPU_CL, float>::LoadCombinedMemory(
Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
size_t read_size, const uint8_t *buf, size_t combined_params_len, size_t read_size, const uint8_t *buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize, bool quantification) { uint8_t *combined_params_buf, bool optimize, bool quantification) {
bool can_add_split = false; bool can_add_split = false;
...@@ -113,7 +103,7 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory( ...@@ -113,7 +103,7 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
auto originProgramDesc = std::make_shared<ProgramDesc>(c_program); auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
Program<GPU_CL, Precision::FP32> program; Program<GPU_CL, float> program;
program.combined = true; program.combined = true;
program.originProgram = originProgramDesc; program.originProgram = originProgramDesc;
program.quantification = quantification; program.quantification = quantification;
...@@ -145,16 +135,16 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory( ...@@ -145,16 +135,16 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
/** /**
* fusion and print someinfos * fusion and print someinfos
* @tparam Dtype * @tparam Device
* @tparam P * @tparam P
* @param optimize * @param optimize
* @param can_add_split * @param can_add_split
* @param program * @param program
* @param originProgramDesc * @param originProgramDesc
*/ */
template <typename Dtype, Precision P> template <typename Device, typename T>
void FusionAndPrintInfos( void FusionAndPrintInfos(
bool optimize, bool can_add_split, Program<Dtype, P> *program, bool optimize, bool can_add_split, Program<Device, T> *program,
const std::shared_ptr<ProgramDesc> &originProgramDesc) { const std::shared_ptr<ProgramDesc> &originProgramDesc) {
if (optimize) { if (optimize) {
ProgramOptimize program_optimize; ProgramOptimize program_optimize;
...@@ -193,22 +183,22 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) { ...@@ -193,22 +183,22 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
return cur_len; return cur_len;
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &dirname, const Program<Device, T> Loader<Device, T>::Load(const std::string &dirname,
bool optimize, bool optimize,
bool quantification, bool quantification,
bool can_add_split) { bool can_add_split) {
auto program = this->LoadProgram(dirname + "/__model__", optimize, auto program = this->LoadProgram(dirname + "/__model__", optimize,
quantification, can_add_split); quantification, can_add_split);
program.model_path = dirname; program.model_path = dirname;
return program; return program;
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &model_path, const Program<Device, T> Loader<Device, T>::Load(const std::string &model_path,
const std::string &para_path, const std::string &para_path,
bool optimize, bool optimize,
bool quantification) { bool quantification) {
auto program = this->LoadProgram(model_path, optimize, quantification); auto program = this->LoadProgram(model_path, optimize, quantification);
program.para_path = para_path; program.para_path = para_path;
...@@ -217,8 +207,8 @@ const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &model_path, ...@@ -217,8 +207,8 @@ const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &model_path,
return program; return program;
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
const Program<Dtype, P> Loader<Dtype, P>::LoadProgram( const Program<Device, T> Loader<Device, T>::LoadProgram(
const std::string &model_path, bool optimize, bool quantification, const std::string &model_path, bool optimize, bool quantification,
bool can_add_split) { bool can_add_split) {
std::string model_filename = model_path; std::string model_filename = model_path;
...@@ -237,7 +227,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram( ...@@ -237,7 +227,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
// //
auto originProgramDesc = std::make_shared<ProgramDesc>(c_program); auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
Program<Dtype, P> program; Program<Device, T> program;
program.originProgram = originProgramDesc; program.originProgram = originProgramDesc;
program.quantification = quantification; program.quantification = quantification;
program.combined_params_len = 0; program.combined_params_len = 0;
...@@ -254,8 +244,8 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram( ...@@ -254,8 +244,8 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
return program; return program;
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory( const Program<Device, T> Loader<Device, T>::LoadCombinedMemory(
size_t read_size, const uint8_t *buf, size_t combined_params_len, size_t read_size, const uint8_t *buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize, bool quantification) { uint8_t *combined_params_buf, bool optimize, bool quantification) {
bool can_add_split = false; bool can_add_split = false;
...@@ -273,7 +263,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory( ...@@ -273,7 +263,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
auto originProgramDesc = std::make_shared<ProgramDesc>(c_program); auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
Program<Dtype, P> program; Program<Device, T> program;
program.combined = true; program.combined = true;
program.originProgram = originProgramDesc; program.originProgram = originProgramDesc;
program.quantification = quantification; program.quantification = quantification;
...@@ -289,13 +279,13 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory( ...@@ -289,13 +279,13 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
return program; return program;
} }
template class Loader<CPU, Precision::FP32>; template class Loader<CPU, float>;
template class Loader<FPGA, Precision::FP32>; template class Loader<FPGA, float>;
template class Loader<GPU_MALI, Precision::FP32>; template class Loader<GPU_MALI, float>;
template class Loader<GPU_CL, Precision::FP32>; template class Loader<GPU_CL, float>;
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -22,39 +22,39 @@ limitations under the License. */ ...@@ -22,39 +22,39 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
template <typename Dtype = CPU, Precision P = Precision::FP32> template <typename Device = CPU, typename T = float>
class Loader { class Loader {
public: public:
/* /*
* @b load separate format fluid model * @b load separate format fluid model
* @b 加载分开形式的 fluid 模型 * @b 加载分开存储的fluid模型
* */ * */
const Program<Dtype, P> Load(const std::string &dirname, const Program<Device, T> Load(const std::string &dirname,
bool optimize = false, bool optimize = false,
bool quantification = false, bool quantification = false,
bool can_add_split = false); bool can_add_split = false);
/* /*
* @b load combine format fluid mode * @b load combine format fluid mode
* @b 加载结合在一起格式的模型 * @b 加载统一存储的fluid模型
* */ * */
const Program<Dtype, P> Load(const std::string &model_path, const Program<Device, T> Load(const std::string &model_path,
const std::string &para_path, const std::string &para_path,
bool optimize = false, bool optimize = false,
bool quantification = false); bool quantification = false);
const Program<Dtype, P> LoadCombinedMemory(size_t model_len, const Program<Device, T> LoadCombinedMemory(size_t model_len,
const uint8_t *model_buf, const uint8_t *model_buf,
size_t combined_params_len, size_t combined_params_len,
uint8_t *combined_params_buf, uint8_t *combined_params_buf,
bool optimize = false, bool optimize = false,
bool quantification = false); bool quantification = false);
private: private:
const Program<Dtype, P> LoadProgram(const std::string &model_path, const Program<Device, T> LoadProgram(const std::string &model_path,
bool optimize = false, bool optimize = false,
bool quantification = false, bool quantification = false,
bool can_add_split = false); bool can_add_split = false);
void InitMemoryFromProgram( void InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc, const std::shared_ptr<ProgramDesc> &originProgramDesc,
......
...@@ -16,12 +16,12 @@ limitations under the License. */ ...@@ -16,12 +16,12 @@ limitations under the License. */
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#include "tensor.h" #include "framework/tensor.h"
#include "tensor_util.h" #include "framework/tensor_util.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
/* /*
...@@ -202,5 +202,29 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor); ...@@ -202,5 +202,29 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor);
void DeserializeFromStream(std::istream &is, LoDTensor *tensor); void DeserializeFromStream(std::istream &is, LoDTensor *tensor);
#ifdef PADDLE_MOBILE_DEBUG
inline Print &operator<<(Print &printer, const LoDTensor &tensor) {
printer << " dims: " << tensor.dims() << "\n";
int stride = tensor.numel() / 20;
stride = stride > 0 ? stride : 1;
#ifndef PADDLE_MOBILE_FPGA
for (int i = 0; i < tensor.numel(); i += stride) {
if (tensor.type() == typeid(float)) {
printer << tensor.data<float>()[i] << " ";
} else if (tensor.type() == typeid(int32_t)) {
printer << tensor.data<int32_t>()[i] << " ";
} else if (tensor.type() == typeid(int64_t)) {
printer << tensor.data<int64_t>()[i] << " ";
} else if (tensor.type() == typeid(int8_t)) {
printer << static_cast<int>(tensor.data<int8_t>()[i]) << " ";
}
}
#endif // PADDLE_MOBILE_FPGA
return printer;
}
#endif // PADDLE_MOBILE_DEBUG
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
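The operator<< added above makes LoDTensor printable through the existing Print/DLOG machinery, sampling at most about 20 elements and dispatching on the element type; the updated NLP test later in this diff uses it as DLOG << *paddle_mobile.Fetch(). A minimal debug-only sketch, assuming PADDLE_MOBILE_DEBUG is defined and the header path matches this repo's include style:

// Debug-print sketch; not part of the commit, shown only to illustrate the
// new printer.
#include "common/log.h"
#include "framework/lod_tensor.h"

void DumpOutput(const paddle_mobile::framework::LoDTensor &out) {
  // Prints dims plus up to ~20 sampled values (float/int8/int32/int64).
  DLOG << "fetched output: " << out;
}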
...@@ -14,16 +14,15 @@ limitations under the License. */ ...@@ -14,16 +14,15 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include "common/types.h" #include "common/types.h"
#include "framework/program/program_desc.h" #include "framework/program/program_desc.h"
#include "framework/scope.h" #include "framework/scope.h"
#include <string>
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
template <typename Dtype, Precision P = Precision::FP32> template <typename Device, typename T = float>
class Program { class Program {
public: public:
std::shared_ptr<ProgramDesc> originProgram; std::shared_ptr<ProgramDesc> originProgram;
......
...@@ -26,6 +26,7 @@ limitations under the License. */ ...@@ -26,6 +26,7 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
class Scope { class Scope {
public: public:
Scope() = default; Scope() = default;
......
...@@ -226,7 +226,6 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) { ...@@ -226,7 +226,6 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) {
} }
} }
#endif #endif
return printer; return printer;
} }
......
...@@ -18,17 +18,17 @@ ...@@ -18,17 +18,17 @@
namespace paddle_mobile { namespace paddle_mobile {
template <typename Dtype, Precision P> template <typename Device, typename T>
PaddleMobilePredictor<Dtype, P>::PaddleMobilePredictor( PaddleMobilePredictor<Device, T>::PaddleMobilePredictor(
const PaddleMobileConfig &config) { const PaddleMobileConfig &config) {
PADDLE_MOBILE_ENFORCE(Init(config) == true, PADDLE_MOBILE_ENFORCE(Init(config) == true,
"paddle mobile predictor init failed!"); "paddle mobile predictor init failed!");
config_ = config; config_ = config;
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) { bool PaddleMobilePredictor<Device, T>::Init(const PaddleMobileConfig &config) {
paddle_mobile_.reset(new PaddleMobile<Dtype, P>()); paddle_mobile_.reset(new PaddleMobile<Device, T>());
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
paddle_mobile_->SetCLPath(config.cl_path); paddle_mobile_->SetCLPath(config.cl_path);
#endif #endif
...@@ -52,8 +52,8 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) { ...@@ -52,8 +52,8 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
paddle_mobile_->SetThreadNum(config.thread_num); paddle_mobile_->SetThreadNum(config.thread_num);
return true; return true;
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
bool PaddleMobilePredictor<Dtype, P>::Run( bool PaddleMobilePredictor<Device, T>::Run(
const std::vector<PaddleTensor> &inputs, const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data, int batch_size) { std::vector<PaddleTensor> *output_data, int batch_size) {
if (inputs.empty()) { if (inputs.empty()) {
...@@ -78,12 +78,12 @@ bool PaddleMobilePredictor<Dtype, P>::Run( ...@@ -78,12 +78,12 @@ bool PaddleMobilePredictor<Dtype, P>::Run(
framework::Tensor input_tensor; framework::Tensor input_tensor;
input_tensor.Resize(ddim); input_tensor.Resize(ddim);
int input_length = framework::product(ddim); int input_length = framework::product(ddim);
typedef typename PrecisionTrait<P>::ptype PType; auto input_ptr = input_tensor.mutable_data<T>();
auto input_ptr = input_tensor.mutable_data<PType>();
memcpy(input_ptr, static_cast<PType *>(input.data.data()), memcpy(input_ptr, static_cast<T *>(input.data.data()),
input_length * sizeof(PType)); input_length * sizeof(T));
auto output_tensor = paddle_mobile_->Predict(input_tensor); paddle_mobile_->Predict(input_tensor);
auto output_tensor = paddle_mobile_->Fetch();
if (output_data->empty()) { if (output_data->empty()) {
LOG(kLOG_ERROR) << "At least one output should be set with tensors' names."; LOG(kLOG_ERROR) << "At least one output should be set with tensors' names.";
...@@ -99,18 +99,18 @@ bool PaddleMobilePredictor<Dtype, P>::Run( ...@@ -99,18 +99,18 @@ bool PaddleMobilePredictor<Dtype, P>::Run(
output.shape.push_back(static_cast<int>(d)); output.shape.push_back(static_cast<int>(d));
} }
if (output.data.length() < output_length * sizeof(PType)) { if (output.data.length() < output_length * sizeof(T)) {
output.data.Resize(output_length * sizeof(PType)); output.data.Resize(output_length * sizeof(T));
} }
memcpy(output.data.data(), output_tensor->template data<PType>(), memcpy(output.data.data(), output_tensor->template data<T>(),
output_length * sizeof(PType)); output_length * sizeof(T));
return true; return true;
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
PaddleMobilePredictor<Dtype, P>::~PaddleMobilePredictor() { PaddleMobilePredictor<Device, T>::~PaddleMobilePredictor() {
paddle_mobile_->Clear(); paddle_mobile_->Clear();
} }
...@@ -122,13 +122,13 @@ CreatePaddlePredictor<PaddleMobileConfig, PaddleEngineKind::kPaddleMobile>( ...@@ -122,13 +122,13 @@ CreatePaddlePredictor<PaddleMobileConfig, PaddleEngineKind::kPaddleMobile>(
std::unique_ptr<PaddlePredictor> x; std::unique_ptr<PaddlePredictor> x;
if (config.precision == PaddleMobileConfig::FP32) { if (config.precision == PaddleMobileConfig::FP32) {
if (config.device == PaddleMobileConfig::kCPU) { if (config.device == PaddleMobileConfig::kCPU) {
x.reset(new PaddleMobilePredictor<CPU, Precision::FP32>(config)); x.reset(new PaddleMobilePredictor<CPU, float>(config));
} else if (config.device == PaddleMobileConfig::kFPGA) { } else if (config.device == PaddleMobileConfig::kFPGA) {
x.reset(new PaddleMobilePredictor<FPGA, Precision::FP32>(config)); x.reset(new PaddleMobilePredictor<FPGA, float>(config));
} else if (config.device == PaddleMobileConfig::kGPU_MALI) { } else if (config.device == PaddleMobileConfig::kGPU_MALI) {
x.reset(new PaddleMobilePredictor<GPU_MALI, Precision::FP32>(config)); x.reset(new PaddleMobilePredictor<GPU_MALI, float>(config));
} else if (config.device == PaddleMobileConfig::kGPU_CL) { } else if (config.device == PaddleMobileConfig::kGPU_CL) {
x.reset(new PaddleMobilePredictor<GPU_CL, Precision::FP32>(config)); x.reset(new PaddleMobilePredictor<GPU_CL, float>(config));
} else { } else {
LOG(kLOG_ERROR) << "unsupport device type!"; LOG(kLOG_ERROR) << "unsupport device type!";
return nullptr; return nullptr;
......
...@@ -29,7 +29,7 @@ limitations under the License. */ ...@@ -29,7 +29,7 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
template <typename Dtype = CPU, Precision P = Precision::FP32> template <typename Device = CPU, typename T = float>
class PaddleMobilePredictor : public PaddlePredictor { class PaddleMobilePredictor : public PaddlePredictor {
public: public:
PaddleMobilePredictor() = delete; PaddleMobilePredictor() = delete;
...@@ -43,7 +43,7 @@ class PaddleMobilePredictor : public PaddlePredictor { ...@@ -43,7 +43,7 @@ class PaddleMobilePredictor : public PaddlePredictor {
~PaddleMobilePredictor() override; ~PaddleMobilePredictor() override;
private: private:
std::unique_ptr<PaddleMobile<Dtype, P>> paddle_mobile_; std::unique_ptr<PaddleMobile<Device, T>> paddle_mobile_;
bool Init(const PaddleMobileConfig& config); bool Init(const PaddleMobileConfig& config);
PaddleMobileConfig config_; PaddleMobileConfig config_;
......
...@@ -48,7 +48,7 @@ ...@@ -48,7 +48,7 @@
@interface PaddleMobileCPU() @interface PaddleMobileCPU()
{ {
paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32> *pam_; paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> *pam_;
BOOL loaded_; BOOL loaded_;
} }
@end @end
...@@ -59,7 +59,7 @@ static std::mutex shared_mutex; ...@@ -59,7 +59,7 @@ static std::mutex shared_mutex;
- (instancetype)init { - (instancetype)init {
if (self = [super init]) { if (self = [super init]) {
pam_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32>(); pam_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU, float>();
} }
return self; return self;
} }
...@@ -220,7 +220,8 @@ static std::mutex shared_mutex; ...@@ -220,7 +220,8 @@ static std::mutex shared_mutex;
memcpy(input_ptr, input, memcpy(input_ptr, input,
numel * sizeof(float)); numel * sizeof(float));
std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Predict(input_tensor); pam_->Predict(input_tensor);
std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Fetch();
float *output_pointer = new float[output->numel()]; float *output_pointer = new float[output->numel()];
......
...@@ -16,21 +16,23 @@ limitations under the License. */ ...@@ -16,21 +16,23 @@ limitations under the License. */
#include "paddle_mobile_jni.h" #include "paddle_mobile_jni.h"
#include <cmath> #include <cmath>
#include <string>
#include <vector>
#include "common/log.h" #include "common/log.h"
#include "framework/tensor.h" #include "framework/tensor.h"
#include "io/paddle_mobile.h" #include "io/paddle_mobile.h"
#ifdef ENABLE_EXCEPTION #ifdef ENABLE_EXCEPTION
#include "common/enforce.h" #include "common/enforce.h"
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
namespace paddle_mobile { namespace paddle_mobile {
namespace jni { namespace jni {
using framework::DDim; using framework::DDim;
using framework::Program; using framework::Program;
using framework::Tensor; using framework::Tensor;
...@@ -200,7 +202,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage( ...@@ -200,7 +202,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
input_ptr[i] = dataPointer[i]; input_ptr[i] = dataPointer[i];
} }
auto output = getPaddleMobileInstance()->Predict(input); getPaddleMobileInstance()->Predict(input);
auto output = getPaddleMobileInstance()->Fetch();
count = output->numel(); count = output->numel();
result = env->NewFloatArray(count); result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>()); env->SetFloatArrayRegion(result, 0, count, output->data<float>());
...@@ -233,7 +236,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage( ...@@ -233,7 +236,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
input_ptr[i] = dataPointer[i]; input_ptr[i] = dataPointer[i];
} }
auto output = getPaddleMobileInstance()->Predict(input); getPaddleMobileInstance()->Predict(input);
auto output = getPaddleMobileInstance()->Fetch();
count = output->numel(); count = output->numel();
result = env->NewFloatArray(count); result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>()); env->SetFloatArrayRegion(result, 0, count, output->data<float>());
...@@ -328,7 +332,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv( ...@@ -328,7 +332,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
input_ptr[i] = matrix[i]; input_ptr[i] = matrix[i];
} }
auto output = getPaddleMobileInstance()->Predict(input); getPaddleMobileInstance()->Predict(input);
auto output = getPaddleMobileInstance()->Fetch();
count = output->numel(); count = output->numel();
result = env->NewFloatArray(count); result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>()); env->SetFloatArrayRegion(result, 0, count, output->data<float>());
...@@ -363,7 +368,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv( ...@@ -363,7 +368,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
input_ptr[i] = matrix[i]; input_ptr[i] = matrix[i];
} }
auto output = getPaddleMobileInstance()->Predict(input); getPaddleMobileInstance()->Predict(input);
auto output = getPaddleMobileInstance()->Fetch();
count = output->numel(); count = output->numel();
result = env->NewFloatArray(count); result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>()); env->SetFloatArrayRegion(result, 0, count, output->data<float>());
...@@ -399,7 +405,8 @@ Java_com_baidu_paddle_PML_predictLod(JNIEnv *env, jclass thiz, jlongArray buf) { ...@@ -399,7 +405,8 @@ Java_com_baidu_paddle_PML_predictLod(JNIEnv *env, jclass thiz, jlongArray buf) {
auto *pdata = words.mutable_data<int64_t>(); auto *pdata = words.mutable_data<int64_t>();
size_t n = words.numel() * sizeof(int64_t); size_t n = words.numel() * sizeof(int64_t);
memcpy(pdata, ids.data(), n); memcpy(pdata, ids.data(), n);
auto vec_result = paddle_mobile.PredictLod(words); paddle_mobile.Predict(words);
auto vec_result = paddle_mobile.Fetch();
int count = vec_result->numel(); int count = vec_result->numel();
jlongArray result = NULL; jlongArray result = NULL;
ANDROIDLOGE("predict nlp size %d", count); ANDROIDLOGE("predict nlp size %d", count);
......
...@@ -13,81 +13,81 @@ See the License for the specific language governing permissions and ...@@ -13,81 +13,81 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "io/paddle_mobile.h" #include "io/paddle_mobile.h"
#include <utility>
#include "common/common.h"
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
#include <CL/cl.h> #include <CL/cl.h>
#include "framework/cl/cl_tensor.h" #include "framework/cl/cl_tensor.h"
#endif #endif
#include "common/common.h"
#include "operators/math/gemm.h" #include "operators/math/gemm.h"
namespace paddle_mobile { namespace paddle_mobile {
template <typename Dtype, Precision P> template <typename Device, typename T>
void PaddleMobile<Dtype, P>::SetThreadNum(int num) { void PaddleMobile<Device, T>::SetThreadNum(int num) {
#ifdef _OPENMP #ifdef _OPENMP
omp_set_num_threads(num); omp_set_num_threads(num);
#endif #endif
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize, PMStatus PaddleMobile<Device, T>::Load(const std::string &dirname,
bool quantification, int batch_size, bool optimize, bool quantification,
bool loddable) { int batch_size, bool loddable) {
if (loader_.get() == nullptr) { if (loader_.get() == nullptr) {
loader_ = std::make_shared<framework::Loader<Dtype, P>>(); loader_ = std::make_shared<framework::Loader<Device, T>>();
} else { } else {
LOG(kLOG_INFO) << "loader inited"; LOG(kLOG_INFO) << "loader inited";
} }
if (executor_.get() == nullptr) { if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Dtype, P>>( executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->Load(dirname, optimize, quantification), batch_size, optimize, loader_->Load(dirname, optimize, quantification), batch_size, optimize,
loddable); loddable);
} else { } else {
LOG(kLOG_INFO) << "executor inited"; LOG(kLOG_INFO) << "executor inited";
} }
return true; return PMSuccess;
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
bool PaddleMobile<Dtype, P>::Load(const std::string &model_path, PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path,
const std::string &para_path, bool optimize, const std::string &para_path,
bool quantification, int batch_size, bool optimize, bool quantification,
bool loddable) { int batch_size, bool loddable) {
if (loader_.get() == nullptr) { if (loader_.get() == nullptr) {
loader_ = std::make_shared<framework::Loader<Dtype, P>>(); loader_ = std::make_shared<framework::Loader<Device, T>>();
} else { } else {
LOG(kLOG_INFO) << "loader inited"; LOG(kLOG_INFO) << "loader inited";
} }
if (executor_.get() == nullptr) { if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Dtype, P>>( executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->Load(model_path, para_path, optimize, quantification), loader_->Load(model_path, para_path, optimize, quantification),
batch_size, optimize, loddable); batch_size, optimize, loddable);
} else { } else {
LOG(kLOG_INFO) << "executor inited"; LOG(kLOG_INFO) << "executor inited";
} }
return true; return PMSuccess;
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
bool PaddleMobile<Dtype, P>::LoadCombinedMemory(size_t model_len, bool PaddleMobile<Device, T>::LoadCombinedMemory(size_t model_len,
const uint8_t *model_buf, const uint8_t *model_buf,
size_t combined_params_len, size_t combined_params_len,
uint8_t *combined_params_buf) { uint8_t *combined_params_buf) {
int batch_size = 1; int batch_size = 1;
bool optimise = true; bool optimise = true;
bool quantification = false; bool quantification = false;
if (loader_.get() == nullptr) { if (loader_.get() == nullptr) {
loader_ = std::make_shared<framework::Loader<Dtype, P>>(); loader_ = std::make_shared<framework::Loader<Device, T>>();
} else { } else {
LOG(kLOG_INFO) << "loader inited"; LOG(kLOG_INFO) << "loader inited";
} }
if (executor_.get() == nullptr) { if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Dtype, P>>( executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len, loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len,
combined_params_buf, optimise, combined_params_buf, optimise,
quantification), quantification),
...@@ -96,38 +96,76 @@ bool PaddleMobile<Dtype, P>::LoadCombinedMemory(size_t model_len, ...@@ -96,38 +96,76 @@ bool PaddleMobile<Dtype, P>::LoadCombinedMemory(size_t model_len,
LOG(kLOG_INFO) << "executor inited"; LOG(kLOG_INFO) << "executor inited";
} }
return true; return PMSuccess;
}
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Predict(const framework::Tensor &input) {
std::vector<std::pair<std::string, framework::Tensor>> inputs;
inputs.push_back(std::make_pair("feed", input));
return this->Predict(inputs);
} }
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::Predict( template <typename Device, typename T>
const framework::Tensor &t) { PMStatus PaddleMobile<Device, T>::Predict(const framework::LoDTensor &input) {
return executor_->Predict(t); std::vector<std::pair<std::string, framework::LoDTensor>> inputs;
inputs.push_back(std::make_pair("feed", input));
return this->Predict(inputs);
}
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Predict(
const std::vector<std::pair<std::string, framework::Tensor>> &inputs) {
return executor_->Predict(inputs);
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::PredictLod( PMStatus PaddleMobile<Device, T>::Predict(
const framework::LoDTensor &t) { const std::vector<std::pair<std::string, framework::LoDTensor>> &inputs) {
return executor_->PredictLod(t); return executor_->Predict(inputs);
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
std::vector<typename PaddleMobile<Dtype, P>::Ptype> std::vector<T> PaddleMobile<Device, T>::Predict(
PaddleMobile<Dtype, P>::Predict(const std::vector<Ptype> &input, const std::vector<T> &input, const std::vector<int64_t> &dims) {
const std::vector<int64_t> &dims) {
return executor_->Predict(input, dims); return executor_->Predict(input, dims);
} }
template <typename Dtype, Precision P> template <typename Device, typename T>
void PaddleMobile<Dtype, P>::Clear() { PMStatus PaddleMobile<Device, T>::Predict() {
return executor_->Predict();
}
template <typename Device, typename T>
void PaddleMobile<Device, T>::Feed(const framework::Tensor &input,
const std::string &var_name) {
executor_->SetInput(input, var_name);
}
template <typename Device, typename T>
void PaddleMobile<Device, T>::Feed(const framework::LoDTensor &input,
const std::string &var_name) {
executor_->SetInput(input, var_name);
}
typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr;
template <typename Device, typename T>
LoDTensorPtr PaddleMobile<Device, T>::Fetch(const std::string &var_name) {
return executor_->GetOutput(var_name);
}
template <typename Device, typename T>
void PaddleMobile<Device, T>::Clear() {
executor_ = nullptr; executor_ = nullptr;
loader_ = nullptr; loader_ = nullptr;
} }
template <typename Dtype, Precision P>
double PaddleMobile<Dtype, P>::GetPredictTime() {} template <typename Device, typename T>
double PaddleMobile<Device, T>::GetPredictTime() {}
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
template <> template <>
double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() { double PaddleMobile<CPU, float>::GetPredictTime() {
int m = 32; int m = 32;
int n = 224 * 224; int n = 224 * 224;
int k = 27; int k = 27;
...@@ -148,7 +186,8 @@ double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() { ...@@ -148,7 +186,8 @@ double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() {
for (int i = 0; i < k * n; ++i) { for (int i = 0; i < k * n; ++i) {
b[i] = t1 + rand() % t2; // NOLINT b[i] = t1 + rand() % t2; // NOLINT
} }
paddle_mobile::operators::math::Gemm gemm;
operators::math::Gemm gemm;
auto time1 = paddle_mobile::time(); auto time1 = paddle_mobile::time();
gemm.Sgemm(m, n, k, static_cast<float>(1), a, lda, b, ldb, gemm.Sgemm(m, n, k, static_cast<float>(1), a, lda, b, ldb,
static_cast<float>(0), c, ldc, false, static_cast<float>(0), c, ldc, false,
...@@ -162,57 +201,51 @@ double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() { ...@@ -162,57 +201,51 @@ double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() {
} }
#endif #endif
template <typename Dtype, Precision P>
PaddleMobile<Dtype, P>::~PaddleMobile() {
executor_ = nullptr;
loader_ = nullptr;
}
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
template <typename Device, T P>
template <typename Dtype, Precision P> void PaddleMobile<Device, P>::InjectVariable(const framework::Tensor &t,
void PaddleMobile<Dtype, P>::InjectVariable(const framework::Tensor &t, std::string var_name) {
std::string var_name) {
executor_->InjectVariable(t, var_name); executor_->InjectVariable(t, var_name);
} }
template <typename Dtype, Precision P> template <typename Device, T P>
void PaddleMobile<Dtype, P>::FeedData(const framework::Tensor &t) { void PaddleMobile<Device, P>::FeedData(const framework::Tensor &t) {
executor_->FeedData(t); executor_->FeedData(t);
} }
template <typename Dtype, Precision P> template <typename Device, T P>
std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::FetchResult(int id) { std::shared_ptr<framework::Tensor> PaddleMobile<Device, P>::FetchResult(
int id) {
return executor_->FetchResult(id); return executor_->FetchResult(id);
} }
template <typename Dtype, Precision P> template <typename Device, T P>
void PaddleMobile<Dtype, P>::Predict_From_To(int start, int end) { void PaddleMobile<Device, P>::Predict_From_To(int start, int end) {
executor_->Predict_From_To(start, end); executor_->Predict_From_To(start, end);
} }
template <typename Dtype, Precision P> template <typename Device, T P>
void PaddleMobile<Dtype, P>::Predict_From(int start) { void PaddleMobile<Device, P>::Predict_From(int start) {
executor_->Predict_From(start); executor_->Predict_From(start);
} }
template <typename Dtype, Precision P> template <typename Device, T P>
void PaddleMobile<Dtype, P>::Predict_To(int end) { void PaddleMobile<Device, P>::Predict_To(int end) {
executor_->Predict_To(end); executor_->Predict_To(end);
} }
#endif #endif
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
static std::mutex lc; static std::mutex lc;
template <typename Dtype, Precision P> template <typename Device, T P>
void PaddleMobile<Dtype, P>::SetCLPath(std::string path) { void PaddleMobile<Device, P>::SetCLPath(std::string path) {
std::lock_guard<std::mutex> lock(lc); std::lock_guard<std::mutex> lock(lc);
if (framework::CLEngine::Instance()->GetCLPath() == "") { if (framework::CLEngine::Instance()->GetCLPath() == "") {
framework::CLEngine::Instance()->setClPath(path); framework::CLEngine::Instance()->setClPath(path);
} }
} }
template <> template <>
double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() { double PaddleMobile<GPU_CL, T::FP32>::GetPredictTime() {
cl_int status; cl_int status;
cl_uint nPlatform; cl_uint nPlatform;
clGetPlatformIDs(0, NULL, &nPlatform); clGetPlatformIDs(0, NULL, &nPlatform);
...@@ -410,8 +443,8 @@ double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() { ...@@ -410,8 +443,8 @@ double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
return -1; return -1;
} }
} }
template <typename Dtype, Precision P> template <typename Device, T P>
int PaddleMobile<Dtype, P>::readText( int PaddleMobile<Device, P>::readText(
const char *kernelPath, const char *kernelPath,
char **pcode) { // 读取文本文件放入 pcode,返回字符串长度 char **pcode) { // 读取文本文件放入 pcode,返回字符串长度
FILE *fp; FILE *fp;
...@@ -440,13 +473,11 @@ int PaddleMobile<Dtype, P>::readText( ...@@ -440,13 +473,11 @@ int PaddleMobile<Dtype, P>::readText(
fclose(fp); fclose(fp);
return size + 1; return size + 1;
} }
#endif #endif
template class PaddleMobile<CPU, Precision::FP32>; template class PaddleMobile<CPU, float>;
template class PaddleMobile<FPGA, Precision::FP32>; template class PaddleMobile<FPGA, float>;
template class PaddleMobile<GPU_MALI, Precision::FP32>; template class PaddleMobile<GPU_MALI, float>;
template class PaddleMobile<GPU_CL, float>;
template class PaddleMobile<GPU_CL, Precision::FP32>;
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#ifdef _OPENMP #ifdef _OPENMP
#include <omp.h> #include <omp.h>
...@@ -32,43 +33,52 @@ limitations under the License. */ ...@@ -32,43 +33,52 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
template <typename Dtype = CPU, Precision P = Precision::FP32> template <typename Device, typename T = float>
class PaddleMobile { class PaddleMobile {
typedef typename PrecisionTrait<P>::ptype Ptype;
public: public:
PaddleMobile() { PaddleMobile() {
#ifndef PADDLE_MOBILE_CL #ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Dtype>::value; bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
PADDLE_MOBILE_ENFORCE(!is_gpu, PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
"Not Enable GPU in CmakeList but run gpu codes ");
#endif #endif
} }
bool Load(const std::string &dirname, bool optimize = false, ~PaddleMobile() {}
bool quantification = false, int batch_size = 1,
bool loddable = false); PMStatus Load(const std::string &dirname, const bool optimize = false,
const bool quantification = false, const int batch_size = 1,
const bool lod = false);
PMStatus Load(const std::string &model_path, const std::string &para_path,
const bool optimize = false, const bool quantification = false,
const int batch_size = 1, const bool lod = false);
PMStatus Predict(const framework::Tensor &input);
PMStatus Predict(const framework::LoDTensor &input);
bool Load(const std::string &model_path, const std::string &para_path, PMStatus Predict(
bool optimize = false, bool quantification = false, const std::vector<std::pair<std::string, framework::Tensor>> &inputs);
int batch_size = 1, bool loddable = false); PMStatus Predict(
const std::vector<std::pair<std::string, framework::LoDTensor>> &inputs);
std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t); std::vector<T> Predict(const std::vector<T> &input,
const std::vector<int64_t> &dims);
PMStatus Predict();
std::shared_ptr<framework::Tensor> PredictLod(const framework::LoDTensor &t); void Feed(const framework::LoDTensor &input, const std::string &var_name);
void Feed(const framework::Tensor &input, const std::string &var_name);
std::vector<Ptype> Predict(const std::vector<Ptype> &input, typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr;
const std::vector<int64_t> &dims); LoDTensorPtr Fetch(const std::string &var_name);
LoDTensorPtr Fetch() { return Fetch("fetch"); }
bool LoadCombinedMemory(size_t model_len, const uint8_t *model_buf, bool LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
size_t combined_params_len, size_t combined_params_len,
uint8_t *combined_params_buf); uint8_t *combined_params_buf);
void SetThreadNum(int num); void SetThreadNum(int count);
void Clear(); void Clear();
double GetPredictTime(); double GetPredictTime();
~PaddleMobile();
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
void InjectVariable(const framework::Tensor &t, std::string var_name); void InjectVariable(const framework::Tensor &t, std::string var_name);
void FeedData(const framework::Tensor &t); void FeedData(const framework::Tensor &t);
...@@ -79,15 +89,15 @@ class PaddleMobile { ...@@ -79,15 +89,15 @@ class PaddleMobile {
#endif #endif
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
public: public: // NOLINT
void SetCLPath(std::string cl_path); void SetCLPath(std::string cl_path);
int readText(const char *kernelPath, int readText(const char *kernelPath,
char **pcode); // 读取文本文件放入 pcode,返回字符串长度 char **pcode); // 读取文本文件放入 pcode,返回字符串长度
#endif #endif
private: private:
std::shared_ptr<framework::Loader<Dtype, P>> loader_; std::shared_ptr<framework::Loader<Device, T>> loader_;
std::shared_ptr<framework::Executor<Dtype, P>> executor_; std::shared_ptr<framework::Executor<Device, T>> executor_;
}; };
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -14,10 +14,12 @@ limitations under the License. */ ...@@ -14,10 +14,12 @@ limitations under the License. */
#include "io/paddle_test_inference_api.h" #include "io/paddle_test_inference_api.h"
#include "io/paddle_mobile.h" #include "io/paddle_mobile.h"
namespace paddle_mobile { namespace paddle_mobile {
template <typename Dtype, Precision P>
double PaddleTester<Dtype, P>::CaculatePredictTime(std::string *cl_path) { template <typename Device, typename T>
PaddleMobile<Dtype, P> paddle_mobile; double PaddleTester<Device, T>::CaculatePredictTime(std::string *cl_path) {
PaddleMobile<Device, T> paddle_mobile;
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
if (cl_path) { if (cl_path) {
paddle_mobile.SetCLPath(*cl_path); paddle_mobile.SetCLPath(*cl_path);
...@@ -26,10 +28,10 @@ double PaddleTester<Dtype, P>::CaculatePredictTime(std::string *cl_path) { ...@@ -26,10 +28,10 @@ double PaddleTester<Dtype, P>::CaculatePredictTime(std::string *cl_path) {
#endif #endif
return paddle_mobile.GetPredictTime(); return paddle_mobile.GetPredictTime();
} }
template class PaddleTester<CPU, Precision::FP32>; template class PaddleTester<CPU, float>;
template class PaddleTester<FPGA, Precision::FP32>; template class PaddleTester<FPGA, float>;
template class PaddleTester<GPU_MALI, Precision::FP32>; template class PaddleTester<GPU_MALI, float>;
template class PaddleTester<GPU_CL, Precision::FP32>; template class PaddleTester<GPU_CL, float>;
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -20,10 +20,13 @@ limitations under the License. */ ...@@ -20,10 +20,13 @@ limitations under the License. */
*/ */
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
#include "string" #include "string"
namespace paddle_mobile { namespace paddle_mobile {
template <typename Dtype, Precision P = Precision::FP32>
template <typename Device, typename T = float>
class PaddleTester { class PaddleTester {
public: public:
double CaculatePredictTime(std::string *cl_path = nullptr); double CaculatePredictTime(std::string *cl_path = nullptr);
......
...@@ -375,5 +375,8 @@ if (NOT FOUND_MATCH) ...@@ -375,5 +375,8 @@ if (NOT FOUND_MATCH)
# gen test # gen test
ADD_EXECUTABLE(test-super net/test_super.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-super net/test_super.cpp test_helper.h test_include.h)
target_link_libraries(test-super paddle-mobile) target_link_libraries(test-super paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
# gen test
ADD_EXECUTABLE(test-ocr net/test_ocr.cpp test_helper.h test_include.h)
target_link_libraries(test-ocr paddle-mobile)
endif () endif ()
...@@ -39,6 +39,7 @@ using paddle_mobile::framework::Tensor; ...@@ -39,6 +39,7 @@ using paddle_mobile::framework::Tensor;
using paddle_mobile::framework::Variable; using paddle_mobile::framework::Variable;
using std::string; using std::string;
using std::vector; using std::vector;
template <typename DeviceType, typename OpType> template <typename DeviceType, typename OpType>
class Executor4Test : public Executor<DeviceType> { class Executor4Test : public Executor<DeviceType> {
public: public:
...@@ -48,20 +49,19 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -48,20 +49,19 @@ class Executor4Test : public Executor<DeviceType> {
this->use_optimize_ = use_optimize; this->use_optimize_ = use_optimize;
this->program_ = p; this->program_ = p;
if (this->use_optimize_) { if (this->use_optimize_) {
this->to_predict_program_ = this->program_.optimizeProgram; this->program_desc_ = this->program_.optimizeProgram;
} else { } else {
this->to_predict_program_ = this->program_.originProgram; this->program_desc_ = this->program_.originProgram;
} }
if (this->program_.originProgram == nullptr) { if (this->program_.originProgram == nullptr) {
LOG(paddle_mobile::LogLevel::kLOG_ERROR) LOG(paddle_mobile::LogLevel::kLOG_ERROR) << "program_desc_ == nullptr";
<< "to_predict_program_ == nullptr";
} }
const std::vector<std::shared_ptr<BlockDesc>> blocks = const std::vector<std::shared_ptr<BlockDesc>> blocks =
this->to_predict_program_->Blocks(); this->program_desc_->Blocks();
for (std::shared_ptr<BlockDesc> block_desc : blocks) { for (int block_id = 0; block_id < blocks.size(); ++block_id) {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops(); std::vector<std::shared_ptr<OpDesc>> ops = blocks[block_id]->Ops();
for (int i = 0; i < ops.size(); ++i) { for (int i = 0; i < ops.size(); ++i) {
auto op = ops[i]; auto op = ops[i];
if (op->Type() == op_type) { if (op->Type() == op_type) {
...@@ -73,18 +73,16 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -73,18 +73,16 @@ class Executor4Test : public Executor<DeviceType> {
paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp( paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(), op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), this->program_.scope); op->GetAttrMap(), this->program_.scope);
this->ops_of_block_[*block_desc.get()].push_back(op_ptr); this->ops_of_block_[block_id].push_back(op_ptr);
break; break;
} }
} }
} }
this->InitMemory(); this->InitMemory();
for (const auto &ops : this->ops_of_block_) {
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block = for (const auto &op : ops) {
this->to_predict_program_->Block(0); op->Init();
auto &ops = this->ops_of_block_[*to_predict_block.get()]; }
for (const auto &op : ops) {
op->Init();
} }
} }
...@@ -117,12 +115,10 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -117,12 +115,10 @@ class Executor4Test : public Executor<DeviceType> {
output_tensor_sptrs[i].reset(output_tensors[i]); output_tensor_sptrs[i].reset(output_tensors[i]);
} }
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block = for (auto &ops : this->ops_of_block_) {
this->to_predict_program_->Block(0); for (auto &op : ops) {
for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size(); op->Run();
++j) { }
auto op = this->ops_of_block_[*to_predict_block.get()][j];
op->Run();
} }
return output_tensor_sptrs; return output_tensor_sptrs;
...@@ -139,14 +135,11 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -139,14 +135,11 @@ class Executor4Test : public Executor<DeviceType> {
auto *output_tensor = con_output->GetMutable<LoDTensor>(); auto *output_tensor = con_output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>(dDim); output_tensor->mutable_data<float>(dDim);
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block = for (auto &ops : this->ops_of_block_) {
this->to_predict_program_->Block(0); for (auto &op : ops) {
for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size(); op->Run();
++j) { }
auto op = this->ops_of_block_[*to_predict_block.get()][j];
op->Run();
} }
return std::make_shared<paddle_mobile::framework::Tensor>( return std::make_shared<paddle_mobile::framework::Tensor>(
paddle_mobile::framework::Tensor(*output_tensor)); paddle_mobile::framework::Tensor(*output_tensor));
} }
......
...@@ -52,15 +52,16 @@ int main(int argc, char* argv[]) { ...@@ -52,15 +52,16 @@ int main(int argc, char* argv[]) {
SetupTensor<float>(&input, in_shape, 0.f, 255.f); SetupTensor<float>(&input, in_shape, 0.f, 255.f);
// warmup // warmup
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
output = paddle_mobile.Predict(input); paddle_mobile.Predict(input);
} }
auto time3 = time(); auto time3 = time();
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
output = paddle_mobile.Predict(input); paddle_mobile.Predict(input);
} }
auto time4 = time(); auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n"; std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n";
std::ostringstream os("output tensor size: "); std::ostringstream os("output tensor size: ");
output = paddle_mobile.Fetch();
os << output->numel() << "\n" << output->data<float>()[0]; os << output->numel() << "\n" << output->data<float>()[0];
for (int i = 1; i < output->numel(); ++i) { for (int i = 1; i < output->numel(); ++i) {
os << ", " << output->data<float>()[i]; os << ", " << output->data<float>()[i];
......
...@@ -36,11 +36,11 @@ int main() { ...@@ -36,11 +36,11 @@ int main() {
input_tensor.data<float>() + input_tensor.numel()); input_tensor.data<float>() + input_tensor.numel());
// 预热十次 // 预热十次
for (int i = 0; i < 1; ++i) { for (int i = 0; i < 1; ++i) {
paddle_mobile.PredictLod(input_tensor); paddle_mobile.Predict(input_tensor);
} }
auto time3 = time(); auto time3 = time();
for (int i = 0; i < 1; ++i) { for (int i = 0; i < 1; ++i) {
paddle_mobile.PredictLod(input_tensor); paddle_mobile.Predict(input_tensor);
} }
auto time4 = time(); auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms" std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
......
...@@ -41,12 +41,12 @@ int main(int argc, char* argv[]) { ...@@ -41,12 +41,12 @@ int main(int argc, char* argv[]) {
#endif #endif
paddle_mobile.SetThreadNum(thread_num); paddle_mobile.SetThreadNum(thread_num);
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(g_googlenet, optimize)) { std::vector<float> output;
if (paddle_mobile.Load(g_googlenet, optimize, false, 1, true)) {
auto time2 = paddle_mobile::time(); auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms" std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
<< std::endl; << std::endl;
std::vector<float> input; std::vector<float> input;
std::vector<float> output;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
if (feed_shape) { if (feed_shape) {
sscanf(feed_shape, "%d,%d,%d", &dims[1], &dims[2], &dims[3]); sscanf(feed_shape, "%d,%d,%d", &dims[1], &dims[2], &dims[3]);
......
...@@ -48,8 +48,8 @@ int main() { ...@@ -48,8 +48,8 @@ int main() {
DLOG << "words lod 22: " << words.lod(); DLOG << "words lod 22: " << words.lod();
auto time3 = time(); auto time3 = time();
for (int i = 0; i < 1; ++i) { for (int i = 0; i < 1; ++i) {
auto vec_result = paddle_mobile.PredictLod(words); paddle_mobile.Predict(words);
DLOG << *vec_result; DLOG << *paddle_mobile.Fetch();
} }
auto time4 = time(); auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms" std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms"
...@@ -84,8 +84,8 @@ int main() { ...@@ -84,8 +84,8 @@ int main() {
DLOG << "words lod 22: " << words.lod(); DLOG << "words lod 22: " << words.lod();
auto time3 = time(); auto time3 = time();
for (int i = 0; i < 1; ++i) { for (int i = 0; i < 1; ++i) {
auto vec_result = paddle_mobile.PredictLod(words); paddle_mobile.Predict(words);
DLOG << *vec_result; DLOG << *paddle_mobile.Fetch();
} }
auto time4 = time(); auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms" std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms"
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
void load_images(const char *image_dir, const char *images_list,
std::vector<std::string> *image_names,
std::vector<std::pair<int, int>> *image_shapes) {
int height, width;
std::string filename;
std::ifstream if_list(images_list, std::ios::in);
while (!if_list.eof()) {
if_list >> height >> width >> filename;
image_shapes->push_back(std::make_pair(height, width));
image_names->push_back(filename);
}
}
int main(int argc, char **argv) {
if (argc < 4) {
std::cerr << "Usage: ./test_ocr model_dir image_dir images_list."
<< std::endl;
return 1;
}
char *model_dir = argv[1];
char *image_dir = argv[2];
char *images_list = argv[3];
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(8);
auto isok = paddle_mobile.Load(std::string(model_dir) + "/model",
std::string(model_dir) + "/params", true,
false, 1, true);
DLOG << "pass init model";
std::vector<std::string> image_names;
std::vector<std::pair<int, int>> image_shapes;
load_images(image_dir, images_list, &image_names, &image_shapes);
DLOG << "pass load images";
for (int i = 0; i < image_names.size(); i++) {
std::string file_name = image_names[i];
std::vector<float> input;
std::vector<int64_t> dims{1, 1, 48, 512};
dims[2] = image_shapes[i].first;
dims[3] = image_shapes[i].second;
// load input image
std::string img_path = std::string(image_dir) + "/" + file_name;
std::cerr << "img_path: " << img_path << std::endl;
std::cerr << "shape = [" << dims[0] << ", " << dims[1] << ", " << dims[2]
<< ", " << dims[3] << "]" << std::endl;
GetInput<float>(img_path, &input, dims);
// predict
auto output = paddle_mobile.Predict(input, dims);
// print result
std::cerr << file_name << std::endl;
std::cerr << output[0];
for (int j = 1; j < output.size(); ++j) {
std::cerr << " " << output[j];
}
std::cerr << std::endl;
}
return 0;
}
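For reference, the load_images helper in the new test_ocr.cpp above reads images_list as whitespace-separated records of input height, input width and image file name, one image per line; each height/width pair overrides dims[2] and dims[3] before GetInput loads the corresponding image. A hypothetical two-line list (file names invented for illustration) would look like:

48 512 word_crop_0001.jpg
48 320 word_crop_0002.jpg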
...@@ -5,7 +5,7 @@ TOTAL_ERRORS=0 ...@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \ grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "paddle_mobile_jni.cpp"); do
cpplint $file; cpplint $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done done
......