Commit 9729edac authored by hjchen2

Support feeding multiple inputs and fetching multiple outputs

Parent f20c9041
This diff is collapsed.
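The headline change replaces the old return-a-tensor Predict with a feed/fetch style interface that accepts several named inputs and exposes every output by variable name. A minimal usage sketch, based only on the PaddleMobile methods declared in io/paddle_mobile.h in this diff; the model path and the variable names "image", "label" and "prob" are hypothetical and must match the model's feed/fetch variables:

#include <utility>
#include <vector>
#include "io/paddle_mobile.h"

int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> pm;
  pm.Load("./some_model_dir", /*optimize=*/true);

  paddle_mobile::framework::Tensor image, label;  // filled by the caller
  // Either feed all named inputs in one Predict call ...
  std::vector<std::pair<std::string, paddle_mobile::framework::Tensor>> inputs;
  inputs.emplace_back("image", image);
  inputs.emplace_back("label", label);
  pm.Predict(inputs);

  // ... or feed them one by one and run the whole program explicitly.
  pm.Feed(image, "image");
  pm.Feed(label, "label");
  pm.Predict();

  // Fetch any output by variable name; Fetch() defaults to the "fetch" variable.
  auto prob = pm.Fetch("prob");  // std::shared_ptr<framework::LoDTensor>
  auto out = pm.Fetch();
  return 0;
}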
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "common/types.h"
#include "common/util.h"
......@@ -28,41 +29,29 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
template <typename Dtype = CPU, Precision P = Precision::FP32>
template <typename Device, typename T = float>
class Executor {
public:
typedef typename PrecisionTrait<P>::ptype Ptype;
// exector constructor
// @param program program converted from proto program in PaddlePaddle
// @param use_optimize bool whether use operator fusion to speed up or not
// @param loddable bool
Executor(const framework::Program<Dtype> program, int batch_size = 1,
const bool use_optimize = true, const bool loddable = false);
// predict with tensor input
// @param t input tensor to do prediction
// @return predicted tensor
std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
// predict with lod tensor input
// @param t input lod tensor to do prediction
// @return predicted lod tensor
std::shared_ptr<framework::LoDTensor> PredictLod(
const framework::LoDTensor &t);
// predict with vector input and dims
// @param input vector whose elements will be formed
// @param input lod tensor to do prediction
// @param dims vector whose elements will be formed
// @param input tensor shape
// @return vector which is flatted from predicted tensor
std::vector<Ptype> Predict(const std::vector<Ptype> &input,
const std::vector<int64_t> &dims);
Executor(const Program<Device> &program, int batch_size = 1,
const bool use_optimize = true, const bool lod_mode = false);
PMStatus Predict(const std::vector<std::pair<std::string, Tensor>> &inputs);
PMStatus Predict(
const std::vector<std::pair<std::string, LoDTensor>> &inputs);
std::vector<T> Predict(const std::vector<T> &input,
const std::vector<int64_t> &dims);
PMStatus Predict();
void SetInput(const Tensor &input, const std::string &var_name);
void SetInput(const LoDTensor &input, const std::string &var_name);
std::shared_ptr<LoDTensor> GetOutput(const std::string &var_name);
#ifdef PADDLE_MOBILE_FPGA
void InjectVariable(const framework::Tensor &t, std::string var_name);
void FeedData(const framework::Tensor &t);
std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
void InjectVariable(const Tensor &t, std::string var_name);
void FeedData(const Tensor &t);
std::shared_ptr<Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start);
void Predict_To(int end);
......@@ -70,26 +59,28 @@ class Executor {
protected:
Executor() = default;
std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t,
int block_id);
bool varInputMemory(const std::shared_ptr<framework::VarDesc> &var_desc,
framework::Variable *var,
framework::LoDTensor *tensor) const;
bool varInputMemory(const std::shared_ptr<VarDesc> &var_desc, Variable *var,
LoDTensor *tensor) const;
void InitMemory();
void InitCombineMemory();
void LoadMemory(void **data,
const std::shared_ptr<framework::VarDesc> var_desc,
framework::LoDTensor *tensor);
void LoadMemory(void **data, const std::shared_ptr<VarDesc> var_desc,
LoDTensor *tensor);
#ifdef PADDLE_MOBILE_CL
void LoadMemory(const framework::VarDesc var_desc, float *tensorInput,
char **data);
void LoadMemory(const VarDesc var_desc, float *tensorInput, char **data);
#endif
framework::Program<Dtype> program_;
int batch_size_ = 1;
std::shared_ptr<framework::ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>>
ops_of_block_;
int batch_size_;
bool use_optimize_;
bool lod_mode_;
Program<Device> program_;
std::shared_ptr<ProgramDesc> program_desc_;
typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr;
std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
// operators list
std::vector<OperatorBasePtr> ops_list_;
#ifdef PADDLE_MOBILE_PROFILE
struct ProfInfo {
int tid = 0;
......@@ -97,8 +88,6 @@ class Executor {
uint64_t runEnd = 0UL;
};
#endif
bool use_optimize_ = false;
bool loddable_ = false;
};
} // namespace framework
......
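Inside the framework, the same flow maps onto the refactored Executor above. A sketch of driving it directly, assuming code inside namespace paddle_mobile and an illustrative model path and input name:

framework::Loader<CPU, float> loader;
auto program = loader.Load("./some_model_dir", /*optimize=*/true);
framework::Executor<CPU, float> executor(program, /*batch_size=*/1,
                                         /*use_optimize=*/true,
                                         /*lod_mode=*/false);
framework::Tensor image;                     // filled by the caller
executor.SetInput(image, "image");           // one call per named feed variable
executor.Predict();                          // runs the whole program
auto output = executor.GetOutput("fetch");   // std::shared_ptr<framework::LoDTensor>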
......@@ -23,14 +23,8 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
/**
* muteandresize tensor as originProgramDesc and scope in loadParams
*
* @param originProgramDesc
* @param scope
*/
template <typename Dtype, Precision P>
void Loader<Dtype, P>::InitMemoryFromProgram(
template <typename Device, typename T>
void Loader<Device, T>::InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) {
......@@ -43,8 +37,6 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
tensor->Resize(make_ddim(dim));
} else {
auto dim = var_desc->Tensor_desc().Dims();
// PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
// dim[0] = 1;
if (dim.size() == 0) {
auto tensor = var->GetMutable<LoDTensor>();
framework::DDim dDim = {0};
......@@ -60,7 +52,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
}
}
} else {
// TODO(codeWorm): some.
// TODO(codeWorm)
}
}
}
......@@ -68,7 +60,7 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
#ifdef PADDLE_MOBILE_CL
template <>
void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
void Loader<GPU_CL, float>::InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) {
......@@ -77,7 +69,6 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
if (var_desc->Persistable()) {
auto dim = var_desc->Tensor_desc().Dims();
// auto tensor = var->GetMutable<LoDTensor>();
auto cl_image = var->GetMutable<framework::CLImage>();
cl_image->Resize(make_ddim(dim));
} else {
......@@ -88,14 +79,13 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
cl_image->Resize(make_ddim(dim));
}
} else {
// TODO(codeWorm): some.
// TODO(codeWorm)
}
}
}
}
template <>
const Program<GPU_CL, Precision::FP32>
Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
const Program<GPU_CL, float> Loader<GPU_CL, float>::LoadCombinedMemory(
size_t read_size, const uint8_t *buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize, bool quantification) {
bool can_add_split = false;
......@@ -113,7 +103,7 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
Program<GPU_CL, Precision::FP32> program;
Program<GPU_CL, float> program;
program.combined = true;
program.originProgram = originProgramDesc;
program.quantification = quantification;
......@@ -145,16 +135,16 @@ Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
/**
* fusion and print some infos
* @tparam Dtype
* @tparam Device
* @tparam T
* @param optimize
* @param can_add_split
* @param program
* @param originProgramDesc
*/
template <typename Dtype, Precision P>
template <typename Device, typename T>
void FusionAndPrintInfos(
bool optimize, bool can_add_split, Program<Dtype, P> *program,
bool optimize, bool can_add_split, Program<Device, T> *program,
const std::shared_ptr<ProgramDesc> &originProgramDesc) {
if (optimize) {
ProgramOptimize program_optimize;
......@@ -193,22 +183,22 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
return cur_len;
}
template <typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &dirname,
bool optimize,
bool quantification,
bool can_add_split) {
template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::Load(const std::string &dirname,
bool optimize,
bool quantification,
bool can_add_split) {
auto program = this->LoadProgram(dirname + "/__model__", optimize,
quantification, can_add_split);
program.model_path = dirname;
return program;
}
template <typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &model_path,
const std::string &para_path,
bool optimize,
bool quantification) {
template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::Load(const std::string &model_path,
const std::string &para_path,
bool optimize,
bool quantification) {
auto program = this->LoadProgram(model_path, optimize, quantification);
program.para_path = para_path;
......@@ -217,8 +207,8 @@ const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &model_path,
return program;
}
template <typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::LoadProgram(
const std::string &model_path, bool optimize, bool quantification,
bool can_add_split) {
std::string model_filename = model_path;
......@@ -237,7 +227,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
//
auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
Program<Dtype, P> program;
Program<Device, T> program;
program.originProgram = originProgramDesc;
program.quantification = quantification;
program.combined_params_len = 0;
......@@ -254,8 +244,8 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
return program;
}
template <typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::LoadCombinedMemory(
size_t read_size, const uint8_t *buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize, bool quantification) {
bool can_add_split = false;
......@@ -273,7 +263,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
Program<Dtype, P> program;
Program<Device, T> program;
program.combined = true;
program.originProgram = originProgramDesc;
program.quantification = quantification;
......@@ -289,13 +279,13 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
return program;
}
template class Loader<CPU, Precision::FP32>;
template class Loader<CPU, float>;
template class Loader<FPGA, Precision::FP32>;
template class Loader<FPGA, float>;
template class Loader<GPU_MALI, Precision::FP32>;
template class Loader<GPU_MALI, float>;
template class Loader<GPU_CL, Precision::FP32>;
template class Loader<GPU_CL, float>;
} // namespace framework
} // namespace paddle_mobile
......@@ -22,39 +22,39 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
template <typename Dtype = CPU, Precision P = Precision::FP32>
template <typename Device = CPU, typename T = float>
class Loader {
public:
/*
* @b load separate format fluid model
* @b load a fluid model stored as separate files
* */
const Program<Dtype, P> Load(const std::string &dirname,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
const Program<Device, T> Load(const std::string &dirname,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
/*
* @b load combined format fluid model
* @b load a fluid model stored as a single combined file
* */
const Program<Dtype, P> Load(const std::string &model_path,
const std::string &para_path,
bool optimize = false,
bool quantification = false);
const Program<Device, T> Load(const std::string &model_path,
const std::string &para_path,
bool optimize = false,
bool quantification = false);
const Program<Dtype, P> LoadCombinedMemory(size_t model_len,
const uint8_t *model_buf,
size_t combined_params_len,
uint8_t *combined_params_buf,
bool optimize = false,
bool quantification = false);
const Program<Device, T> LoadCombinedMemory(size_t model_len,
const uint8_t *model_buf,
size_t combined_params_len,
uint8_t *combined_params_buf,
bool optimize = false,
bool quantification = false);
private:
const Program<Dtype, P> LoadProgram(const std::string &model_path,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
const Program<Device, T> LoadProgram(const std::string &model_path,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
void InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc,
......
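For reference, the two Load overloads above correspond to the two fluid model layouts described in the comments; a short sketch with the new <Device, T> template arguments (the paths are hypothetical):

framework::Loader<CPU, float> loader;
// separate layout: dirname contains __model__ plus one file per parameter
auto separate = loader.Load("./mobilenet", /*optimize=*/true);
// combined layout: a single model file and a single parameter file
auto combined = loader.Load("./mobilenet/model", "./mobilenet/params",
                            /*optimize=*/true, /*quantification=*/false);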
......@@ -16,12 +16,12 @@ limitations under the License. */
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "tensor.h"
#include "tensor_util.h"
#include "framework/tensor.h"
#include "framework/tensor_util.h"
namespace paddle_mobile {
namespace framework {
/*
......@@ -202,5 +202,29 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor);
void DeserializeFromStream(std::istream &is, LoDTensor *tensor);
#ifdef PADDLE_MOBILE_DEBUG
inline Print &operator<<(Print &printer, const LoDTensor &tensor) {
printer << " dims: " << tensor.dims() << "\n";
int stride = tensor.numel() / 20;
stride = stride > 0 ? stride : 1;
#ifndef PADDLE_MOBILE_FPGA
for (int i = 0; i < tensor.numel(); i += stride) {
if (tensor.type() == typeid(float)) {
printer << tensor.data<float>()[i] << " ";
} else if (tensor.type() == typeid(int32_t)) {
printer << tensor.data<int32_t>()[i] << " ";
} else if (tensor.type() == typeid(int64_t)) {
printer << tensor.data<int64_t>()[i] << " ";
} else if (tensor.type() == typeid(int8_t)) {
printer << static_cast<int>(tensor.data<int8_t>()[i]) << " ";
}
}
#endif // PADDLE_MOBILE_FPGA
return printer;
}
#endif // PADDLE_MOBILE_DEBUG
} // namespace framework
} // namespace paddle_mobile
......@@ -14,16 +14,15 @@ limitations under the License. */
#pragma once
#include <string>
#include "common/types.h"
#include "framework/program/program_desc.h"
#include "framework/scope.h"
#include <string>
namespace paddle_mobile {
namespace framework {
template <typename Dtype, Precision P = Precision::FP32>
template <typename Device, typename T = float>
class Program {
public:
std::shared_ptr<ProgramDesc> originProgram;
......
......@@ -26,6 +26,7 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
class Scope {
public:
Scope() = default;
......
......@@ -226,7 +226,6 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) {
}
}
#endif
return printer;
}
......
......@@ -18,17 +18,17 @@
namespace paddle_mobile {
template <typename Dtype, Precision P>
PaddleMobilePredictor<Dtype, P>::PaddleMobilePredictor(
template <typename Device, typename T>
PaddleMobilePredictor<Device, T>::PaddleMobilePredictor(
const PaddleMobileConfig &config) {
PADDLE_MOBILE_ENFORCE(Init(config) == true,
"paddle mobile predictor init failed!");
config_ = config;
}
template <typename Dtype, Precision P>
bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
paddle_mobile_.reset(new PaddleMobile<Dtype, P>());
template <typename Device, typename T>
bool PaddleMobilePredictor<Device, T>::Init(const PaddleMobileConfig &config) {
paddle_mobile_.reset(new PaddleMobile<Device, T>());
#ifdef PADDLE_MOBILE_CL
paddle_mobile_->SetCLPath(config.cl_path);
#endif
......@@ -52,8 +52,8 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
paddle_mobile_->SetThreadNum(config.thread_num);
return true;
}
template <typename Dtype, Precision P>
bool PaddleMobilePredictor<Dtype, P>::Run(
template <typename Device, typename T>
bool PaddleMobilePredictor<Device, T>::Run(
const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data, int batch_size) {
if (inputs.empty()) {
......@@ -78,12 +78,12 @@ bool PaddleMobilePredictor<Dtype, P>::Run(
framework::Tensor input_tensor;
input_tensor.Resize(ddim);
int input_length = framework::product(ddim);
typedef typename PrecisionTrait<P>::ptype PType;
auto input_ptr = input_tensor.mutable_data<PType>();
auto input_ptr = input_tensor.mutable_data<T>();
memcpy(input_ptr, static_cast<PType *>(input.data.data()),
input_length * sizeof(PType));
auto output_tensor = paddle_mobile_->Predict(input_tensor);
memcpy(input_ptr, static_cast<T *>(input.data.data()),
input_length * sizeof(T));
paddle_mobile_->Predict(input_tensor);
auto output_tensor = paddle_mobile_->Fetch();
if (output_data->empty()) {
LOG(kLOG_ERROR) << "At least one output should be set with tensors' names.";
......@@ -99,18 +99,18 @@ bool PaddleMobilePredictor<Dtype, P>::Run(
output.shape.push_back(static_cast<int>(d));
}
if (output.data.length() < output_length * sizeof(PType)) {
output.data.Resize(output_length * sizeof(PType));
if (output.data.length() < output_length * sizeof(T)) {
output.data.Resize(output_length * sizeof(T));
}
memcpy(output.data.data(), output_tensor->template data<PType>(),
output_length * sizeof(PType));
memcpy(output.data.data(), output_tensor->template data<T>(),
output_length * sizeof(T));
return true;
}
template <typename Dtype, Precision P>
PaddleMobilePredictor<Dtype, P>::~PaddleMobilePredictor() {
template <typename Device, typename T>
PaddleMobilePredictor<Device, T>::~PaddleMobilePredictor() {
paddle_mobile_->Clear();
}
......@@ -122,13 +122,13 @@ CreatePaddlePredictor<PaddleMobileConfig, PaddleEngineKind::kPaddleMobile>(
std::unique_ptr<PaddlePredictor> x;
if (config.precision == PaddleMobileConfig::FP32) {
if (config.device == PaddleMobileConfig::kCPU) {
x.reset(new PaddleMobilePredictor<CPU, Precision::FP32>(config));
x.reset(new PaddleMobilePredictor<CPU, float>(config));
} else if (config.device == PaddleMobileConfig::kFPGA) {
x.reset(new PaddleMobilePredictor<FPGA, Precision::FP32>(config));
x.reset(new PaddleMobilePredictor<FPGA, float>(config));
} else if (config.device == PaddleMobileConfig::kGPU_MALI) {
x.reset(new PaddleMobilePredictor<GPU_MALI, Precision::FP32>(config));
x.reset(new PaddleMobilePredictor<GPU_MALI, float>(config));
} else if (config.device == PaddleMobileConfig::kGPU_CL) {
x.reset(new PaddleMobilePredictor<GPU_CL, Precision::FP32>(config));
x.reset(new PaddleMobilePredictor<GPU_CL, float>(config));
} else {
LOG(kLOG_ERROR) << "unsupport device type!";
return nullptr;
......
......@@ -29,7 +29,7 @@ limitations under the License. */
namespace paddle_mobile {
template <typename Dtype = CPU, Precision P = Precision::FP32>
template <typename Device = CPU, typename T = float>
class PaddleMobilePredictor : public PaddlePredictor {
public:
PaddleMobilePredictor() = delete;
......@@ -43,7 +43,7 @@ class PaddleMobilePredictor : public PaddlePredictor {
~PaddleMobilePredictor() override;
private:
std::unique_ptr<PaddleMobile<Dtype, P>> paddle_mobile_;
std::unique_ptr<PaddleMobile<Device, T>> paddle_mobile_;
bool Init(const PaddleMobileConfig& config);
PaddleMobileConfig config_;
......
......@@ -48,7 +48,7 @@
@interface PaddleMobileCPU()
{
paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32> *pam_;
paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> *pam_;
BOOL loaded_;
}
@end
......@@ -59,7 +59,7 @@ static std::mutex shared_mutex;
- (instancetype)init {
if (self = [super init]) {
pam_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32>();
pam_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU, float>();
}
return self;
}
......@@ -220,7 +220,8 @@ static std::mutex shared_mutex;
memcpy(input_ptr, input,
numel * sizeof(float));
std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Predict(input_tensor);
pam_->Predict(input_tensor);
std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Fetch();
float *output_pointer = new float[output->numel()];
......
......@@ -16,21 +16,23 @@ limitations under the License. */
#include "paddle_mobile_jni.h"
#include <cmath>
#include <string>
#include <vector>
#include "common/log.h"
#include "framework/tensor.h"
#include "io/paddle_mobile.h"
#ifdef ENABLE_EXCEPTION
#include "common/enforce.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
namespace paddle_mobile {
namespace jni {
using framework::DDim;
using framework::Program;
using framework::Tensor;
......@@ -200,7 +202,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
for (int i = 0; i < length; i++) {
input_ptr[i] = dataPointer[i];
}
auto output = getPaddleMobileInstance()->Predict(input);
getPaddleMobileInstance()->Predict(input);
auto output = getPaddleMobileInstance()->Fetch();
count = output->numel();
result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>());
......@@ -233,7 +236,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
for (int i = 0; i < length; i++) {
input_ptr[i] = dataPointer[i];
}
auto output = getPaddleMobileInstance()->Predict(input);
getPaddleMobileInstance()->Predict(input);
auto output = getPaddleMobileInstance()->Fetch();
count = output->numel();
result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>());
......@@ -328,7 +332,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
for (int i = 0; i < length; i++) {
input_ptr[i] = matrix[i];
}
auto output = getPaddleMobileInstance()->Predict(input);
getPaddleMobileInstance()->Predict(input);
auto output = getPaddleMobileInstance()->Fetch();
count = output->numel();
result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>());
......@@ -363,7 +368,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
for (int i = 0; i < length; i++) {
input_ptr[i] = matrix[i];
}
auto output = getPaddleMobileInstance()->Predict(input);
getPaddleMobileInstance()->Predict(input);
auto output = getPaddleMobileInstance()->Fetch();
count = output->numel();
result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>());
......@@ -399,7 +405,8 @@ Java_com_baidu_paddle_PML_predictLod(JNIEnv *env, jclass thiz, jlongArray buf) {
auto *pdata = words.mutable_data<int64_t>();
size_t n = words.numel() * sizeof(int64_t);
memcpy(pdata, ids.data(), n);
auto vec_result = paddle_mobile.PredictLod(words);
paddle_mobile.Predict(words);
auto vec_result = paddle_mobile.Fetch();
int count = vec_result->numel();
jlongArray result = NULL;
ANDROIDLOGE("predict nlp size %d", count);
......
......@@ -13,81 +13,81 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "io/paddle_mobile.h"
#include <utility>
#include "common/common.h"
#ifdef PADDLE_MOBILE_CL
#include <CL/cl.h>
#include "framework/cl/cl_tensor.h"
#endif
#include "common/common.h"
#include "operators/math/gemm.h"
namespace paddle_mobile {
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::SetThreadNum(int num) {
#ifdef _OPENMP
omp_set_num_threads(num);
#endif
}
template <typename Dtype, Precision P>
bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
bool quantification, int batch_size,
bool loddable) {
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Load(const std::string &dirname,
bool optimize, bool quantification,
int batch_size, bool loddable) {
if (loader_.get() == nullptr) {
loader_ = std::make_shared<framework::Loader<Dtype, P>>();
loader_ = std::make_shared<framework::Loader<Device, T>>();
} else {
LOG(kLOG_INFO) << "loader inited";
}
if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Dtype, P>>(
executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->Load(dirname, optimize, quantification), batch_size, optimize,
loddable);
} else {
LOG(kLOG_INFO) << "executor inited";
}
return true;
return PMSuccess;
}
template <typename Dtype, Precision P>
bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
const std::string &para_path, bool optimize,
bool quantification, int batch_size,
bool loddable) {
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path,
const std::string &para_path,
bool optimize, bool quantification,
int batch_size, bool loddable) {
if (loader_.get() == nullptr) {
loader_ = std::make_shared<framework::Loader<Dtype, P>>();
loader_ = std::make_shared<framework::Loader<Device, T>>();
} else {
LOG(kLOG_INFO) << "loader inited";
}
if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Dtype, P>>(
executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->Load(model_path, para_path, optimize, quantification),
batch_size, optimize, loddable);
} else {
LOG(kLOG_INFO) << "executor inited";
}
return true;
return PMSuccess;
}
template <typename Dtype, Precision P>
bool PaddleMobile<Dtype, P>::LoadCombinedMemory(size_t model_len,
const uint8_t *model_buf,
size_t combined_params_len,
uint8_t *combined_params_buf) {
template <typename Device, typename T>
bool PaddleMobile<Device, T>::LoadCombinedMemory(size_t model_len,
const uint8_t *model_buf,
size_t combined_params_len,
uint8_t *combined_params_buf) {
int batch_size = 1;
bool optimise = true;
bool quantification = false;
if (loader_.get() == nullptr) {
loader_ = std::make_shared<framework::Loader<Dtype, P>>();
loader_ = std::make_shared<framework::Loader<Device, T>>();
} else {
LOG(kLOG_INFO) << "loader inited";
}
if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Dtype, P>>(
executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len,
combined_params_buf, optimise,
quantification),
......@@ -96,38 +96,76 @@ bool PaddleMobile<Dtype, P>::LoadCombinedMemory(size_t model_len,
LOG(kLOG_INFO) << "executor inited";
}
return true;
return PMSuccess;
}
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Predict(const framework::Tensor &input) {
std::vector<std::pair<std::string, framework::Tensor>> inputs;
inputs.push_back(std::make_pair("feed", input));
return this->Predict(inputs);
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::Predict(
const framework::Tensor &t) {
return executor_->Predict(t);
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Predict(const framework::LoDTensor &input) {
std::vector<std::pair<std::string, framework::LoDTensor>> inputs;
inputs.push_back(std::make_pair("feed", input));
return this->Predict(inputs);
}
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Predict(
const std::vector<std::pair<std::string, framework::Tensor>> &inputs) {
return executor_->Predict(inputs);
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::PredictLod(
const framework::LoDTensor &t) {
return executor_->PredictLod(t);
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Predict(
const std::vector<std::pair<std::string, framework::LoDTensor>> &inputs) {
return executor_->Predict(inputs);
}
template <typename Dtype, Precision P>
std::vector<typename PaddleMobile<Dtype, P>::Ptype>
PaddleMobile<Dtype, P>::Predict(const std::vector<Ptype> &input,
const std::vector<int64_t> &dims) {
template <typename Device, typename T>
std::vector<T> PaddleMobile<Device, T>::Predict(
const std::vector<T> &input, const std::vector<int64_t> &dims) {
return executor_->Predict(input, dims);
}
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::Clear() {
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Predict() {
return executor_->Predict();
}
template <typename Device, typename T>
void PaddleMobile<Device, T>::Feed(const framework::Tensor &input,
const std::string &var_name) {
executor_->SetInput(input, var_name);
}
template <typename Device, typename T>
void PaddleMobile<Device, T>::Feed(const framework::LoDTensor &input,
const std::string &var_name) {
executor_->SetInput(input, var_name);
}
typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr;
template <typename Device, typename T>
LoDTensorPtr PaddleMobile<Device, T>::Fetch(const std::string &var_name) {
return executor_->GetOutput(var_name);
}
template <typename Device, typename T>
void PaddleMobile<Device, T>::Clear() {
executor_ = nullptr;
loader_ = nullptr;
}
template <typename Dtype, Precision P>
double PaddleMobile<Dtype, P>::GetPredictTime() {}
template <typename Device, typename T>
double PaddleMobile<Device, T>::GetPredictTime() {
  // no generic benchmark for this device; the specializations below provide real timings
  return -1;
}
#ifdef PADDLE_MOBILE_CPU
template <>
double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() {
double PaddleMobile<CPU, float>::GetPredictTime() {
int m = 32;
int n = 224 * 224;
int k = 27;
......@@ -148,7 +186,8 @@ double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() {
for (int i = 0; i < k * n; ++i) {
b[i] = t1 + rand() % t2; // NOLINT
}
paddle_mobile::operators::math::Gemm gemm;
operators::math::Gemm gemm;
auto time1 = paddle_mobile::time();
gemm.Sgemm(m, n, k, static_cast<float>(1), a, lda, b, ldb,
static_cast<float>(0), c, ldc, false,
......@@ -162,57 +201,51 @@ double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() {
}
#endif
template <typename Dtype, Precision P>
PaddleMobile<Dtype, P>::~PaddleMobile() {
executor_ = nullptr;
loader_ = nullptr;
}
#ifdef PADDLE_MOBILE_FPGA
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::InjectVariable(const framework::Tensor &t,
std::string var_name) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::InjectVariable(const framework::Tensor &t,
                                             std::string var_name) {
executor_->InjectVariable(t, var_name);
}
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::FeedData(const framework::Tensor &t) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(const framework::Tensor &t) {
executor_->FeedData(t);
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::FetchResult(int id) {
template <typename Device, typename T>
std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult(
    int id) {
return executor_->FetchResult(id);
}
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::Predict_From_To(int start, int end) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::Predict_From_To(int start, int end) {
executor_->Predict_From_To(start, end);
}
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::Predict_From(int start) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::Predict_From(int start) {
executor_->Predict_From(start);
}
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::Predict_To(int end) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::Predict_To(int end) {
executor_->Predict_To(end);
}
#endif
#ifdef PADDLE_MOBILE_CL
static std::mutex lc;
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::SetCLPath(std::string path) {
std::lock_guard<std::mutex> lock(lc);
if (framework::CLEngine::Instance()->GetCLPath() == "") {
framework::CLEngine::Instance()->setClPath(path);
}
}
template <>
double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
double PaddleMobile<GPU_CL, float>::GetPredictTime() {
cl_int status;
cl_uint nPlatform;
clGetPlatformIDs(0, NULL, &nPlatform);
......@@ -410,8 +443,8 @@ double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
return -1;
}
}
template <typename Dtype, Precision P>
int PaddleMobile<Dtype, P>::readText(
template <typename Device, typename T>
int PaddleMobile<Device, T>::readText(
const char *kernelPath,
char **pcode) {  // read the text file into pcode and return its length
FILE *fp;
......@@ -440,13 +473,11 @@ int PaddleMobile<Dtype, P>::readText(
fclose(fp);
return size + 1;
}
#endif
template class PaddleMobile<CPU, Precision::FP32>;
template class PaddleMobile<FPGA, Precision::FP32>;
template class PaddleMobile<GPU_MALI, Precision::FP32>;
template class PaddleMobile<GPU_CL, Precision::FP32>;
template class PaddleMobile<CPU, float>;
template class PaddleMobile<FPGA, float>;
template class PaddleMobile<GPU_MALI, float>;
template class PaddleMobile<GPU_CL, float>;
} // namespace paddle_mobile
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory>
#include <string>
#include <utility>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
......@@ -32,43 +33,52 @@ limitations under the License. */
namespace paddle_mobile {
template <typename Dtype = CPU, Precision P = Precision::FP32>
template <typename Device, typename T = float>
class PaddleMobile {
typedef typename PrecisionTrait<P>::ptype Ptype;
public:
PaddleMobile() {
#ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Dtype>::value;
PADDLE_MOBILE_ENFORCE(!is_gpu,
"Not Enable GPU in CmakeList but run gpu codes ");
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL turned on");
#endif
}
bool Load(const std::string &dirname, bool optimize = false,
bool quantification = false, int batch_size = 1,
bool loddable = false);
~PaddleMobile() {}
PMStatus Load(const std::string &dirname, const bool optimize = false,
const bool quantification = false, const int batch_size = 1,
const bool lod = false);
PMStatus Load(const std::string &model_path, const std::string &para_path,
const bool optimize = false, const bool quantification = false,
const int batch_size = 1, const bool lod = false);
PMStatus Predict(const framework::Tensor &input);
PMStatus Predict(const framework::LoDTensor &input);
bool Load(const std::string &model_path, const std::string &para_path,
bool optimize = false, bool quantification = false,
int batch_size = 1, bool loddable = false);
PMStatus Predict(
const std::vector<std::pair<std::string, framework::Tensor>> &inputs);
PMStatus Predict(
const std::vector<std::pair<std::string, framework::LoDTensor>> &inputs);
std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
std::vector<T> Predict(const std::vector<T> &input,
const std::vector<int64_t> &dims);
PMStatus Predict();
std::shared_ptr<framework::Tensor> PredictLod(const framework::LoDTensor &t);
void Feed(const framework::LoDTensor &input, const std::string &var_name);
void Feed(const framework::Tensor &input, const std::string &var_name);
std::vector<Ptype> Predict(const std::vector<Ptype> &input,
const std::vector<int64_t> &dims);
typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr;
LoDTensorPtr Fetch(const std::string &var_name);
LoDTensorPtr Fetch() { return Fetch("fetch"); }
bool LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
size_t combined_params_len,
uint8_t *combined_params_buf);
void SetThreadNum(int num);
void SetThreadNum(int count);
void Clear();
double GetPredictTime();
~PaddleMobile();
#ifdef PADDLE_MOBILE_FPGA
void InjectVariable(const framework::Tensor &t, std::string var_name);
void FeedData(const framework::Tensor &t);
......@@ -79,15 +89,15 @@ class PaddleMobile {
#endif
#ifdef PADDLE_MOBILE_CL
public:
public: // NOLINT
void SetCLPath(std::string cl_path);
int readText(const char *kernelPath,
char **pcode);  // read the text file into pcode and return its length
#endif
private:
std::shared_ptr<framework::Loader<Dtype, P>> loader_;
std::shared_ptr<framework::Executor<Dtype, P>> executor_;
std::shared_ptr<framework::Loader<Device, T>> loader_;
std::shared_ptr<framework::Executor<Device, T>> executor_;
};
} // namespace paddle_mobile
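The call-site updates in the JNI, iOS and test code elsewhere in this diff all follow the same migration pattern, condensed here as a before/after sketch for a single-input model:

// before: Predict returned the output tensor directly
//   auto output = paddle_mobile.Predict(input_tensor);
// after: Predict only runs the network (returning a PMStatus);
//        results are read back with Fetch()
paddle_mobile.Predict(input_tensor);
auto output = paddle_mobile.Fetch();  // std::shared_ptr<framework::LoDTensor>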
......@@ -14,10 +14,12 @@ limitations under the License. */
#include "io/paddle_test_inference_api.h"
#include "io/paddle_mobile.h"
namespace paddle_mobile {
template <typename Dtype, Precision P>
double PaddleTester<Dtype, P>::CaculatePredictTime(std::string *cl_path) {
PaddleMobile<Dtype, P> paddle_mobile;
template <typename Device, typename T>
double PaddleTester<Device, T>::CaculatePredictTime(std::string *cl_path) {
PaddleMobile<Device, T> paddle_mobile;
#ifdef PADDLE_MOBILE_CL
if (cl_path) {
paddle_mobile.SetCLPath(*cl_path);
......@@ -26,10 +28,10 @@ double PaddleTester<Dtype, P>::CaculatePredictTime(std::string *cl_path) {
#endif
return paddle_mobile.GetPredictTime();
}
template class PaddleTester<CPU, Precision::FP32>;
template class PaddleTester<FPGA, Precision::FP32>;
template class PaddleTester<GPU_MALI, Precision::FP32>;
template class PaddleTester<CPU, float>;
template class PaddleTester<FPGA, float>;
template class PaddleTester<GPU_MALI, float>;
template class PaddleTester<GPU_CL, Precision::FP32>;
template class PaddleTester<GPU_CL, float>;
} // namespace paddle_mobile
......@@ -20,10 +20,13 @@ limitations under the License. */
*/
#pragma once
#include "common/types.h"
#include "string"
namespace paddle_mobile {
template <typename Dtype, Precision P = Precision::FP32>
template <typename Device, typename T = float>
class PaddleTester {
public:
double CaculatePredictTime(std::string *cl_path = nullptr);
......
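A short sketch of how the tester above can be used with the new template parameters; the cl_path argument is only needed for the GPU_CL build, and the CPU path times the built-in GEMM benchmark:

paddle_mobile::PaddleTester<paddle_mobile::CPU, float> tester;
double cost_ms = tester.CaculatePredictTime();  // nullptr cl_path -> CPU path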
......@@ -375,5 +375,8 @@ if (NOT FOUND_MATCH)
# gen test
ADD_EXECUTABLE(test-super net/test_super.cpp test_helper.h test_include.h)
target_link_libraries(test-super paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
# gen test
ADD_EXECUTABLE(test-ocr net/test_ocr.cpp test_helper.h test_include.h)
target_link_libraries(test-ocr paddle-mobile)
endif ()
......@@ -39,6 +39,7 @@ using paddle_mobile::framework::Tensor;
using paddle_mobile::framework::Variable;
using std::string;
using std::vector;
template <typename DeviceType, typename OpType>
class Executor4Test : public Executor<DeviceType> {
public:
......@@ -48,20 +49,19 @@ class Executor4Test : public Executor<DeviceType> {
this->use_optimize_ = use_optimize;
this->program_ = p;
if (this->use_optimize_) {
this->to_predict_program_ = this->program_.optimizeProgram;
this->program_desc_ = this->program_.optimizeProgram;
} else {
this->to_predict_program_ = this->program_.originProgram;
this->program_desc_ = this->program_.originProgram;
}
if (this->program_.originProgram == nullptr) {
LOG(paddle_mobile::LogLevel::kLOG_ERROR)
<< "to_predict_program_ == nullptr";
LOG(paddle_mobile::LogLevel::kLOG_ERROR) << "program_desc_ == nullptr";
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
this->to_predict_program_->Blocks();
for (std::shared_ptr<BlockDesc> block_desc : blocks) {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
this->program_desc_->Blocks();
for (int block_id = 0; block_id < blocks.size(); ++block_id) {
std::vector<std::shared_ptr<OpDesc>> ops = blocks[block_id]->Ops();
for (int i = 0; i < ops.size(); ++i) {
auto op = ops[i];
if (op->Type() == op_type) {
......@@ -73,18 +73,16 @@ class Executor4Test : public Executor<DeviceType> {
paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), this->program_.scope);
this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
this->ops_of_block_[block_id].push_back(op_ptr);
break;
}
}
}
this->InitMemory();
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
this->to_predict_program_->Block(0);
auto &ops = this->ops_of_block_[*to_predict_block.get()];
for (const auto &op : ops) {
op->Init();
for (const auto &ops : this->ops_of_block_) {
for (const auto &op : ops) {
op->Init();
}
}
}
......@@ -117,12 +115,10 @@ class Executor4Test : public Executor<DeviceType> {
output_tensor_sptrs[i].reset(output_tensors[i]);
}
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
this->to_predict_program_->Block(0);
for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size();
++j) {
auto op = this->ops_of_block_[*to_predict_block.get()][j];
op->Run();
for (auto &ops : this->ops_of_block_) {
for (auto &op : ops) {
op->Run();
}
}
return output_tensor_sptrs;
......@@ -139,14 +135,11 @@ class Executor4Test : public Executor<DeviceType> {
auto *output_tensor = con_output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>(dDim);
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
this->to_predict_program_->Block(0);
for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size();
++j) {
auto op = this->ops_of_block_[*to_predict_block.get()][j];
op->Run();
for (auto &ops : this->ops_of_block_) {
for (auto &op : ops) {
op->Run();
}
}
return std::make_shared<paddle_mobile::framework::Tensor>(
paddle_mobile::framework::Tensor(*output_tensor));
}
......
......@@ -52,15 +52,16 @@ int main(int argc, char* argv[]) {
SetupTensor<float>(&input, in_shape, 0.f, 255.f);
// warmup
for (int i = 0; i < 10; ++i) {
output = paddle_mobile.Predict(input);
paddle_mobile.Predict(input);
}
auto time3 = time();
for (int i = 0; i < 10; ++i) {
output = paddle_mobile.Predict(input);
paddle_mobile.Predict(input);
}
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n";
std::ostringstream os("output tensor size: ");
output = paddle_mobile.Fetch();
os << output->numel() << "\n" << output->data<float>()[0];
for (int i = 1; i < output->numel(); ++i) {
os << ", " << output->data<float>()[i];
......
......@@ -36,11 +36,11 @@ int main() {
input_tensor.data<float>() + input_tensor.numel());
// warm up
for (int i = 0; i < 1; ++i) {
paddle_mobile.PredictLod(input_tensor);
paddle_mobile.Predict(input_tensor);
}
auto time3 = time();
for (int i = 0; i < 1; ++i) {
paddle_mobile.PredictLod(input_tensor);
paddle_mobile.Predict(input_tensor);
}
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
......
......@@ -41,12 +41,12 @@ int main(int argc, char* argv[]) {
#endif
paddle_mobile.SetThreadNum(thread_num);
auto time1 = time();
if (paddle_mobile.Load(g_googlenet, optimize)) {
std::vector<float> output;
if (paddle_mobile.Load(g_googlenet, optimize, false, 1, true)) {
auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
<< std::endl;
std::vector<float> input;
std::vector<float> output;
std::vector<int64_t> dims{1, 3, 224, 224};
if (feed_shape) {
sscanf(feed_shape, "%d,%d,%d", &dims[1], &dims[2], &dims[3]);
......
......@@ -48,8 +48,8 @@ int main() {
DLOG << "words lod 22: " << words.lod();
auto time3 = time();
for (int i = 0; i < 1; ++i) {
auto vec_result = paddle_mobile.PredictLod(words);
DLOG << *vec_result;
paddle_mobile.Predict(words);
DLOG << *paddle_mobile.Fetch();
}
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms"
......@@ -84,8 +84,8 @@ int main() {
DLOG << "words lod 22: " << words.lod();
auto time3 = time();
for (int i = 0; i < 1; ++i) {
auto vec_result = paddle_mobile.PredictLod(words);
DLOG << *vec_result;
paddle_mobile.Predict(words);
DLOG << *paddle_mobile.Fetch();
}
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 1 << "ms"
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
void load_images(const char *image_dir, const char *images_list,
std::vector<std::string> *image_names,
std::vector<std::pair<int, int>> *image_shapes) {
int height, width;
std::string filename;
std::ifstream if_list(images_list, std::ios::in);
while (if_list >> height >> width >> filename) {
    image_shapes->push_back(std::make_pair(height, width));
    image_names->push_back(filename);
  }
}
int main(int argc, char **argv) {
if (argc < 4) {
std::cerr << "Usage: ./test_ocr model_dir image_dir images_list."
<< std::endl;
return 1;
}
char *model_dir = argv[1];
char *image_dir = argv[2];
char *images_list = argv[3];
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(8);
auto isok = paddle_mobile.Load(std::string(model_dir) + "/model",
std::string(model_dir) + "/params", true,
false, 1, true);
DLOG << "pass init model";
std::vector<std::string> image_names;
std::vector<std::pair<int, int>> image_shapes;
load_images(image_dir, images_list, &image_names, &image_shapes);
DLOG << "pass load images";
for (int i = 0; i < image_names.size(); i++) {
std::string file_name = image_names[i];
std::vector<float> input;
std::vector<int64_t> dims{1, 1, 48, 512};
dims[2] = image_shapes[i].first;
dims[3] = image_shapes[i].second;
// load input image
std::string img_path = std::string(image_dir) + "/" + file_name;
std::cerr << "img_path: " << img_path << std::endl;
std::cerr << "shape = [" << dims[0] << ", " << dims[1] << ", " << dims[2]
<< ", " << dims[3] << "]" << std::endl;
GetInput<float>(img_path, &input, dims);
// predict
auto output = paddle_mobile.Predict(input, dims);
// print result
std::cerr << file_name << std::endl;
std::cerr << output[0];
for (int j = 1; j < output.size(); ++j) {
std::cerr << " " << output[j];
}
std::cerr << std::endl;
}
return 0;
}
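load_images in the new OCR test reads one record per line from images_list: the height, then the width, then the image filename relative to image_dir. A hypothetical example of the list file contents:

48 512 img_0001.jpg
48 320 img_0002.jpg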
......@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do
grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "paddle_mobile_jni.cpp"); do
cpplint $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done
......