提交 896f52f2 编写于 作者: W WangLiu 提交者: GitHub

Merge pull request #324 from cocodark/develop

add impl for executor'predict
cmake_minimum_required(VERSION 3.0) cmake_minimum_required(VERSION 3.0)
project(paddle-mobile) project(paddle-mobile)
add_definitions(-DPADDLE_MOBILE_DEBUG="true") add_definitions(-DPADDLE_MOBILE_DEBUG)
add_definitions(-DENABLE_EXCEPTION)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_BUILD_TYPE RelWithDebInfo) set(CMAKE_BUILD_TYPE RelWithDebInfo)
......
#!/usr/bin/env sh
push_fn () {
MODELS_PATH="../test/models/*"
EXE_FILE="../test/build/*"
EXE_DIR="data/local/tmp/bin"
MODELS_DIR="data/local/tmp/models"
LIB_PATH="../build/release/arm-v7a/build/*"
adb push ${EXE_FILE} ${EXE_DIR}
adb push ${LIB_PATH} ${EXE_DIR}
adb push ${MODELS_PATH} ${MODELS_DIR}
echo "test files sync completed"
}
push_fn
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#ifdef PADDLE_MOBILE_DEBUG #ifdef ENABLE_EXCEPTION
#include <stdio.h> #include <stdio.h>
#include <exception> #include <exception>
#include <sstream> #include <sstream>
...@@ -25,7 +25,7 @@ limitations under the License. */ ...@@ -25,7 +25,7 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
#ifdef PADDLE_MOBILE_DEBUG #ifdef ENABLE_EXCEPTION
struct PaddleMobileException : public std::exception { struct PaddleMobileException : public std::exception {
const std::string exception_prefix = "paddle mobile C++ Exception: \n"; const std::string exception_prefix = "paddle mobile C++ Exception: \n";
std::string message; std::string message;
...@@ -64,7 +64,7 @@ struct PaddleMobileException : public std::exception { ...@@ -64,7 +64,7 @@ struct PaddleMobileException : public std::exception {
} }
#else #else
#define PADDLE_MOBILE_THROW_EXCEPTION(...) #define PADDLE_MOBILE_THROW_EXCEPTION(...)
#define PADDLE_MOBILE_ASSERT(stat, ...) #define PADDLE_MOBILE_ENFORCE(stat, ...)
#endif #endif
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -28,18 +28,6 @@ vector<string> OperatorBase<Dtype>::GetOutKeys() const { ...@@ -28,18 +28,6 @@ vector<string> OperatorBase<Dtype>::GetOutKeys() const {
return it->second.second; return it->second.second;
} }
template <typename T>
static T *GetVarValue(const string &key, const VariableNameMap &var_map,
const Scope &scope) {
auto var_vec = var_map.at(key);
if (!var_vec.empty()) {
auto var = scope.FindVar(var_vec[0]);
return var->GetMutable<T>();
} else {
return nullptr;
}
}
template <typename Dtype> template <typename Dtype>
OperatorBase<Dtype>::OperatorBase(const std::string &type, OperatorBase<Dtype>::OperatorBase(const std::string &type,
const VariableNameMap &inputs, const VariableNameMap &inputs,
...@@ -60,7 +48,7 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {} ...@@ -60,7 +48,7 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
template <typename Dtype> template <typename Dtype>
void OperatorBase<Dtype>::Run() const { void OperatorBase<Dtype>::Run() const {
RunImpl(); RunImpl();
#ifdef PADDLE_MOBILE_DEBUG #if (PADDLE_MOBILE_DEBUG)
vector<string> output_keys = GetOutKeys(); vector<string> output_keys = GetOutKeys();
for (const auto key : output_keys) { for (const auto key : output_keys) {
Tensor *out_ = GetVarValue<framework::LoDTensor>(key, outputs_, *scope_); Tensor *out_ = GetVarValue<framework::LoDTensor>(key, outputs_, *scope_);
......
...@@ -39,6 +39,18 @@ namespace framework { ...@@ -39,6 +39,18 @@ namespace framework {
using std::string; using std::string;
using std::vector; using std::vector;
template <typename T>
static T *GetVarValue(const string &key, const VariableNameMap &var_map,
const Scope &scope) {
auto var_vec = var_map.at(key);
if (!var_vec.empty()) {
auto var = scope.FindVar(var_vec[0]);
return var->GetMutable<T>();
} else {
return nullptr;
}
}
template <typename Dtype> template <typename Dtype>
class OperatorBase : PaddleMobileObject { class OperatorBase : PaddleMobileObject {
public: public:
......
...@@ -371,31 +371,47 @@ void Executor<Dtype, P>::InitMemory() { ...@@ -371,31 +371,47 @@ void Executor<Dtype, P>::InitMemory() {
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict(const framework::Tensor &t, int block_id) { std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
const framework::Tensor &t) {
framework::Variable *g_feed_value = program_.scope->Var("feed"); framework::Variable *g_feed_value = program_.scope->Var("feed");
framework::Tensor *feed_tensor = framework::Tensor *feed_tensor =
g_feed_value->GetMutable<framework::LoDTensor>(); g_feed_value->GetMutable<framework::LoDTensor>();
feed_tensor->Resize(t.dims()); feed_tensor->Resize(t.dims());
feed_tensor->ShareDataWith(t); feed_tensor->ShareDataWith(t);
std::shared_ptr<framework::BlockDesc> to_predict_block = std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(block_id); to_predict_program_->Block(0);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j]; auto op = ops_of_block_[*to_predict_block.get()][j];
op->Run(); op->Run();
} }
auto ops = ops_of_block_[*to_predict_program_->Block(0)];
auto last_op = ops.rbegin();
auto output_map = (*last_op)->Outputs();
std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
PADDLE_MOBILE_ENFORCE(out_keys.size() > 0, "the last op contains no output");
framework::LoDTensor *output_tensor =
framework::GetVarValue<framework::LoDTensor>(out_keys[0], output_map,
*(program_.scope));
return std::shared_ptr<framework::Tensor>(output_tensor);
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
const framework::Tensor &t, int block_id) {
return Predict(t);
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict( std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
const std::vector<Ptype> &input, const std::vector<int64_t> &dims) { const std::vector<Ptype> &input, const std::vector<int64_t> &dims) {
framework::Tensor tensor(input, framework::make_ddim(dims)); framework::Tensor tensor(input, framework::make_ddim(dims));
std::shared_ptr<framework::Tensor> output_tensor = Predict(tensor, 0);
Predict(tensor, 0); Executor<Dtype, P>::Ptype *output_ptr =
output_tensor->data<typename Executor<Dtype, P>::Ptype>();
framework::Variable *g_feed_value = program_.scope->Var("col"); std::vector<typename Executor<Dtype, P>::Ptype> result_vector;
auto feed_tensor = g_feed_value->GetMutable<framework::Tensor>(); for (int j = 0; j < output_tensor->numel(); ++j) {
result_vector.push_back(output_ptr[j]);
return {}; }
return result_vector;
} }
template class Executor<CPU, Precision::FP32>; template class Executor<CPU, Precision::FP32>;
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <memory.h> #include <memory.h>
#include <map>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -44,24 +45,25 @@ class Executor { ...@@ -44,24 +45,25 @@ class Executor {
public: public:
typedef typename PrecisionTrait<P>::ptype Ptype; typedef typename PrecisionTrait<P>::ptype Ptype;
Executor() = default;
Executor(const framework::Program<Dtype> p, int batch_size = 1, Executor(const framework::Program<Dtype> p, int batch_size = 1,
bool use_optimize = true); bool use_optimize = true);
// std::shared_ptr<framework::Tensor> Predict(framework::Tensor &t); std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
std::vector<Ptype> Predict(const std::vector<Ptype> &input, std::vector<Ptype> Predict(const std::vector<Ptype> &input,
const std::vector<int64_t> &dims); const std::vector<int64_t> &dims);
protected: protected:
Executor() = default;
void InitMemory(); void InitMemory();
void LoadMemory(const framework::VarDesc var_desc, void LoadMemory(const framework::VarDesc var_desc,
framework::LoDTensor *tensor, const std::string &file_path); framework::LoDTensor *tensor, const std::string &file_path);
framework::Program<Dtype> program_; framework::Program<Dtype> program_;
int batch_size_ = 1; int batch_size_ = 1;
std::shared_ptr<framework::ProgramDesc> to_predict_program_; std::shared_ptr<framework::ProgramDesc> to_predict_program_;
void Predict(const framework::Tensor &t, int block_id); std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t,
int block_id);
std::map<framework::BlockDesc, std::map<framework::BlockDesc,
std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>> std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>>
ops_of_block_; ops_of_block_;
......
...@@ -136,10 +136,16 @@ class SoftmaxFuntor<CPU, T> { ...@@ -136,10 +136,16 @@ class SoftmaxFuntor<CPU, T> {
public: public:
void operator()(const framework::Tensor *X, framework::Tensor *Y) { void operator()(const framework::Tensor *X, framework::Tensor *Y) {
const DDim dDim = X->dims();
for (int i = 0; i < dDim[0]; ++i) {
framework::Tensor sub_X = X->Slice(i, i + 1);
framework::Tensor sub_Y = Y->Slice(i, i + 1);
#if __ARM_NEON #if __ARM_NEON
SoftmaxCacl(X, Y); SoftmaxCacl(&sub_X, &sub_Y);
#endif #endif
} }
}
}; };
template class SoftmaxFuntor<CPU, float>; template class SoftmaxFuntor<CPU, float>;
......
...@@ -18,20 +18,17 @@ limitations under the License. */ ...@@ -18,20 +18,17 @@ limitations under the License. */
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::Loader<paddle_mobile::CPU> loader;
// ../../../test/models/googlenet
// ../../../test/models/mobilenet
auto time1 = time(); auto time1 = time();
auto program = loader.Load(g_googlenet, false); auto program = loader.Load(g_googlenet, false);
auto time2 = time(); auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms"; DLOG << "load cost :" << time_diff(time1, time2) << "ms\n";
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false); paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
std::vector<float> input; std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224, &input, dims); GetInput<float>(g_test_image_1x3x224x224, &input, dims);
auto time3 = time(); auto time3 = time();
executor.Predict(input, dims); executor.Predict(input, dims);
auto time4 = time(); auto time4 = time();
DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; DLOG << "predict cost :" << time_diff(time3, time4) << "ms\n";
return 0; return 0;
} }
...@@ -22,17 +22,22 @@ int main() { ...@@ -22,17 +22,22 @@ int main() {
auto program = loader.Load(g_mobilenet, false); auto program = loader.Load(g_mobilenet, false);
auto time2 = time(); auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms"; DLOG << "load cost :" << time_diff(time1, time1) << "ms";
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false); paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 2, false);
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{2, 3, 224, 224};
Tensor input_tensor; Tensor input_tensor;
SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0), SetupTensor<float>(&input_tensor, {2, 3, 224, 224}, static_cast<float>(0),
static_cast<float>(1)); static_cast<float>(1));
std::vector<float> input(input_tensor.data<float>(), std::vector<float> input(input_tensor.data<float>(),
input_tensor.data<float>() + input_tensor.numel()); input_tensor.data<float>() + input_tensor.numel());
auto time3 = time(); auto time3 = time();
executor.Predict(input, dims); auto vec_result = executor.Predict(input, dims);
float sum = 0;
for (const auto item : vec_result) {
sum += item;
}
DLOG << "mobilenet output sum =" << sum;
auto time4 = time(); auto time4 = time();
DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
return 0; return 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册