提交 3f2de7a2 编写于 作者: Z zhangyang

implement predict_from_to for FPGA track

上级 782406aa
......@@ -93,6 +93,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
depManager[i].analysisDep(ops_of_block_[*block_desc.get()]);
#endif
}
DLOG << "Total " << ops.size() << " ops have been created ";
}
if (program_.combined) {
InitCombineMemory();
......@@ -643,6 +644,68 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
return result_vector;
}
#ifdef PADDLE_MOBILE_FPGA
// Binds the input tensor `t` to the scope variable named "feed".
//
// The "feed" variable's LoDTensor is first resized to t.dims() and then
// made to share t's data through Tensor::ShareDataWith.
// NOTE(review): ShareDataWith presumably aliases t's buffer instead of
// copying it, in which case `t` must outlive the prediction - confirm.
template <typename Dtype, Precision P>
void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
  framework::Variable *feed_var = program_.scope->Var("feed");
  framework::Tensor *input_slot =
      feed_var->GetMutable<framework::LoDTensor>();

  // Keep the original order: adopt the shape first, then the data.
  input_slot->Resize(t.dims());
  input_slot->ShareDataWith(t);
}
// Returns the first output of the last operator in block 0.
//
// The output variable is looked up in program_.scope under the first key
// reported by the operator's GetOutKeys(); a new framework::Tensor is
// copy-constructed from that LoDTensor and handed back in a shared_ptr.
// NOTE(review): whether the copy shares the underlying buffer with the
// scope variable depends on Tensor's copy constructor - confirm.
//
// Enforced preconditions: block 0 has at least one operator, the last
// operator declares at least one output key, and the output variable
// resolves to a tensor.
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult() {
  std::shared_ptr<framework::BlockDesc> to_predict_block =
      to_predict_program_->Block(0);
  auto &ops = ops_of_block_[*to_predict_block.get()];
  // Looking at the last element of an empty container is undefined
  // behavior, so reject that case explicitly.
  PADDLE_MOBILE_ENFORCE(!ops.empty(), "no op has been created for block 0");

  const auto &last_op = ops.back();
  // Bind by const reference so the output map is not copied needlessly.
  const auto &output_map = last_op->Outputs();
  const std::vector<std::string> out_keys = last_op->GetOutKeys();
  PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output");

  auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
      out_keys[0], output_map, *(program_.scope));
  // Guard against a missing output variable before dereferencing it.
  PADDLE_MOBILE_ENFORCE(output_tensor != nullptr,
                        "output variable of the last op is missing");
  return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
}
// Runs the operators of block 0 whose index lies in [start, end).
//
// start: index of the first operator to run; must satisfy 0 <= start < end.
// end:   one past the last operator to run; a negative value selects
//        ops.size(), i.e. "run through the last operator".
// An invalid range is rejected through PADDLE_MOBILE_ENFORCE.
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From_To(int start, int end) {
  std::shared_ptr<framework::BlockDesc> to_predict_block =
      to_predict_program_->Block(0);
  auto &ops = ops_of_block_[*to_predict_block.get()];

  // Convert the operator count once so that every comparison below is
  // signed-vs-signed instead of mixing int with an unsigned size.
  const int op_count = static_cast<int>(ops.size());
  if (end < 0) {
    end = op_count;
  }
  PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= op_count,
                        "start or end parameter is wrong");
#ifdef PADDLE_MOBILE_PROFILE
  // One slot per operator, indexed by operator position.
  // NOTE(review): the samples stored here are never read back in this
  // function, so the timings are currently discarded - confirm intent.
  std::vector<ProfInfo> profile(ops.size());
#endif
  for (int i = start; i < end; ++i) {
#ifdef PADDLE_MOBILE_PROFILE
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    profile[i].runBegin = static_cast<uint64_t>(ts.tv_sec) * 1e9 + ts.tv_nsec;
#endif
    ops[i]->Run();
#ifdef PADDLE_MOBILE_PROFILE
    clock_gettime(CLOCK_MONOTONIC, &ts);
    profile[i].runEnd = static_cast<uint64_t>(ts.tv_sec) * 1e9 + ts.tv_nsec;
#endif
  }
}
// Runs the operators of block 0 starting at index `start` and continuing
// through the last one. Delegates to Predict_From_To with a negative
// `end`, which that function treats as "up to ops.size()".
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From(int start) {
  this->Predict_From_To(start, -1);
}
// Runs the operators of block 0 from the first one up to, but not
// including, index `end`. Delegates to Predict_From_To, so a negative
// `end` runs every operator.
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_To(int end) {
  const int first_op = 0;
  this->Predict_From_To(first_op, end);
}
#endif
// Explicit instantiations: they make the compiler emit the Executor member
// definitions from this file for each listed device/precision pair.
template class Executor<CPU, Precision::FP32>;
template class Executor<GPU_MALI, Precision::FP32>;
template class Executor<FPGA, Precision::FP32>;
......
......@@ -92,6 +92,15 @@ class Executor {
bool varInputMemory(const std::shared_ptr<framework::VarDesc> &var_desc,
framework::Variable *var,
framework::LoDTensor *tensor) const;
// Staged-execution API, compiled only when PADDLE_MOBILE_FPGA is defined.
#ifdef PADDLE_MOBILE_FPGA
public:
// Resizes the scope's "feed" tensor to t.dims() and makes it share t's data.
void FeedData(const framework::Tensor &t);
// Returns a Tensor built from the first output of the last op in block 0.
std::shared_ptr<framework::Tensor> FetchResult();
// Runs the ops of block 0 with index in [start, end); a negative `end`
// stands for "through the last op".
void Predict_From_To(int start = 0, int end = -1);
// Runs the ops from index `start` through the last op.
void Predict_From(int start);
// Runs the ops from index 0 up to, but not including, `end`.
void Predict_To(int end);
#endif
};
} // namespace paddle_mobile
......@@ -124,6 +124,33 @@ PaddleMobile<Dtype, P>::~PaddleMobile() {
loader_ = nullptr;
}
#ifdef PADDLE_MOBILE_FPGA
// Forwards the input tensor to the executor's FeedData.
// NOTE(review): executor_ is dereferenced without a null check here -
// presumably the model must be loaded first; confirm against callers.
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::FeedData(const framework::Tensor &t) {
  executor_->FeedData(t);
}
// Returns whatever the executor's FetchResult produces.
// NOTE(review): executor_ is dereferenced without a null check here -
// presumably the model must be loaded first; confirm against callers.
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::FetchResult() {
  std::shared_ptr<framework::Tensor> fetched = executor_->FetchResult();
  return fetched;
}
// Forwards the [start, end) operator range to the executor's
// Predict_From_To unchanged.
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::Predict_From_To(int start, int end) {
  Executor<Dtype, P> &runner = *executor_;
  runner.Predict_From_To(start, end);
}
// Forwards the starting operator index to the executor's Predict_From.
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::Predict_From(int start) {
  Executor<Dtype, P> &runner = *executor_;
  runner.Predict_From(start);
}
// Forwards the exclusive end operator index to the executor's Predict_To.
template <typename Dtype, Precision P>
void PaddleMobile<Dtype, P>::Predict_To(int end) {
  Executor<Dtype, P> &runner = *executor_;
  runner.Predict_To(end);
}
#endif
// Explicit instantiations: they make the compiler emit the PaddleMobile
// member definitions from this file for each listed device/precision pair.
template class PaddleMobile<CPU, Precision::FP32>;
template class PaddleMobile<FPGA, Precision::FP32>;
......
......@@ -92,6 +92,15 @@ class PaddleMobile {
private:
std::shared_ptr<Loader<Dtype, P>> loader_;
std::shared_ptr<Executor<Dtype, P>> executor_;
// Staged-execution API, compiled only when PADDLE_MOBILE_FPGA is defined.
// Each member forwards to the method of the same name on executor_.
#ifdef PADDLE_MOBILE_FPGA
public:
// Hands the input tensor to the executor.
void FeedData(const framework::Tensor &t);
// Returns the tensor produced by the executor's FetchResult.
std::shared_ptr<framework::Tensor> FetchResult();
// Runs the operator range given by start/end (defaults: 0 and -1).
void Predict_From_To(int start = 0, int end = -1);
// Runs starting at operator index `start`.
void Predict_From(int start);
// Runs up to operator index `end`.
void Predict_To(int end);
#endif
};
} // namespace paddle_mobile
......@@ -39,7 +39,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
poolArgs.image.pad_width = (uint32_t)paddings[1];
poolArgs.image.scale_address = input->scale;
poolArgs.output.address = output_ptr;
poolArgs.output.scale_address = input->scale;
poolArgs.output.scale_address = output->scale;
poolArgs.kernel.height = (uint32_t)ksize[0];
poolArgs.kernel.width = (uint32_t)ksize[1];
poolArgs.kernel.stride_h = (uint32_t)strides[0];
......
......@@ -36,7 +36,8 @@ int main() {
std::vector<float> input(input_tensor.data<float>(),
input_tensor.data<float>() + input_tensor.numel());
// 预热十次
#ifndef PADDLE_MOBILE_FPGA
// 预热十次
for (int i = 0; i < 10; ++i) {
paddle_mobile.Predict(input, dims);
}
......@@ -47,7 +48,17 @@ int main() {
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
}
#else
auto time3 = time();
paddle_mobile.FeedData(input_tensor);
paddle_mobile.Predict_To(10);
paddle_mobile.Predict_From(10);
paddle_mobile.FetchResult();
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
#endif
}
return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册