提交 369dfb3d 编写于 作者: L Luo Tao

move contrib/inference to paddle/fluid/inference/api

上级 866fcb0c
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
add_subdirectory(inference)
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/contrib/inference/paddle_inference_api_anakin_engine.h"
#include <cuda.h> #include <cuda.h>
#include "paddle/contrib/inference/paddle_inference_api_anakin_engine.h"
namespace paddle { namespace paddle {
...@@ -47,8 +47,7 @@ bool PaddleInferenceAnakinPredictor::Run( ...@@ -47,8 +47,7 @@ bool PaddleInferenceAnakinPredictor::Run(
} }
auto d_tensor_in_p = executor_.get_in(input.name); auto d_tensor_in_p = executor_.get_in(input.name);
float *d_data_p = d_tensor_in_p->mutable_data(); float *d_data_p = d_tensor_in_p->mutable_data();
if (cudaMemcpy(d_data_p, if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
static_cast<float *>(input.data.data()),
d_tensor_in_p->valid_size() * sizeof(float), d_tensor_in_p->valid_size() * sizeof(float),
cudaMemcpyHostToDevice) != 0) { cudaMemcpyHostToDevice) != 0) {
LOG(ERROR) << "copy data from CPU to GPU error"; LOG(ERROR) << "copy data from CPU to GPU error";
...@@ -70,8 +69,7 @@ bool PaddleInferenceAnakinPredictor::Run( ...@@ -70,8 +69,7 @@ bool PaddleInferenceAnakinPredictor::Run(
output.data.Resize(tensor->valid_size() * sizeof(float)); output.data.Resize(tensor->valid_size() * sizeof(float));
} }
// Copy data from GPU -> CPU // Copy data from GPU -> CPU
if (cudaMemcpy(output.data.data(), if (cudaMemcpy(output.data.data(), tensor->mutable_data(),
tensor->mutable_data(),
tensor->valid_size() * sizeof(float), tensor->valid_size() * sizeof(float),
cudaMemcpyDeviceToHost) != 0) { cudaMemcpyDeviceToHost) != 0) {
LOG(ERROR) << "copy data from GPU to CPU error"; LOG(ERROR) << "copy data from GPU to CPU error";
...@@ -106,9 +104,8 @@ std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() { ...@@ -106,9 +104,8 @@ std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
// A factory to help create different predictors. // A factory to help create different predictors.
template <> template <>
std::unique_ptr<PaddlePredictor> std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>( AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
const AnakinConfig &config) {
VLOG(3) << "Anakin Predictor create."; VLOG(3) << "Anakin Predictor create.";
std::unique_ptr<PaddlePredictor> x( std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor(config)); new PaddleInferenceAnakinPredictor(config));
......
...@@ -48,8 +48,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { ...@@ -48,8 +48,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
private: private:
bool Init(const AnakinConfig& config); bool Init(const AnakinConfig& config);
anakin::graph::Graph<anakin::NV, anakin::graph::Graph<anakin::NV, anakin::saber::AK_FLOAT,
anakin::saber::AK_FLOAT,
anakin::Precision::FP32> anakin::Precision::FP32>
graph_; graph_;
anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32> anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
......
...@@ -77,8 +77,8 @@ bool NativePaddlePredictor::Init( ...@@ -77,8 +77,8 @@ bool NativePaddlePredictor::Init(
if (!config_.model_dir.empty()) { if (!config_.model_dir.empty()) {
// Parameters are saved in separate files located in // Parameters are saved in separate files located in
// the specified `dirname`. // the specified `dirname`.
inference_program_ = paddle::inference::Load( inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
executor_.get(), scope_.get(), config_.model_dir); config_.model_dir);
} else if (!config_.prog_file.empty() && !config_.param_file.empty()) { } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
// All parameters are saved in a single file. // All parameters are saved in a single file.
// The file names should be consistent with that used // The file names should be consistent with that used
...@@ -91,8 +91,8 @@ bool NativePaddlePredictor::Init( ...@@ -91,8 +91,8 @@ bool NativePaddlePredictor::Init(
} }
ctx_ = executor_->Prepare(*inference_program_, 0); ctx_ = executor_->Prepare(*inference_program_, 0);
executor_->CreateVariables( executor_->CreateVariables(*inference_program_,
*inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0); sub_scope_ ? sub_scope_ : scope_.get(), 0);
// Get the feed_target_names and fetch_target_names // Get the feed_target_names and fetch_target_names
feed_target_names_ = inference_program_->GetFeedTargetNames(); feed_target_names_ = inference_program_->GetFeedTargetNames();
...@@ -134,10 +134,8 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -134,10 +134,8 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
// if share variables, we need not create variables // if share variables, we need not create variables
VLOG(4) << "Run prepared context"; VLOG(4) << "Run prepared context";
executor_->RunPreparedContext( executor_->RunPreparedContext(
ctx_.get(), ctx_.get(), sub_scope_ != nullptr ? sub_scope_ : scope_.get(),
sub_scope_ != nullptr ? sub_scope_ : scope_.get(), &feed_targets, &fetch_targets,
&feed_targets,
&fetch_targets,
false /* don't create variable each time */); false /* don't create variable each time */);
VLOG(4) << "Finish prepared context"; VLOG(4) << "Finish prepared context";
if (!GetFetch(fetchs, output_data)) { if (!GetFetch(fetchs, output_data)) {
...@@ -181,8 +179,7 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs, ...@@ -181,8 +179,7 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
} }
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr), std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
inputs[i].data.data(),
inputs[i].data.length()); inputs[i].data.length());
feeds->push_back(input); feeds->push_back(input);
} }
...@@ -232,8 +229,7 @@ bool NativePaddlePredictor::GetFetch( ...@@ -232,8 +229,7 @@ bool NativePaddlePredictor::GetFetch(
size_t start = lod[0][j - 1] * common_dim; size_t start = lod[0][j - 1] * common_dim;
size_t end = lod[0][j] * common_dim; size_t end = lod[0][j] * common_dim;
if (end > start) { if (end > start) {
std::copy(output_ptr + start, std::copy(output_ptr + start, output_ptr + end,
output_ptr + end,
data.begin() + (j - 1) * max_dim * common_dim); data.begin() + (j - 1) * max_dim * common_dim);
} }
} }
...@@ -257,15 +253,13 @@ bool NativePaddlePredictor::GetFetch( ...@@ -257,15 +253,13 @@ bool NativePaddlePredictor::GetFetch(
} }
template <> template <>
std::unique_ptr<PaddlePredictor> std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>( NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
const NativeConfig &config) {
VLOG(3) << "create NativePaddlePredictor"; VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) { if (config.use_gpu) {
// 1. GPU memory // 1. GPU memory
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
config.fraction_of_gpu_memory, config.fraction_of_gpu_memory, 0.f,
0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]"); "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device); PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
std::vector<std::string> flags; std::vector<std::string> flags;
......
...@@ -77,8 +77,8 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor { ...@@ -77,8 +77,8 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
ctx_ = executor_->Prepare(*inference_program_, 0); ctx_ = executor_->Prepare(*inference_program_, 0);
VLOG(5) << "to create variables"; VLOG(5) << "to create variables";
executor_->CreateVariables( executor_->CreateVariables(*inference_program_,
*inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0); sub_scope_ ? sub_scope_ : scope_.get(), 0);
// Get the feed_target_names and fetch_target_names // Get the feed_target_names and fetch_target_names
feed_target_names_ = inference_program_->GetFeedTargetNames(); feed_target_names_ = inference_program_->GetFeedTargetNames();
...@@ -98,8 +98,7 @@ CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>( ...@@ -98,8 +98,7 @@ CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
if (config.use_gpu) { if (config.use_gpu) {
// 1. GPU memory // 1. GPU memory
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
config.fraction_of_gpu_memory, config.fraction_of_gpu_memory, 0.f,
0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]"); "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device); PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
std::vector<std::string> flags; std::vector<std::string> flags;
......
...@@ -63,8 +63,8 @@ void Main(bool use_gpu) { ...@@ -63,8 +63,8 @@ void Main(bool use_gpu) {
PADDLE_ENFORCE(outputs.size(), 1UL); PADDLE_ENFORCE(outputs.size(), 1UL);
// Check the output buffer size and result of each tid. // Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL); PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
float result[5] = { float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706}; 0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float); const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory. // The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) { for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
...@@ -107,8 +107,8 @@ void MainThreads(int num_threads, bool use_gpu) { ...@@ -107,8 +107,8 @@ void MainThreads(int num_threads, bool use_gpu) {
PADDLE_ENFORCE(outputs.size(), 1UL); PADDLE_ENFORCE(outputs.size(), 1UL);
// Check the output buffer size and result of each tid. // Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL); PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
float result[5] = { float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706}; 0.000932706};
const size_t num_elements = const size_t num_elements =
outputs.front().data.length() / sizeof(float); outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory. // The outputs' buffers are in CPU memory.
......
...@@ -21,8 +21,7 @@ ...@@ -21,8 +21,7 @@
namespace paddle { namespace paddle {
namespace demo { namespace demo {
static void split(const std::string& str, static void split(const std::string& str, char sep,
char sep,
std::vector<std::string>* pieces) { std::vector<std::string>* pieces) {
pieces->clear(); pieces->clear();
if (str.empty()) { if (str.empty()) {
......
...@@ -29,8 +29,7 @@ DECLARE_double(fraction_of_gpu_memory_to_use); ...@@ -29,8 +29,7 @@ DECLARE_double(fraction_of_gpu_memory_to_use);
DEFINE_string(modeldir, "", "Directory of the inference model."); DEFINE_string(modeldir, "", "Directory of the inference model.");
DEFINE_string(refer, "", "path to reference result for comparison."); DEFINE_string(refer, "", "path to reference result for comparison.");
DEFINE_string( DEFINE_string(
data, data, "",
"",
"path of data; each line is a record, format is " "path of data; each line is a record, format is "
"'<space splitted floats as data>\t<space splitted ints as shape'"); "'<space splitted floats as data>\t<space splitted ints as shape'");
DEFINE_bool(use_gpu, false, "Whether use gpu."); DEFINE_bool(use_gpu, false, "Whether use gpu.");
......
...@@ -121,8 +121,8 @@ void MainImageClassification(bool use_gpu) { ...@@ -121,8 +121,8 @@ void MainImageClassification(bool use_gpu) {
// which should be in the range [0.0, 1.0]. // which should be in the range [0.0, 1.0].
feed_target_shapes[0][0] = batch_size; feed_target_shapes[0][0] = batch_size;
framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]); framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
SetupTensor<float>( SetupTensor<float>(&input, input_dims, static_cast<float>(0),
&input, input_dims, static_cast<float>(0), static_cast<float>(1)); static_cast<float>(1));
std::vector<framework::LoDTensor*> cpu_feeds; std::vector<framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&input); cpu_feeds.push_back(&input);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册