Commit 9141bee1, authored by cuichaowen, committed by Yan Chunwei

add Anakin api for paddle (#11228)

Parent d48172f2
paddle/contrib/inference/CMakeLists.txt

@@ -24,31 +24,37 @@ set(ANAKIN_LIBRARY "" CACHE STRING "path of Anakin library")
 set(inference_deps paddle_inference_api paddle_fluid_api)
 
 # if anakin is set enable anakin api implementation
-if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
+if(ANAKIN_INCLUDE AND ANAKIN_LIBRARY)
   set(ANAKIN_FOUND ON)
 else()
   set(ANAKIN_FOUND OFF)
 endif()
 
+function(fetch_include_recursively root_dir)
+  if (IS_DIRECTORY ${root_dir})
+    include_directories(${root_dir})
+  endif()
+
+  file(GLOB ALL_SUB RELATIVE ${root_dir} ${root_dir}/*)
+  foreach(sub ${ALL_SUB})
+    if (IS_DIRECTORY ${root_dir}/${sub})
+      fetch_include_recursively(${root_dir}/${sub})
+    endif()
+  endforeach()
+endfunction()
+
 if (ANAKIN_FOUND)
   # Anakin's code style doesn't follow google c style.
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=comment
-      -Wno-error=reorder
-      -Wno-error=format
-      -Wno-error=switch
-      -Wno-error=return-type
-      -Wno-error=non-virtual-dtor
-      -Wno-error=cpp")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp")
 
   message(STATUS "Anakin for inference is enabled")
   message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
-  include_directories("${ANAKIN_INCLUDE}")
-  # Anakin's source path is a mass, need to set sub-directories trivially.
-  include_directories("${ANAKIN_INCLUDE}/saber")
-  link_directories("${ANAKIN_LIBRARY}")
+  fetch_include_recursively(${ANAKIN_INCLUDE})
+  link_directories(${ANAKIN_LIBRARY})
 
-  nv_library(inference_anakin_api SRCS paddle_inference_api_anakin_engine.cc)
-  target_link_libraries(inference_anakin_api anakin)
+  nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
+  target_link_libraries(inference_anakin_api anakin anakin_saber_common)
   list(APPEND inference_deps inference_anakin_api)
 endif()

@@ -73,7 +79,7 @@ function(inference_api_test TARGET_NAME)
 endfunction(inference_api_test)
 
 cc_library(paddle_inference_api
   SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
   DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
 
 cc_test(test_paddle_inference_api

@@ -84,8 +90,8 @@ inference_api_test(test_paddle_inference_api_impl
   ARGS test_word2vec test_image_classification)
 
 if (ANAKIN_FOUND)
-  nv_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
-      DEPS ${inference_deps} protobuf)
+  cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
+      DEPS ${inference_deps})
 endif()
 
 if(WITH_TESTING)
...
paddle/contrib/inference/paddle_inference_api.h

@@ -113,5 +113,4 @@ struct AnakinConfig : public PaddlePredictor::Config {
 // Similarly, each engine kind should map to a unique predictor implementation.
 template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
 
 }  // namespace paddle
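A caller-side sketch of how this factory is used with the Anakin engine: the engine is selected by the template arguments rather than a runtime flag. The config fields below are the ones exercised by the tester later in this commit; the model path is illustrative only.

    #include "paddle/contrib/inference/paddle_inference_api.h"

    int main() {
      paddle::AnakinConfig config;
      config.model_file = "./mobilenet_v2.anakin.bin";  // illustrative path
      config.device = 0;  // GPU id
      config.max_batch_size = 1;
      // Compile-time dispatch to the Anakin-backed predictor.
      auto predictor = paddle::CreatePaddlePredictor<
          paddle::AnakinConfig, paddle::PaddleEngineKind::kAnakin>(config);
      return predictor ? 0 : 1;
    }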
paddle/contrib/inference/paddle_inference_api_anakin_engine.cc

@@ -24,8 +24,16 @@ PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor(
 }
 
 bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
-  // TODO(Superjomn) Tell anakin to support return code.
-  engine_.Build(config.model_file, config.max_batch_size);
+  if (!(graph_.load(config.model_file))) {
+    return false;
+  }
+  graph_.ResetBatchSize("input_0", config.max_batch_size);
+  // optimization for graph
+  if (!(graph_.Optimize())) {
+    return false;
+  }
+  // construct executer
+  executor_.init(graph_);
   return true;
 }

@@ -38,24 +46,30 @@ bool PaddleInferenceAnakinPredictor::Run(
                  << "'s type is not float";
       return false;
     }
-    engine_.SetInputFromCPU(
-        input.name, static_cast<float *>(input.data.data), input.data.length);
+    auto d_tensor_in_p = executor_.get_in(input.name);
+    float *d_data_p = d_tensor_in_p->mutable_data();
+    if (cudaMemcpy(d_data_p,
+                   static_cast<float *>(input.data.data),
+                   d_tensor_in_p->valid_size() * sizeof(float),
+                   cudaMemcpyHostToDevice) != 0) {
+      LOG(ERROR) << "copy data from CPU to GPU error";
+      return false;
+    }
   }
-  // TODO(Superjomn) Tell anakin to support return code.
-  engine_.Execute();
+  executor_.prediction();
 
   if (output_data->empty()) {
     LOG(ERROR) << "At least one output should be set with tensors' names.";
     return false;
   }
   for (auto &output : *output_data) {
-    auto *tensor = engine_.GetOutputInGPU(output.name);
+    auto *tensor = executor_.get_out(output.name);
     output.shape = tensor->shape();
     // Copy data from GPU -> CPU
     if (cudaMemcpy(output.data.data,
-                   tensor->data(),
-                   tensor->size(),
+                   tensor->mutable_data(),
+                   tensor->valid_size() * sizeof(float),
                    cudaMemcpyDeviceToHost) != 0) {
       LOG(ERROR) << "copy data from GPU to CPU error";
       return false;

@@ -64,9 +78,26 @@ bool PaddleInferenceAnakinPredictor::Run(
   return true;
 }
 
-// TODO(Superjomn) To implement latter.
+anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
+    &PaddleInferenceAnakinPredictor::get_executer() {
+  return executor_;
+}
+
+// the cloned new Predictor of anakin share the same net weights from original
+// Predictor
 std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
-  return nullptr;
+  VLOG(3) << "Anakin Predictor::clone";
+  std::unique_ptr<PaddlePredictor> cls(new PaddleInferenceAnakinPredictor());
+  // construct executer from other graph
+  auto anakin_predictor_p =
+      dynamic_cast<PaddleInferenceAnakinPredictor *>(cls.get());
+  if (!anakin_predictor_p) {
+    LOG(ERROR) << "fail to call Init";
+    return nullptr;
+  }
+  anakin_predictor_p->get_executer().init(graph_);
+  return std::move(cls);
 }
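Because the clone re-initializes its executor from the original predictor's graph_, the net weights are shared rather than loaded again. A minimal usage sketch (config populated as in GetConfig() in the tester below; keeping the original alive is required, since the clone borrows its graph):

    #include "paddle/contrib/inference/paddle_inference_api.h"

    void ServeTwo(const paddle::AnakinConfig &config) {
      auto base = paddle::CreatePaddlePredictor<
          paddle::AnakinConfig, paddle::PaddleEngineKind::kAnakin>(config);
      auto worker = base->Clone();  // shares net weights with `base`
      // `base` and `worker` can now run inference independently, e.g. on
      // different threads, without loading the model twice.
    }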
 // A factory to help create difference predictor.

@@ -74,6 +105,7 @@ template <>
 std::unique_ptr<PaddlePredictor>
 CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(
     const AnakinConfig &config) {
+  VLOG(3) << "Anakin Predictor create.";
   std::unique_ptr<PaddlePredictor> x(
       new PaddleInferenceAnakinPredictor(config));
   return x;
...
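Both cudaMemcpy calls in Run() above follow the same check-and-log pattern. A minimal sketch of that idiom as a standalone helper (hypothetical, not part of this patch):

    #include <cuda_runtime.h>  // cudaMemcpy, cudaMemcpyKind, cudaSuccess
    #include <glog/logging.h>

    // Copy `bytes` bytes in the given direction; log and return false on
    // failure, matching how Run() reports transfer errors to its caller.
    static bool CheckedCopy(void *dst, const void *src, size_t bytes,
                            cudaMemcpyKind kind) {
      if (cudaMemcpy(dst, src, bytes, kind) != cudaSuccess) {
        LOG(ERROR) << "cudaMemcpy of " << bytes << " bytes failed";
        return false;
      }
      return true;
    }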
paddle/contrib/inference/paddle_inference_api_anakin_engine.h

@@ -20,32 +20,42 @@ limitations under the License. */
 #pragma once
 
 // NOTE This header file do not have namespace.
-// TODO(Superjomn) Tell Anakin to provide better APIs.
-#include <test/framework/net/paddle_api.h>
+//#include <test/framework/net/paddle_api.h>
 #include "paddle/contrib/inference/paddle_inference_api.h"
+
+#include "framework/core/net/net.h"
+#include "saber/saber_types.h"
 
 namespace paddle {
 
 class PaddleInferenceAnakinPredictor : public PaddlePredictor {
  public:
+  PaddleInferenceAnakinPredictor() {}
+
   PaddleInferenceAnakinPredictor(const AnakinConfig& config);
 
   // NOTE Unlike the native engine, the buffers of anakin engine's output_data
   // should be allocated first.
-  // TODO(Superjomn) should unify all the behaviors of output_data accross all
-  // the engines.
   bool Run(const std::vector<PaddleTensor>& inputs,
            std::vector<PaddleTensor>* output_data) override;
 
   std::unique_ptr<PaddlePredictor> Clone() override;
 
+  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
+  get_executer();
+
+  ~PaddleInferenceAnakinPredictor() override{};
+
  private:
   bool Init(const AnakinConfig& config);
 
-  anakin::AnakinEngine<anakin::NV,
-                       anakin::saber::AK_FLOAT,
-                       anakin::Precision::FP32>
-      engine_;
+  anakin::graph::Graph<anakin::NV,
+                       anakin::saber::AK_FLOAT,
+                       anakin::Precision::FP32>
+      graph_;
+  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
+      executor_;
+  AnakinConfig config_;
 };
 
 }  // namespace paddle
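The <anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32> parameter triple recurs on both members above. A small readability sketch (the aliases are an assumption, not part of this patch; they reuse only the headers the file already includes):

    #include "framework/core/net/net.h"
    #include "saber/saber_types.h"

    // Possible aliases keeping the template triple in one place.
    using AnakinGraph = anakin::graph::Graph<
        anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>;
    using AnakinNet = anakin::Net<
        anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>;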
paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc

@@ -12,16 +12,54 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/contrib/inference/paddle_inference_api.h"
+#include <glog/logging.h>
 #include <gtest/gtest.h>
+#include "gflags/gflags.h"
+#include "paddle/contrib/inference/paddle_inference_api.h"
 
 namespace paddle {
 
-TEST(inference, anakin) {
+AnakinConfig GetConfig() {
   AnakinConfig config;
+  config.model_file = "./mobilenet_v2.anakin.bin";
+  config.device = 0;
+  config.max_batch_size = 1;
+  return config;
+}
 
-  auto engine =
+TEST(inference, anakin) {
+  AnakinConfig config = GetConfig();
+  auto predictor =
       CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
 
+  float data[1 * 3 * 224 * 224] = {1.0f};
+  PaddleBuf buf{.data = data, .length = sizeof(data)};
+  PaddleTensor tensor{.name = "input_0",
+                      .shape = std::vector<int>({1, 3, 224, 224}),
+                      .data = buf,
+                      .dtype = PaddleDType::FLOAT32};
+
+  // For simplicity, we set all the slots with the same data.
+  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
+
+  float data_out[1000];
+  PaddleBuf buf_out{.data = data_out, .length = sizeof(data)};
+  PaddleTensor tensor_out{.name = "prob_out",
+                          .shape = std::vector<int>({1000, 1}),
+                          .data = buf_out,
+                          .dtype = PaddleDType::FLOAT32};
+
+  std::vector<PaddleTensor> outputs(1, tensor_out);
+  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
+
+  float* data_o = static_cast<float*>(outputs[0].data.data);
+  for (size_t j = 0; j < 1000; ++j) {
+    LOG(INFO) << "output[" << j << "]: " << data_o[j];
+  }
+}
 
 }  // namespace paddle
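Note that buf_out is declared with .length = sizeof(data), which is larger than the 1000-float data_out array it wraps; sizeof(data_out) would match the actual buffer. The test also logs all 1000 probabilities; for a quick classification smoke check the arg-max is usually enough, as in this small follow-on sketch (not part of the test):

    #include <algorithm>
    #include <cstddef>

    // Index of the most probable class in a dense probability vector.
    size_t Top1(const float *probs, size_t n) {
      return static_cast<size_t>(std::max_element(probs, probs + n) - probs);
    }
    // e.g. LOG(INFO) << "top-1 class: " << Top1(data_o, 1000);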