diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt
index 1e3bb7bf16f969255dba6f6ec7a6a70bbb1e07ee..f279020e9334323ebdf3125a8833044cd9eccae5 100644
--- a/paddle/contrib/inference/CMakeLists.txt
+++ b/paddle/contrib/inference/CMakeLists.txt
@@ -24,31 +24,37 @@ set(ANAKIN_LIBRARY "" CACHE STRING "path of Anakin library")
 set(inference_deps paddle_inference_api paddle_fluid_api)
 
 # if anakin is set enable anakin api implementation
-if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
+if(ANAKIN_INCLUDE AND ANAKIN_LIBRARY)
   set(ANAKIN_FOUND ON)
 else()
   set(ANAKIN_FOUND OFF)
 endif()
 
+function(fetch_include_recursively root_dir)
+    if (IS_DIRECTORY ${root_dir})
+        include_directories(${root_dir})
+    endif()
+
+    file(GLOB ALL_SUB RELATIVE ${root_dir} ${root_dir}/*)
+    foreach(sub ${ALL_SUB})
+        if (IS_DIRECTORY ${root_dir}/${sub})
+            fetch_include_recursively(${root_dir}/${sub})
+        endif()
+    endforeach()
+endfunction()
+
 if (ANAKIN_FOUND)
   # Anakin's code style doesn't follow google c style.
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=comment
-      -Wno-error=reorder
-      -Wno-error=format
-      -Wno-error=switch
-      -Wno-error=return-type
-      -Wno-error=non-virtual-dtor
-      -Wno-error=cpp")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp")
 
   message(STATUS "Anakin for inference is enabled")
   message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
-  include_directories("${ANAKIN_INCLUDE}")
-  # Anakin's source path is a mass, need to set sub-directories trivially.
-  include_directories("${ANAKIN_INCLUDE}/saber")
-  link_directories("${ANAKIN_LIBRARY}")
+  fetch_include_recursively(${ANAKIN_INCLUDE})
+
+  link_directories(${ANAKIN_LIBRARY})
 
-  nv_library(inference_anakin_api SRCS paddle_inference_api_anakin_engine.cc)
-  target_link_libraries(inference_anakin_api anakin)
+  nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
+  target_link_libraries(inference_anakin_api anakin anakin_saber_common)
   list(APPEND inference_deps inference_anakin_api)
 endif()
 
@@ -73,7 +79,7 @@ function(inference_api_test TARGET_NAME)
 endfunction(inference_api_test)
 
 cc_library(paddle_inference_api
-  SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
+  SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
   DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
 
 cc_test(test_paddle_inference_api
@@ -84,8 +90,8 @@ inference_api_test(test_paddle_inference_api_impl
     ARGS test_word2vec test_image_classification)
 
 if (ANAKIN_FOUND)
-  nv_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
-    DEPS ${inference_deps} protobuf)
+  cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
+          DEPS ${inference_deps})
 endif()
 
 if(WITH_TESTING)
diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h
index c4588cf04030b9627dbe9b40c1bb04d1e782ebba..77e2d77b6b7fe3eeed865c8de0818d059cfa6c6e 100644
--- a/paddle/contrib/inference/paddle_inference_api.h
+++ b/paddle/contrib/inference/paddle_inference_api.h
@@ -113,5 +113,4 @@ struct AnakinConfig : public PaddlePredictor::Config {
 // Similarly, each engine kind should map to a unique predictor implementation.
 template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
-
 }  // namespace paddle
diff --git a/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc b/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
index 865d7ac10db55ce9565f4b1a35defa2a3d1d40ef..ea7781f691da81befd5d11c226c35e1da79baaaa 100644
--- a/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
+++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
@@ -24,8 +24,16 @@ PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor(
 }
 
 bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
-  // TODO(Superjomn) Tell anakin to support return code.
-  engine_.Build(config.model_file, config.max_batch_size);
+  if (!(graph_.load(config.model_file))) {
+    return false;
+  }
+  graph_.ResetBatchSize("input_0", config.max_batch_size);
+  // optimization for graph
+  if (!(graph_.Optimize())) {
+    return false;
+  }
+  // construct executer
+  executor_.init(graph_);
   return true;
 }
 
@@ -38,24 +46,30 @@ bool PaddleInferenceAnakinPredictor::Run(
                  << "'s type is not float";
       return false;
     }
-    engine_.SetInputFromCPU(
-        input.name, static_cast<float *>(input.data.data), input.data.length);
+    auto d_tensor_in_p = executor_.get_in(input.name);
+    float *d_data_p = d_tensor_in_p->mutable_data();
+    if (cudaMemcpy(d_data_p,
+                   static_cast<float *>(input.data.data),
+                   d_tensor_in_p->valid_size() * sizeof(float),
+                   cudaMemcpyHostToDevice) != 0) {
+      LOG(ERROR) << "copy data from CPU to GPU error";
+      return false;
+    }
   }
 
-  // TODO(Superjomn) Tell anakin to support return code.
-  engine_.Execute();
+  executor_.prediction();
 
   if (output_data->empty()) {
     LOG(ERROR) << "At least one output should be set with tensors' names.";
     return false;
   }
   for (auto &output : *output_data) {
-    auto *tensor = engine_.GetOutputInGPU(output.name);
+    auto *tensor = executor_.get_out(output.name);
     output.shape = tensor->shape();
     // Copy data from GPU -> CPU
     if (cudaMemcpy(output.data.data,
-                   tensor->data(),
-                   tensor->size(),
+                   tensor->mutable_data(),
+                   tensor->valid_size() * sizeof(float),
                    cudaMemcpyDeviceToHost) != 0) {
       LOG(ERROR) << "copy data from GPU to CPU error";
       return false;
@@ -64,9 +78,26 @@ bool PaddleInferenceAnakinPredictor::Run(
   return true;
 }
 
-// TODO(Superjomn) To implement latter.
+anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
+    &PaddleInferenceAnakinPredictor::get_executer() {
+  return executor_;
+}
+
+// the cloned new Predictor of anakin share the same net weights from original
+// Predictor
 std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
-  return nullptr;
+  VLOG(3) << "Anakin Predictor::clone";
+  std::unique_ptr<PaddlePredictor> cls(new PaddleInferenceAnakinPredictor());
+  // construct executer from other graph
+  auto anakin_predictor_p =
+      dynamic_cast<PaddleInferenceAnakinPredictor *>(cls.get());
+  if (!anakin_predictor_p) {
+    LOG(ERROR) << "fail to call Init";
+    return nullptr;
+  }
+  anakin_predictor_p->get_executer().init(graph_);
+
+  return std::move(cls);
 }
 
 // A factory to help create difference predictor.
@@ -74,6 +105,7 @@ template <>
 std::unique_ptr<PaddlePredictor>
 CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(
     const AnakinConfig &config) {
+  VLOG(3) << "Anakin Predictor create.";
   std::unique_ptr<PaddlePredictor> x(
       new PaddleInferenceAnakinPredictor(config));
   return x;
diff --git a/paddle/contrib/inference/paddle_inference_api_anakin_engine.h b/paddle/contrib/inference/paddle_inference_api_anakin_engine.h
index fe9f562e9d1d40c30585bcb68fa51e445bedb4aa..181784cbdf91fe2f50e20f4d447448a42a18d301 100644
--- a/paddle/contrib/inference/paddle_inference_api_anakin_engine.h
+++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine.h
@@ -20,32 +20,42 @@ limitations under the License. */
 #pragma once
 
 // NOTE This header file do not have namespace.
-// TODO(Superjomn) Tell Anakin to provide better APIs.
-#include
+//#include
 #include "paddle/contrib/inference/paddle_inference_api.h"
 
+#include "framework/core/net/net.h"
+#include "saber/saber_types.h"
+
 namespace paddle {
 
 class PaddleInferenceAnakinPredictor : public PaddlePredictor {
  public:
+  PaddleInferenceAnakinPredictor() {}
+
   PaddleInferenceAnakinPredictor(const AnakinConfig& config);
 
   // NOTE Unlike the native engine, the buffers of anakin engine's output_data
   // should be allocated first.
-  // TODO(Superjomn) should unify all the behaviors of output_data accross all
-  // the engines.
   bool Run(const std::vector<PaddleTensor>& inputs,
            std::vector<PaddleTensor>* output_data) override;
 
   std::unique_ptr<PaddlePredictor> Clone() override;
 
+  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
+      get_executer();
+
+  ~PaddleInferenceAnakinPredictor() override{};
+
  private:
   bool Init(const AnakinConfig& config);
 
-  anakin::AnakinEngine<anakin::NV,
+  anakin::graph::Graph<anakin::NV,
                        anakin::saber::AK_FLOAT,
                        anakin::Precision::FP32>
-      engine_;
+      graph_;
+  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
+      executor_;
+  AnakinConfig config_;
 };
 
 }  // namespace paddle
diff --git a/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc b/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
index 43324bc67cba16c36d9dbcb58ccde1c57293085e..47b9c6fa285b623d2b08f45917cb3474dbc2ab83 100644
--- a/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
+++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
@@ -12,16 +12,54 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/contrib/inference/paddle_inference_api.h"
+#include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "gflags/gflags.h"
+#include "paddle/contrib/inference/paddle_inference_api.h"
+
 namespace paddle {
 
-TEST(inference, anakin) {
+AnakinConfig GetConfig() {
   AnakinConfig config;
+  config.model_file = "./mobilenet_v2.anakin.bin";
+  config.device = 0;
+  config.max_batch_size = 1;
+  return config;
+}
 
-  auto engine =
+TEST(inference, anakin) {
+  AnakinConfig config = GetConfig();
+  auto predictor =
       CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
+
+  float data[1 * 3 * 224 * 224] = {1.0f};
+
+  PaddleBuf buf{.data = data, .length = sizeof(data)};
+  PaddleTensor tensor{.name = "input_0",
+                      .shape = std::vector<int>({1, 3, 224, 224}),
+                      .data = buf,
+                      .dtype = PaddleDType::FLOAT32};
+
+  // For simplicity, we set all the slots with the same data.
+  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
+
+  float data_out[1000];
+
+  PaddleBuf buf_out{.data = data_out, .length = sizeof(data)};
+  PaddleTensor tensor_out{.name = "prob_out",
+                          .shape = std::vector<int>({1000, 1}),
+                          .data = buf_out,
+                          .dtype = PaddleDType::FLOAT32};
+
+  std::vector<PaddleTensor> outputs(1, tensor_out);
+
+  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
+
+  float* data_o = static_cast<float*>(outputs[0].data.data);
+  for (size_t j = 0; j < 1000; ++j) {
+    LOG(INFO) << "output[" << j << "]: " << data_o[j];
+  }
 }
 
 }  // namespace paddle
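
Usage note (not part of the patch): the sketch below shows how a standalone caller might drive the Anakin predictor that this change wires up, mirroring the tester above. It is a minimal sketch only; the model path and the "input_0" / "prob_out" tensor names and shapes are assumptions carried over from the mobilenet_v2 test, and a real application would substitute its own model and tensor names.

// Minimal caller sketch (assumes the mobilenet_v2 model and the tensor names
// used by the tester above; link against inference_anakin_api).
#include <iostream>
#include <vector>

#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  paddle::AnakinConfig config;
  config.model_file = "./mobilenet_v2.anakin.bin";  // assumed model location
  config.device = 0;
  config.max_batch_size = 1;

  auto predictor =
      paddle::CreatePaddlePredictor<paddle::AnakinConfig,
                                    paddle::PaddleEngineKind::kAnakin>(config);
  if (!predictor) return 1;

  // Host-side input buffer; Run() copies it to the device tensor "input_0".
  std::vector<float> input(1 * 3 * 224 * 224, 1.0f);
  paddle::PaddleTensor in;
  in.name = "input_0";
  in.shape = std::vector<int>({1, 3, 224, 224});
  in.data = paddle::PaddleBuf{.data = input.data(),
                              .length = input.size() * sizeof(float)};
  in.dtype = paddle::PaddleDType::FLOAT32;

  // NOTE with the Anakin engine the output buffer must be allocated by the
  // caller before Run() (see the comment in paddle_inference_api_anakin_engine.h).
  std::vector<float> prob(1000);
  paddle::PaddleTensor out;
  out.name = "prob_out";
  out.shape = std::vector<int>({1000, 1});
  out.data = paddle::PaddleBuf{.data = prob.data(),
                               .length = prob.size() * sizeof(float)};
  out.dtype = paddle::PaddleDType::FLOAT32;

  std::vector<paddle::PaddleTensor> inputs(1, in);
  std::vector<paddle::PaddleTensor> outputs(1, out);
  if (!predictor->Run(inputs, &outputs)) return 1;

  // The engine fills the caller-provided buffer and overwrites outputs[0].shape.
  const float* data_o = static_cast<float*>(outputs[0].data.data);
  std::cout << "prob_out[0] = " << data_o[0] << std::endl;
  return 0;
}

To exercise this path, the build would be configured with the Anakin locations, e.g. cmake -DANAKIN_INCLUDE=/path/to/anakin/include -DANAKIN_LIBRARY=/path/to/anakin/lib (placeholder paths), which turns ANAKIN_FOUND on in the CMakeLists changes above.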