Unverified commit 551ff882, authored by jjyaoao and committed by GitHub

[Test Mv] remove infrt (#52270)

Parent 54497c47
@@ -7,7 +7,6 @@ add_subdirectory(scripts)
 add_subdirectory(testing)
 add_subdirectory(phi)
-add_subdirectory(infrt)
 add_subdirectory(fluid)
 add_subdirectory(ir)
......
if(NOT WITH_INFRT)
return()
endif()
option(INFRT_WITH_PHI "Compile INFRT with PHI" ON)
option(INFRT_WITH_GPU "Compile INFRT with GPU" OFF)
option(INFRT_WITH_TRT "Compile INFRT with TensorRT" OFF)
# TODO(xiaowei): remove fluid
include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)
if(WITH_GPU)
set(INFRT_WITH_GPU ON)
endif()
if(INFRT_WITH_PHI)
add_definitions("-DINFRT_WITH_PHI")
# TODO(wilber): Infrt gpu/trt currently depends on phi's components; modify the compile dependency options later.
if(INFRT_WITH_GPU)
add_definitions("-DINFRT_WITH_GPU")
if(INFRT_WITH_TRT)
add_definitions("-DINFRT_WITH_TRT")
endif()
endif()
endif()
# compile flags
set(INFRT_FLAGS -Wno-comment)
foreach(flag ${INFRT_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach()
set(INFRT_SOURCE_DIR "${PADDLE_SOURCE_DIR}/paddle/infrt")
set(INFRT_BINARY_DIR "${PADDLE_BINARY_DIR}/paddle/infrt")
set(INFRT_TEST_TARGETS CACHE INTERNAL "")
include(infrt_lib)
set(infrt_src CACHE INTERNAL "" FORCE)
# Gather headers for publishing the library.
function(core_gather_headers)
file(
GLOB includes
LIST_DIRECTORIES false
RELATIVE ${CMAKE_SOURCE_DIR}
*.h)
foreach(header ${includes})
set(core_includes
"${core_includes};${header}"
CACHE INTERNAL "")
endforeach()
endfunction()
function(gather_srcs SRC_GROUP)
set(options)
set(oneValueArgs)
set(multiValueArgs "SRCS")
cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN})
foreach(cpp ${prefix_SRCS})
set(${SRC_GROUP}
"${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${cpp}"
CACHE INTERNAL "")
endforeach()
endfunction()
# This function is similar to the global cc_test, but discards the huge number
# of default dependencies that are not needed by INFRT.
function(cc_test_tiny TARGET_NAME)
if(WITH_TESTING)
set(options SERIAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test_tiny "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_tiny_SRCS})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${cc_test_tiny_DEPS}
${os_dependency_modules} infrt_gtest_main gtest)
add_dependencies(${TARGET_NAME} ${cc_test_tiny_DEPS} infrt_gtest_main gtest
extern_gtest)
add_test(
NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} "${cc_test_tiny_ARGS}"
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if(${cc_test_tiny_SERIAL})
set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
endif()
set(INFRT_TEST_TARGETS
${INFRT_TEST_TARGETS} ${TARGET_NAME}
CACHE INTERNAL "")
endif()
endfunction()
if(WITH_TESTING)
cc_library(
infrt_gtest_main
SRCS gtest_main.cc
DEPS gtest glog gflags)
endif()
add_subdirectory(api)
add_subdirectory(backends)
add_subdirectory(common)
add_subdirectory(dialect)
add_subdirectory(host_context)
add_subdirectory(kernel)
add_subdirectory(tensor)
add_subdirectory(support)
add_subdirectory(external_kernels)
add_subdirectory(paddle)
# MLIR td file generation
set(infrt_mlir_incs basic_kernels_inc test_kernels_inc tensor_shape_inc
dense_tensor_inc pd_extra_ops_inc trt_ops_inc)
if(INFRT_WITH_PHI)
set(phi_libs phi)
set(infrt_mlir_incs ${infrt_mlir_incs} MLIRinfrt_phi_tensorIncGen
MLIRinfrt_phi_baseIncGen)
endif()
cc_library(
infrt SHARED
SRCS ${infrt_src}
DEPS glog ${mlir_libs} ${phi_libs} paddle_framework_proto infrt_naive)
cc_library(
infrt_static
SRCS ${infrt_src}
DEPS glog ${mlir_libs} ${phi_libs} paddle_framework_proto)
add_dependencies(infrt ${infrt_mlir_incs} mlir-headers)
add_custom_target(test_infrt_exec DEPENDS ${INFRT_TEST_TARGETS})
core_gather_headers()
gather_srcs(infrt_src SRCS infrt_api.cc)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/infrt_api_test.cc.in
${CMAKE_CURRENT_SOURCE_DIR}/infrt_api_test.cc)
# Disabled temporarily because the external-kernel's mkldnn is outdated.
cc_test_tiny(test_infrt_api SRCS infrt_api_test.cc DEPS infrt ${MLIR_IR_LIBS})
# TODO(inference): remove after optimizing weight unfold.
set_tests_properties(test_infrt_api PROPERTIES TIMEOUT 200)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/api/infrt_api.h"
#include <llvm/ADT/SmallVector.h>
#include <llvm/Support/DynamicLibrary.h>
#include <mlir/Dialect/StandardOps/IR/Ops.h>
#include <mlir/IR/BuiltinOps.h>
#include <mlir/Parser.h>
#include <mlir/Pass/PassManager.h>
#include <mlir/Transforms/Passes.h>
#include <unordered_map>
#include <vector>
#include "paddle/infrt/backends/host/phi_allocator.h"
#include "paddle/infrt/common/global.h"
#include "paddle/infrt/dialect/dense_tensor.h"
#include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
#include "paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.h"
#include "paddle/infrt/dialect/infrt/pass/infrt_weights_unfold_pass.h"
#include "paddle/infrt/dialect/mlir_loader.h"
#include "paddle/infrt/dialect/phi/ir/phi_base.h"
#include "paddle/infrt/dialect/phi/pass/phi_op_convert_pass.h"
#include "paddle/infrt/host_context/core_runtime.h"
#include "paddle/infrt/host_context/kernel_registry.h"
#include "paddle/infrt/host_context/mlir_function_executable.h"
#include "paddle/infrt/host_context/mlir_to_runtime_translate.h"
#include "paddle/infrt/host_context/op_executable.h"
#include "paddle/infrt/host_context/paddle_mlir.h"
#include "paddle/infrt/host_context/value.h"
#include "paddle/infrt/kernel/basic_kernels.h"
#include "paddle/infrt/kernel/control_flow_kernels.h"
#include "paddle/infrt/kernel/phi/dense_tensor_kernels.h"
#include "paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.h"
#include "paddle/infrt/kernel/phi/registry.h"
#include "paddle/infrt/kernel/tensor_kernels.h"
#include "paddle/infrt/kernel/tensor_shape_kernels.h"
#include "paddle/infrt/kernel/test_kernels.h"
#include "paddle/infrt/tensor/tensor_map.h"
#if defined(INFRT_WITH_GPU) && defined(INFRT_WITH_TRT)
#include "paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.h"
#include "paddle/infrt/dialect/tensorrt/trt_graph_split_pass.h"
#include "paddle/infrt/dialect/tensorrt/trt_op_converter_pass.h"
#include "paddle/infrt/dialect/tensorrt/trt_op_teller_pass.h"
#include "paddle/infrt/dialect/tensorrt/trt_type_convert_pass.h"
#include "paddle/infrt/kernel/tensorrt/registry.h"
#endif
using namespace infrt::host_context; // NOLINT
using namespace infrt::tensor; // NOLINT
namespace infrt {
template <typename T>
std::string DumpToString(T& op) { // NOLINT
std::string buffer;
llvm::raw_string_ostream os(buffer);
op.print(os);
os.flush();
return buffer;
}
struct MlirToRuntimeTranslator::Impl {
mlir::ModuleOp module;
// The runtime for a function call.
CoreRuntimeBuilder* runtime{};
// The current working op. The translator processes the ops one by one and
// updates `cur_op` here to the op it is currently working on.
OpExecutableBuilder* cur_op{};
// record the current function name.
std::string cur_func_name;
// Name to function definitions.
std::unordered_map<std::string, mlir::FuncOp> func_defs;
// Map from an operation to its results.
std::unordered_map<const mlir::Operation*, std::vector<ValueRef>> op_results;
llvm::DenseMap<mlir::Value, ValueRef> value_map;
};
/**
* Execute the mlir program in predict mode.
*/
class PredictExecutor : public MlirToRuntimeTranslator {
public:
CoreRuntimeBuilder core_runtime;
PredictExecutor(mlir::ModuleOp module,
KernelRegistry* registry,
::infrt::phi::DenseTensorMap&& map)
: MlirToRuntimeTranslator(module, &core_runtime),
core_runtime(registry),
registry_(registry) {
CHECK(registry_);
Init(std::move(map));
}
void Run() {
auto arguments = llvm::makeArrayRef(arguments_);
auto results = llvm::makeMutableArrayRef(results_.begin(), results_.size());
function_executable_->Execute(arguments, results);
}
int GetInputNum() { return inputs_.size(); }
::Tensor* GetInput(int i) { return inputs_[i]; }
int GetOutputNum() { return outputs_.size(); }
::Tensor* GetOutput(int i) { return outputs_[i]; }
private:
void Init(::infrt::phi::DenseTensorMap&& map) {
EmitFunctions();
llvm::Optional<mlir::FuncOp> predict_func_ = llvm::None;
for (auto func_op : impl_->module.getOps<mlir::FuncOp>()) {
if (func_op.getName().str() != "main_graph") continue;
predict_func_ = func_op;
break;
}
if (!predict_func_) {
std::cout << "ERROR: init failed, no predict function found in mlir."
<< std::endl;
return;
}
auto& predict_func = predict_func_.getValue();
function_executable_ =
new MlirFunctionExecutable(predict_func, registry_, impl_->func_defs);
// process parameters
VLOG(3) << "Arguments num of predict func: "
<< predict_func.getNumArguments();
for (size_t i = 0; i < predict_func.getNumArguments(); ++i) {
auto arg = predict_func.getArgument(i);
auto type = arg.getType();
// this param is TensorMap
if (type.isa<::infrt::phi::DenseTensorMapType>()) {
auto* value = new host_context::Value(std::move(map));
arguments_.push_back(value);
AddValue(predict_func.getArgument(i), value);
} else if (type.isa<::infrt::DenseTensorType>()) {
// this param is an input Tensor
auto dht = ::Tensor();
auto* value = new host_context::Value(std::move(dht));
arguments_.push_back(value);
inputs_.push_back(&(value->get<::Tensor>()));
} else {
llvm_unreachable("The input type has not been supported by predictor.");
}
}
// process results
auto& last_op = predict_func.front().back();
if (last_op.getName().getStringRef() == "infrt.return") {
for (size_t i = 0; i < last_op.getNumOperands(); ++i) {
auto operand = last_op.getOperand(i);
if (operand.getType().isa<::infrt::DenseTensorType>()) {
auto r = impl_->value_map.try_emplace(
operand, ValueRef(new host_context::Value(::Tensor())));
CHECK(r.second) << "Duplicate add mlir value ["
<< DumpToString(operand) << "]";
auto* value = r.first->second.get();
results_.push_back(ValueRef(value));
outputs_.push_back(&(value->get<::Tensor>()));
} else {
llvm_unreachable("infrt.return only supports DenseTensor now.");
}
}
}
}
protected:
std::unordered_map<std::string, mlir::FuncOp> func_def_table;
void EmitFunction(mlir::FuncOp op) override {
CHECK(!impl_->func_defs.count(op.getName().str()))
<< "Duplicate function defition found for function ["
<< op.getName().str();
impl_->func_defs.emplace(op.getName().str(), op);
}
private:
KernelRegistry* registry_{};
MlirFunctionExecutable* function_executable_;
llvm::SmallVector<::Tensor*, 1> inputs_;
llvm::SmallVector<host_context::Value*, 2> arguments_;
llvm::SmallVector<::Tensor*, 1> outputs_;
llvm::SmallVector<ValueRef, 1> results_;
};
std::unique_ptr<InfRtPredictor> CreateInfRtPredictor(
const InfRtConfig& config) {
auto x = std::make_unique<InfRtPredictor>();
x->Init(config);
return x;
}
struct InfRtPredictor::Impl {
std::unique_ptr<PredictExecutor> executor;
MLIRModelGenImpl module_gen_;
};
InfRtPredictor::InfRtPredictor() : impl_(new Impl) {}
InfRtPredictor::~InfRtPredictor() {}
void InfRtPredictor::Run() { impl_->executor->Run(); }
int InfRtPredictor::Init(const InfRtConfig& config) {
mlir::MLIRContext* context = ::infrt::Global::getMLIRContext();
KernelRegistry* registry = new KernelRegistry();
kernel::RegisterBasicKernels(registry);
kernel::RegisterTestKernels(registry);
kernel::RegisterTensorShapeKernels(registry);
kernel::RegisterTensorKernels(registry);
kernel::RegisterControlFlowKernels(registry);
#ifdef INFRT_WITH_PHI
kernel::RegisterPhiKernels(registry);
kernel::RegisterInferShapeLaunchers(registry);
#if defined(INFRT_WITH_GPU) && defined(INFRT_WITH_TRT)
kernel::RegisterTrtKernels(registry);
#endif // INFRT_WITH_GPU && INFRT_WITH_TRT
#endif
mlir::ModuleOp module_op;
if (config.tensorrt_enabled()) {
module_op = impl_->module_gen_.ImportPaddleModel(
config.model_dir(), config.param_dir(), false);
} else {
module_op = impl_->module_gen_.ImportPaddleModel(config.model_dir(),
config.param_dir());
}
context->loadAllAvailableDialects();
::mlir::PassManager pm(context);
::mlir::OpPassManager& pass_manager = pm.nest<::mlir::FuncOp>();
if (config.tensorrt_enabled()) {
pass_manager.addPass(::infrt::CreateInfrtWeightsUnfoldPass());
#if defined(INFRT_WITH_GPU) && defined(INFRT_WITH_TRT)
pass_manager.addPass(::infrt::trt::CreateTrtOpTellerPass());
pass_manager.addPass(::infrt::trt::CreateTrtGraphFusePass());
pass_manager.addPass(::infrt::trt::CreateTrtGraphSplitPass(1));
pass_manager.addPass(::infrt::trt::CreateTrtOpConverterPass());
pass_manager.addPass(::infrt::trt::CreateTrtTypeConvertPass());
#endif
pass_manager.addPass(::mlir::createCanonicalizerPass());
} else {
std::vector<::infrt::Place> valid_places = {
{::infrt::TargetType::CPU,
::infrt::PrecisionType::FLOAT32,
::infrt::LayoutType::NCHW}};
if (config.gpu_enabled()) {
valid_places.insert(valid_places.begin(),
::infrt::Place(::infrt::TargetType::GPU,
::infrt::PrecisionType::FLOAT32,
::infrt::LayoutType::NCHW));
}
pass_manager.addPass(CreatePhiOpCvtPass(valid_places));
pass_manager.addPass(CreateInfrtOpFusePass());
}
if (mlir::failed(pm.run(module_op))) {
std::cout << "\npass failed!\n" << std::endl;
return 4;
}
#ifndef NDEBUG
module_op.dump();
#endif // NDEBUG
// load extra shared library
for (const std::string& lib_path : config.shared_libs()) {
std::string err;
llvm::sys::DynamicLibrary dynLib =
llvm::sys::DynamicLibrary::getPermanentLibrary(lib_path.c_str(), &err);
if (!dynLib.isValid()) {
llvm::errs() << "Load shared library failed. Error: " << err << "\n";
return 1;
}
if (auto reg_sym = dynLib.SearchForAddressOfSymbol("RegisterKernels")) {
auto reg_func = reinterpret_cast<void (*)(KernelRegistry*)>(reg_sym);
reg_func(registry);
} else {
llvm::outs() << "Symbol \"RegisterKernels\" not found in \"" << lib_path
<< "\". Skip.\n";
}
}
// Load params
if (config.gpu_enabled() && !config.tensorrt_enabled()) {
auto tensor_map = ::infrt::kernel::phi::LoadCombinedParamsToGpu(
config.model_dir(), config.param_dir());
impl_->executor.reset(
new PredictExecutor(module_op, registry, std::move(tensor_map)));
} else {
auto tensor_map = ::infrt::kernel::phi::LoadCombinedParameters(
config.model_dir(), config.param_dir());
impl_->executor.reset(
new PredictExecutor(module_op, registry, std::move(tensor_map)));
}
return 0;
}
int InfRtPredictor::GetInputNum() { return impl_->executor->GetInputNum(); }
::Tensor* InfRtPredictor::GetInput(int i) {
return impl_->executor->GetInput(i);
}
int InfRtPredictor::GetOutputNum() { return impl_->executor->GetOutputNum(); }
::Tensor* InfRtPredictor::GetOutput(int i) {
return impl_->executor->GetOutput(i);
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace infrt {
class InfRtConfig {
std::string model_dir_;
std::string param_dir_;
std::vector<std::string> shared_libs_;
// TODO(wilber): Design an easy-to-use interface.
bool gpu_enabled_{false};
bool tensorrt_enabled_{false};
public:
InfRtConfig() = default;
void set_model_dir(const std::string& model_dir) { model_dir_ = model_dir; }
const std::string& model_dir() const { return model_dir_; }
void set_param_dir(const std::string& param_dir) { param_dir_ = param_dir; }
const std::string& param_dir() const { return param_dir_; }
void set_shared_libs(const std::vector<std::string>& shared_libs) {
shared_libs_ = shared_libs;
}
const std::vector<std::string>& shared_libs() const { return shared_libs_; }
void enable_gpu() { gpu_enabled_ = true; }
bool gpu_enabled() const { return gpu_enabled_; }
// TODO(wilber): Design an easy-to-use interface.
void enable_tensorrt() { tensorrt_enabled_ = true; }
void disable_tensorrt() { tensorrt_enabled_ = false; }
bool tensorrt_enabled() const { return tensorrt_enabled_; }
virtual ~InfRtConfig() = default;
};
class InfRtPredictor {
public:
InfRtPredictor();
~InfRtPredictor();
void Run();
int Init(const InfRtConfig& config);
int GetInputNum();
::Tensor* GetInput(int i);
int GetOutputNum();
::Tensor* GetOutput(int i);
protected:
struct Impl;
std::unique_ptr<Impl> impl_;
};
std::unique_ptr<InfRtPredictor> CreateInfRtPredictor(const InfRtConfig& config);
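// A minimal usage sketch (illustrative only; the model/param paths below are
// placeholders, not files shipped with this change):
//
//   InfRtConfig config;
//   config.set_model_dir("/path/to/model.pdmodel");
//   config.set_param_dir("/path/to/model.pdiparams");
//   std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
//   ::Tensor* input = predictor->GetInput(0);
//   // ... resize and fill `input` ...
//   predictor->Run();
//   ::Tensor* output = predictor->GetOutput(0);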
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <chrono>
#include <iostream>
#include <vector>
#include "llvm/Support/raw_ostream.h"
#include "paddle/infrt/api/infrt_api.h"
#include "paddle/infrt/backends/host/phi_allocator.h"
#include "paddle/infrt/common/buffer.h"
#include "paddle/infrt/common/dtype.h"
using infrt::InfRtConfig;
using infrt::InfRtPredictor;
using infrt::CreateInfRtPredictor;
namespace infrt {
TEST(InfRtPredictor, predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.set_model_dir("@CMAKE_BINARY_DIR@/linear/linear.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/linear/linear.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::CpuPhiAllocator cpu_allocator;
::Tensor* input = predictor->GetInput(0);
input->Resize({16, 784});
input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32);
auto* input_data = reinterpret_cast<float*>(input->data());
for (int i = 0; i < input->numel(); i++) input_data[i] = 1.0;
predictor->Run();
// get and print output tensor
auto* output = predictor->GetOutput(0);
// TODO(Shixiaowei02): Automatic result validation for training then inference.
// auto* output_data = reinterpret_cast<float*>(output->data());
ASSERT_EQ(output->dims(), ::phi::DDim({16, 10}));
}
TEST(InfRtPredictor, cpu_predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.set_model_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::CpuPhiAllocator cpu_allocator;
::Tensor* input = predictor->GetInput(0);
input->Resize({2, 3, 256, 256});
input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32);
auto* input_data = reinterpret_cast<float*>(input->data());
for (int i = 0; i < input->numel(); i++) input_data[i] = 1.0;
for (int i = 0; i < 10; i++) {
predictor->Run();
}
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < 10; i++) {
predictor->Run();
}
auto end = std::chrono::steady_clock::now();
auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout << "One predict period costs " << msec.count() / 10 << "ms.\n";
// get and print output tensor
auto* output = predictor->GetOutput(0);
ASSERT_EQ(output->dims(), ::phi::DDim({2, 1000}));
const std::vector<float> true_vals {
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02,
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02
};
for (size_t i = 0; i < true_vals.size(); i++) {
CHECK_NEAR(output->data<float>()[i * 100], true_vals[i], 1e-5);
}
}
#ifdef INFRT_WITH_TRT
TEST(InfRtPredictor, trt_predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.enable_tensorrt();
config.set_model_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::CpuPhiAllocator cpu_allocator;
::Tensor* input = predictor->GetInput(0);
input->Resize({2, 3, 256, 256});
input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32);
auto* input_data = reinterpret_cast<float*>(input->data());
for (int i = 0; i < input->numel(); i++) input_data[i] = 1.0;
predictor->Run();
// get and print output tensor
auto* output = predictor->GetOutput(0);
ASSERT_EQ(output->dims(), ::phi::DDim({2, 1000}));
const std::vector<float> true_vals {
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02,
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02
};
for (size_t i = 0; i < true_vals.size(); i++) {
CHECK_NEAR(output->data<float>()[i * 100], true_vals[i], 1e-5);
}
}
#endif
#ifdef INFRT_WITH_GPU
TEST(InfRtPredictor, gpu_predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.enable_gpu();
config.set_model_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::GpuPhiAllocator gpu_allocator;
::Tensor* input = predictor->GetInput(0);
input->Resize({2, 3, 256, 256});
input->AllocateFrom(&gpu_allocator, ::phi::DataType::FLOAT32);
auto* data = reinterpret_cast<float*>(input->data());
std::vector<float> input_data(2 * 3 * 256 * 256, 1.0);
cudaMemcpy(data,
input_data.data(),
sizeof(float) * input->numel(),
cudaMemcpyHostToDevice);
for (int i = 0; i < 10; i++) {
predictor->Run();
}
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < 1000; i++) {
predictor->Run();
}
auto end = std::chrono::steady_clock::now();
auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout << "One predict period costs " << msec.count() / 1000 << "ms.\n";
auto* output = predictor->GetOutput(0);
std::vector<float> output_data(output->numel());
cudaMemcpy(output_data.data(),
output->data<float>(),
sizeof(float) * output->numel(),
cudaMemcpyDeviceToHost);
ASSERT_EQ(output->dims(), ::phi::DDim({2, 1000}));
const std::vector<float> true_vals {
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02,
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02
};
for (size_t i = 0; i < true_vals.size(); i++) {
CHECK_NEAR(output_data[i * 100], true_vals[i], 1e-5);
}
}
#endif
} // namespace infrt
if(INFRT_WITH_PHI
AND WITH_GPU
AND WITH_TENSORRT)
add_subdirectory(tensorrt)
endif()
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/memory/malloc.h"
#include "paddle/phi/core/allocator.h"
#ifdef INFRT_WITH_GPU
#include <cuda_runtime.h>
#endif
namespace infrt {
namespace backends {
class CpuPhiAllocator : public phi::Allocator {
public:
static void deleter(phi::Allocation* ptr) { ::operator delete(ptr); }
AllocationPtr Allocate(size_t bytes_size) {
return AllocationPtr(
new phi::Allocation(::operator new(bytes_size),
bytes_size,
phi::Place(phi::AllocationType::CPU)),
deleter);
}
};
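// Illustrative sketch (mirrors how the API tests use this allocator; not part
// of the original header): a tensor can draw its storage from it, e.g.
//
//   CpuPhiAllocator cpu_allocator;
//   ::Tensor t;
//   t.Resize({16, 784});
//   t.AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32);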
#ifdef INFRT_WITH_GPU
// TODO(wilber): Just for the demo test; we need a more efficient gpu allocator.
class GpuPhiAllocator : public phi::Allocator {
public:
static void deleter(phi::Allocation* ptr) { cudaFree(ptr->ptr()); }
AllocationPtr Allocate(size_t bytes_size) {
return paddle::memory::Alloc(phi::Place(phi::AllocationType::GPU),
bytes_size);
}
};
#endif
} // namespace backends
} // namespace infrt
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/infrt/backends/host/phi_allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
namespace infrt {
namespace backends {
class CpuPhiContext : public ::phi::CPUContext {
public:
using Base = ::phi::CPUContext;
using ::phi::CPUContext::SetEigenDevice;
CpuPhiContext() {
Init();
SetAllocator(alloc_.get());
}
private:
std::unique_ptr<::phi::Allocator> alloc_{std::make_unique<CpuPhiAllocator>()};
};
class GpuPhiContext : public ::phi::GPUContext {
public:
using Base = ::phi::GPUContext;
using ::phi::GPUContext::SetBlasHandle;
using ::phi::GPUContext::SetDnnHandle;
using ::phi::GPUContext::SetEigenDevice;
using ::phi::GPUContext::SetSolverHandle;
using ::phi::GPUContext::SetSparseHandle;
using ::phi::GPUContext::SetStream;
};
} // namespace backends
} // namespace infrt
add_subdirectory(plugin)
core_gather_headers()
gather_srcs(infrt_src SRCS trt_engine.cc)
cc_test_tiny(
test_infrt_trt
SRCS
test_trt_engine.cc
DEPS
infrt
phi_dynload_cuda
tensorrt_converter)
gather_srcs(infrt_src SRCS pool_op_plugin.cu)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <cassert>
#include <cstring>
#include <string>
#include <type_traits>
#include <vector>
#include "paddle/phi/backends/dynload/tensorrt.h"
namespace infrt {
namespace backends {
namespace tensorrt {
namespace plugin {
template <typename T>
inline void SerializeValue(void** buffer, T const& value);
template <typename T>
inline void DeserializeValue(void const** buffer,
size_t* buffer_size,
T* value);
namespace details {
template <typename T, class Enable = void>
struct Serializer {};
template <typename T>
struct Serializer<T,
typename std::enable_if<std::is_arithmetic<T>::value ||
std::is_enum<T>::value ||
std::is_pod<T>::value>::type> {
static size_t SerializedSize(T const& value) { return sizeof(T); }
static void Serialize(void** buffer, T const& value) {
std::memcpy(*buffer, &value, sizeof(T));
reinterpret_cast<char*&>(*buffer) += sizeof(T);
}
static void Deserialize(void const** buffer, size_t* buffer_size, T* value) {
assert(*buffer_size >= sizeof(T));
std::memcpy(value, *buffer, sizeof(T));
reinterpret_cast<char const*&>(*buffer) += sizeof(T);
*buffer_size -= sizeof(T);
}
};
template <>
struct Serializer<const char*> {
static size_t SerializedSize(const char* value) { return strlen(value) + 1; }
static void Serialize(void** buffer, const char* value) {
std::strcpy(static_cast<char*>(*buffer), value); // NOLINT
reinterpret_cast<char*&>(*buffer) += strlen(value) + 1;
}
static void Deserialize(void const** buffer,
size_t* buffer_size,
const char** value) {
*value = static_cast<char const*>(*buffer);
size_t data_size = strnlen(*value, *buffer_size) + 1;
assert(*buffer_size >= data_size);
reinterpret_cast<char const*&>(*buffer) += data_size;
*buffer_size -= data_size;
}
};
template <typename T>
struct Serializer<std::vector<T>,
typename std::enable_if<std::is_arithmetic<T>::value ||
std::is_enum<T>::value ||
std::is_pod<T>::value>::type> {
static size_t SerializedSize(std::vector<T> const& value) {
return sizeof(value.size()) + value.size() * sizeof(T);
}
static void Serialize(void** buffer, std::vector<T> const& value) {
SerializeValue(buffer, value.size());
size_t nbyte = value.size() * sizeof(T);
std::memcpy(*buffer, value.data(), nbyte);
reinterpret_cast<char*&>(*buffer) += nbyte;
}
static void Deserialize(void const** buffer,
size_t* buffer_size,
std::vector<T>* value) {
size_t size;
DeserializeValue(buffer, buffer_size, &size);
value->resize(size);
size_t nbyte = value->size() * sizeof(T);
CHECK_GE(*buffer_size, nbyte);
std::memcpy(value->data(), *buffer, nbyte);
reinterpret_cast<char const*&>(*buffer) += nbyte;
*buffer_size -= nbyte;
}
};
} // namespace details
template <typename T>
inline size_t SerializedSize(T const& value) {
return details::Serializer<T>::SerializedSize(value);
}
template <typename T>
inline void SerializeValue(void** buffer, T const& value) {
return details::Serializer<T>::Serialize(buffer, value);
}
template <typename T>
inline void DeserializeValue(void const** buffer,
size_t* buffer_size,
T* value) {
return details::Serializer<T>::Deserialize(buffer, buffer_size, value);
}
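// Illustrative sketch of how these helpers round-trip a field through a raw
// byte buffer (the concrete values are examples only, not from the codebase):
//
//   std::vector<int> strides{2, 2};
//   std::vector<char> storage(SerializedSize(strides));
//   void* write_ptr = storage.data();
//   SerializeValue(&write_ptr, strides);                 // advances write_ptr
//   void const* read_ptr = storage.data();
//   size_t remaining = storage.size();
//   std::vector<int> restored;
//   DeserializeValue(&read_ptr, &remaining, &restored);  // remaining becomes 0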
template <typename T>
class TrtPluginRegistrar {
public:
TrtPluginRegistrar() {
static auto func_ptr = static_cast<nvinfer1::IPluginRegistry*>(
::phi::dynload::getPluginRegistry());
func_ptr->registerCreator(instance, "");
}
private:
//! Plugin instance.
T instance{};
};
#define REGISTER_TRT_PLUGIN(name) \
static TrtPluginRegistrar<name> pluginRegistrar##name {}
} // namespace plugin
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "paddle/infrt/backends/tensorrt/plugin/plugin_utils.h"
#include "paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.h"
#include "paddle/phi/kernels/funcs/pooling.h"
namespace infrt {
namespace backends {
namespace tensorrt {
namespace plugin {
PoolPlugin::PoolPlugin(bool ceil_mode,
PoolType pool_type,
bool adaptive,
bool exclusive,
std::vector<int> ksize,
std::vector<int> strides,
std::vector<int> paddings,
std::vector<int> input_shape,
std::vector<int> real_paddings)
: ceil_mode_(ceil_mode),
pool_type_(pool_type),
adaptive_(adaptive),
exclusive_(exclusive),
ksize_(ksize),
strides_(strides),
paddings_(paddings),
real_paddings_(real_paddings),
input_shape_(input_shape) {
output_shape_ = input_shape_;
std::vector<int> output_shape =
CalcOutputSize({input_shape_[1], input_shape_[2]},
ceil_mode_,
adaptive_,
ksize_,
strides_,
real_paddings_);
output_shape_[1] = output_shape[0];
output_shape_[2] = output_shape[1];
}
PoolPlugin::PoolPlugin(void const* serialData, size_t serialLength) {
// deserializeBase(serialData, serialLength);
DeserializeValue(&serialData, &serialLength, &ceil_mode_);
DeserializeValue(&serialData, &serialLength, &pool_type_);
DeserializeValue(&serialData, &serialLength, &adaptive_);
DeserializeValue(&serialData, &serialLength, &exclusive_);
DeserializeValue(&serialData, &serialLength, &ksize_);
DeserializeValue(&serialData, &serialLength, &strides_);
DeserializeValue(&serialData, &serialLength, &paddings_);
DeserializeValue(&serialData, &serialLength, &real_paddings_);
DeserializeValue(&serialData, &serialLength, &input_shape_);
DeserializeValue(&serialData, &serialLength, &output_shape_);
}
const char* PoolPlugin::getPluginType() const noexcept { return "pool_plugin"; }
const char* PoolPlugin::getPluginVersion() const noexcept { return "1"; }
int PoolPlugin::getNbOutputs() const noexcept { return 1; }
nvinfer1::Dims PoolPlugin::getOutputDimensions(int outputIndex,
const nvinfer1::Dims* inputs,
int nbInputs) noexcept {
assert(nbInputs == 1);
assert(outputIndex == 0);
assert(inputs[0].nbDims == 3);
nvinfer1::Dims const& input_dims = inputs[0];
nvinfer1::Dims output_dims = input_dims;
output_dims.d[1] = output_shape_[1];
output_dims.d[2] = output_shape_[2];
return output_dims;
}
int32_t PoolPlugin::initialize() noexcept { return 0; }
void PoolPlugin::terminate() noexcept {}
size_t PoolPlugin::getWorkspaceSize(int32_t maxBatchSize) const noexcept {
return 0;
}
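// Note: TensorRT 8.0 changed IPluginV2::enqueue so that `outputs` is passed as
// `void* const*` instead of `void**`, hence the version guard below.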
#if IS_TRT_VERSION_LT(8000)
int PoolPlugin::enqueue(int batch_size,
const void* const* inputs,
void** outputs,
#else
int PoolPlugin::enqueue(int batch_size,
const void* const* inputs,
void* const* outputs,
#endif
void* workspace,
cudaStream_t stream) noexcept {
// TODO(wilber)
int input_size = 0;
float const* idata = reinterpret_cast<float const*>(inputs[0]);
float* const* odatas = reinterpret_cast<float* const*>(outputs);
std::vector<int> input_shape = input_shape_;
std::vector<int> output_shape = output_shape_;
input_shape.insert(input_shape.begin(), batch_size);
output_shape.insert(output_shape.begin(), batch_size);
if (pool_type_ == PoolType::max) {
::phi::funcs::MaxPool<float> pool_process;
::phi::funcs::Pool2dDirectCUDAFunctor<phi::funcs::MaxPool<float>, float>
pool2d_forward;
pool2d_forward(idata,
input_shape,
output_shape,
ksize_,
strides_,
paddings_,
true,
false,
odatas[0],
stream,
pool_process);
} else if (pool_type_ == PoolType::avg) {
::phi::funcs::AvgPool<float> pool_process;
::phi::funcs::Pool2dDirectCUDAFunctor<phi::funcs::AvgPool<float>, float>
pool2d_forward;
pool2d_forward(idata,
input_shape,
output_shape,
ksize_,
strides_,
paddings_,
exclusive_,
adaptive_,
odatas[0],
stream,
pool_process);
}
return cudaGetLastError() != cudaSuccess;
}
// TODO(wilber): serialize base info?
size_t PoolPlugin::getSerializationSize() const noexcept {
return SerializedSize(ceil_mode_) + SerializedSize(pool_type_) +
SerializedSize(adaptive_) + SerializedSize(exclusive_) +
SerializedSize(ksize_) + SerializedSize(strides_) +
SerializedSize(paddings_) + SerializedSize(real_paddings_) +
SerializedSize(input_shape_) + SerializedSize(output_shape_);
}
// TODO(wilber): serialize base info?
void PoolPlugin::serialize(void* buffer) const noexcept {
// serializeBase(buffer);
SerializeValue(&buffer, ceil_mode_);
SerializeValue(&buffer, pool_type_);
SerializeValue(&buffer, adaptive_);
SerializeValue(&buffer, exclusive_);
SerializeValue(&buffer, ksize_);
SerializeValue(&buffer, strides_);
SerializeValue(&buffer, paddings_);
SerializeValue(&buffer, real_paddings_);
SerializeValue(&buffer, input_shape_);
SerializeValue(&buffer, output_shape_);
}
void PoolPlugin::destroy() noexcept { delete this; }
void PoolPlugin::setPluginNamespace(char const* plugin_namespace) noexcept {
namespace_ = plugin_namespace;
}
char const* PoolPlugin::getPluginNamespace() const noexcept {
return namespace_.c_str();
}
nvinfer1::DataType PoolPlugin::getOutputDataType(
int32_t index,
nvinfer1::DataType const* input_types,
int32_t nbInputs) const noexcept {
CHECK_EQ(index, 0);
CHECK_EQ((input_types[0] == nvinfer1::DataType::kFLOAT), true);
return input_types[0];
}
bool PoolPlugin::isOutputBroadcastAcrossBatch(int32_t outputIndex,
bool const* inputIsBroadcasted,
int32_t nbInputs) const noexcept {
return false;
}
bool PoolPlugin::canBroadcastInputAcrossBatch(
int32_t inputIndex) const noexcept {
return false;
}
nvinfer1::IPluginV2Ext* PoolPlugin::clone() const noexcept {
auto* plugin = new PoolPlugin(ceil_mode_,
pool_type_,
adaptive_,
exclusive_,
ksize_,
strides_,
paddings_,
input_shape_,
real_paddings_);
plugin->setPluginNamespace(namespace_.c_str());
return plugin;
}
void PoolPlugin::configurePlugin(nvinfer1::PluginTensorDesc const* in,
int32_t nb_input,
nvinfer1::PluginTensorDesc const* out,
int32_t nb_output) noexcept {
CHECK_EQ(nb_input, 1);
CHECK_EQ(nb_output, 1);
input_dims_ = in[0].dims;
data_format_ = in[0].format;
data_type_ = in[0].type;
}
bool PoolPlugin::supportsFormatCombination(
int32_t pos,
nvinfer1::PluginTensorDesc const* in_out,
int32_t nb_inputs,
int32_t nb_outputs) const noexcept {
CHECK_LT(pos, nb_inputs + nb_outputs);
CHECK_NOTNULL(in_out);
return ((in_out[pos].type == nvinfer1::DataType::kFLOAT) &&
in_out[pos].format == nvinfer1::PluginFormat::kLINEAR);
}
nvinfer1::IPluginV2* PoolPluginCreator::createPlugin(
const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept {
// auto* plugin = new UffPoolPluginV2(*fc);
field_collection_ = *fc;
plugin_name_ = name;
const nvinfer1::PluginField* fields = fc->fields;
bool ceil_mode;
PoolPlugin::PoolType pool_type;
bool adaptive;
bool exclusive;
std::vector<int> ksize;
std::vector<int> strides;
std::vector<int> paddings;
std::vector<int> real_paddings;
std::vector<int> input_shape;
std::vector<int> output_shape;
// TODO(wilber): add implement.
CHECK(false) << "not implement";
// for (int i = 0; i < fc->nbFields; ++i) {
// const char* attr_name = fields[i].name;
// if (!strcmp(attr_name, "ceil_mode")) {
// CHECK_EQ(fields[i].type == nvinfer1::PluginFieldType::kINT8, true);
// ceil_mode = *static_cast<const bool*>(fields[i].data);
// // mParam.numOutputBoxesPerClass =
// // *(static_cast<const int*>(fields[i].data));
// }
// }
return nullptr;
}
nvinfer1::IPluginV2* PoolPluginCreator::deserializePlugin(
const char* name, const void* serialData, size_t serialLength) noexcept {
auto* plugin = new PoolPlugin(serialData, serialLength);
plugin_name_ = name;
return plugin;
}
} // namespace plugin
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInferRuntime.h>
#include <NvInferRuntimeCommon.h>
#include <stdio.h>
#include <cassert>
#include <string>
#include <vector>
#include "paddle/infrt/backends/tensorrt/plugin/plugin_utils.h"
#include "paddle/infrt/backends/tensorrt/trt_utils.h"
namespace infrt {
namespace backends {
namespace tensorrt {
namespace plugin {
static std::vector<int> CalcOutputSize(const std::vector<int>& input_shape,
const bool& ceil_mode,
const bool& adaptive,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& real_paddings) {
std::vector<int> output_shape = input_shape;
if (adaptive) {
output_shape[0] = ksize[0];
output_shape[1] = ksize[1];
} else {
int output_h = 0, output_w = 0;
if (ceil_mode) {
output_h = (input_shape[0] - ksize[0] + real_paddings[0] +
real_paddings[1] + strides[0] - 1) /
strides[0] +
1;
output_w = (input_shape[1] - ksize[1] + real_paddings[2] +
real_paddings[3] + strides[1] - 1) /
strides[1] +
1;
}
// TRT will use the native layer when ceil_mode=false
/*
else{
output_h = (input_shape[0] - ksize[0] + real_paddings[0] +
real_paddings[1]) / strides[0] + 1;
output_w = (input_shape[1] - ksize[1] + real_paddings[2] +
real_paddings[3]) / strides[1] + 1;
}
*/
output_shape[0] = output_h;
output_shape[1] = output_w;
}
return output_shape;
}
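// Worked example for CalcOutputSize above (illustrative values only;
// ceil_mode = true, adaptive = false):
//   input_shape = {28, 28}, ksize = {3, 3}, strides = {2, 2},
//   real_paddings = {0, 0, 0, 0}
//   output_h = (28 - 3 + 0 + 0 + 2 - 1) / 2 + 1 = 13 + 1 = 14
//   output_w is computed the same way, so the result is {14, 14}.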
class PoolPlugin : public nvinfer1::IPluginV2IOExt {
public:
enum class PoolType {
max = 0,
avg,
};
PoolPlugin() {}
PoolPlugin(bool ceil_mode,
PoolType pool_type,
bool adaptive,
bool exclusive,
std::vector<int> ksize,
std::vector<int> strides,
std::vector<int> paddings,
std::vector<int> input_shape,
std::vector<int> real_paddings);
PoolPlugin(void const* serialData, size_t serialLength);
// IPluginV2 methods
const char* getPluginType() const noexcept override;
const char* getPluginVersion() const noexcept override;
int getNbOutputs() const noexcept override;
nvinfer1::Dims getOutputDimensions(int outputIndex,
const nvinfer1::Dims* inputs,
int nbInputs) noexcept override;
int32_t initialize() noexcept override;
void terminate() noexcept override;
size_t getWorkspaceSize(int32_t maxBatchSize) const noexcept override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batchSize,
const void* const* inputs,
void** outputs,
#else
int enqueue(int batchSize,
const void* const* inputs,
void* const* outputs,
#endif
void* workspace,
cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void* buffer) const noexcept override;
void destroy() noexcept override;
void setPluginNamespace(char const* pluginNamespace) noexcept override;
char const* getPluginNamespace() const noexcept override;
// IPluginV2Ext methods
nvinfer1::DataType getOutputDataType(
int32_t index,
nvinfer1::DataType const* inputTypes,
int32_t nbInputs) const noexcept override;
bool isOutputBroadcastAcrossBatch(int32_t outputIndex,
bool const* inputIsBroadcasted,
int32_t nbInputs) const noexcept override;
bool canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept override;
// void attachToContext(cudnnContext*,
// cublasContext*,
// IGpuAllocator*) noexcept override;
// void detachFromContext() noexcept override;
IPluginV2Ext* clone() const noexcept override;
// IPluginV2IOExt methods
void configurePlugin(nvinfer1::PluginTensorDesc const* in,
int32_t nb_input,
nvinfer1::PluginTensorDesc const* out,
int32_t nb_output) noexcept override;
bool supportsFormatCombination(int32_t pos,
nvinfer1::PluginTensorDesc const* inOut,
int32_t nb_inputs,
int32_t nb_outputs) const noexcept override;
private:
bool ceil_mode_;
PoolType pool_type_;
bool adaptive_;
bool exclusive_;
std::vector<int> ksize_;
std::vector<int> strides_;
std::vector<int> paddings_;
std::vector<int> real_paddings_;
std::vector<int> input_shape_;
std::vector<int> output_shape_;
private:
nvinfer1::Dims input_dims_;
nvinfer1::DataType data_type_;
nvinfer1::PluginFormat data_format_;
std::string namespace_;
};
class PoolPluginCreator : public nvinfer1::IPluginCreator {
public:
const char* getPluginName() const noexcept override { return "pool_plugin"; }
const char* getPluginVersion() const noexcept override { return "1"; }
const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override {
return &field_collection_;
}
nvinfer1::IPluginV2* createPlugin(
const char* name,
const nvinfer1::PluginFieldCollection* fc) noexcept override;
nvinfer1::IPluginV2* deserializePlugin(const char* name,
const void* serialData,
size_t serialLength) noexcept override;
void setPluginNamespace(const char* plugin_namespace) noexcept override {
plugin_namespace_ = plugin_namespace;
}
const char* getPluginNamespace() const noexcept override {
return plugin_namespace_.c_str();
}
private:
std::string plugin_namespace_;
std::string plugin_name_;
nvinfer1::PluginFieldCollection field_collection_{0, nullptr};
};
REGISTER_TRT_PLUGIN(PoolPluginCreator);
} // namespace plugin
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <NvInferRuntimeCommon.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <math.h>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/infrt/backends/tensorrt/trt_engine.h"
#include "paddle/infrt/backends/tensorrt/trt_options.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/meta_tensor.h"
namespace infrt {
namespace backends {
namespace tensorrt {
const char* model_input = "input_0";
const char* model_output = "output_0";
const char* model_output2 = "output_1";
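// Builds a small network: input -> sigmoid -> split plugin that splits the
// 3-channel activation into a 1-channel and a 2-channel output, so the tests
// can exercise both a plugin layer and multiple outputs.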
TrtUniquePtr<nvinfer1::INetworkDefinition> ConstructNetwork(
nvinfer1::IBuilder* builder, nvinfer1::Dims dims, bool is_static_shape) {
TrtUniquePtr<nvinfer1::INetworkDefinition> network;
if (is_static_shape) {
network.reset(builder->createNetworkV2(0U));
} else {
auto networkFlags =
1U << static_cast<uint32_t>(
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
network.reset(builder->createNetworkV2(networkFlags));
}
nvinfer1::ITensor* data =
network->addInput(model_input, nvinfer1::DataType::kFLOAT, dims);
CHECK_NOTNULL(data);
nvinfer1::IActivationLayer* act =
network->addActivation(*data, nvinfer1::ActivationType::kSIGMOID);
CHECK_NOTNULL(act);
auto* act_out = act->getOutput(0);
std::vector<int> output_length{1, 2};
int axis;
nvinfer1::IPluginV2Layer* split_layer;
if (is_static_shape) {
axis = 0;
paddle::inference::tensorrt::plugin::SplitPlugin plugin(
axis, output_length, false);
split_layer = network->addPluginV2(&act_out, 1, plugin);
} else {
axis = 1;
paddle::inference::tensorrt::plugin::SplitPluginDynamic plugin(
axis, output_length, false);
split_layer = network->addPluginV2(&act_out, 1, plugin);
}
split_layer->getOutput(0)->setName(model_output);
split_layer->getOutput(1)->setName(model_output2);
network->markOutput(*split_layer->getOutput(0));
network->markOutput(*split_layer->getOutput(1));
return network;
}
TrtUniquePtr<nvinfer1::INetworkDefinition> ConstructFCNetwork(
nvinfer1::IBuilder* builder, nvinfer1::Dims dims, bool is_static_shape) {
TrtUniquePtr<nvinfer1::INetworkDefinition> network;
if (is_static_shape) {
network.reset(builder->createNetworkV2(0U));
} else {
auto networkFlags =
1U << static_cast<uint32_t>(
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
network.reset(builder->createNetworkV2(networkFlags));
}
nvinfer1::ITensor* data =
network->addInput(model_input, nvinfer1::DataType::kFLOAT, dims);
CHECK_NOTNULL(data);
nvinfer1::Weights kernel_weights;
kernel_weights.type = nvinfer1::DataType::kFLOAT;
kernel_weights.count = 7840;
std::vector<float> weight_data(kernel_weights.count);
for (size_t i = 0; i < weight_data.size(); ++i) {
weight_data[i] = i % 255 * 0.02f;
}
kernel_weights.values = weight_data.data();
auto* layer = network->addFullyConnected(
*data, 10, kernel_weights, nvinfer1::Weights{});
CHECK_NOTNULL(layer);
auto* out = layer->getOutput(0);
out->setName(model_output);
network->markOutput(*out);
return network;
}
TrtUniquePtr<nvinfer1::INetworkDefinition> ConstructConvNetwork(
nvinfer1::IBuilder* builder, nvinfer1::Dims dims, bool is_static_shape) {
TrtUniquePtr<nvinfer1::INetworkDefinition> network;
if (is_static_shape) {
network.reset(builder->createNetworkV2(0U));
} else {
auto networkFlags =
1U << static_cast<uint32_t>(
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
network.reset(builder->createNetworkV2(networkFlags));
}
nvinfer1::ITensor* data =
network->addInput(model_input, nvinfer1::DataType::kFLOAT, dims);
CHECK_NOTNULL(data);
nvinfer1::Weights kernel_weights, bias_weights;
kernel_weights.type = nvinfer1::DataType::kFLOAT;
bias_weights.type = nvinfer1::DataType::kFLOAT;
kernel_weights.count = 81;
bias_weights.count = 3;
std::vector<float> weight_data(kernel_weights.count);
for (size_t i = 0; i < weight_data.size(); ++i) {
weight_data[i] = i * 0.02f;
}
std::vector<float> bias_data(bias_weights.count);
for (size_t i = 0; i < bias_data.size(); ++i) {
bias_data[i] = i * 0.5f;
}
kernel_weights.values = weight_data.data();
bias_weights.values = bias_data.data();
nvinfer1::Dims ksize;
ksize.nbDims = 2;
ksize.d[0] = 3;
ksize.d[1] = 3;
auto* layer =
network->addConvolutionNd(*data, 3, ksize, kernel_weights, bias_weights);
CHECK_NOTNULL(layer);
auto* out = layer->getOutput(0);
out->setName(model_output);
network->markOutput(*out);
return network;
}
// sigmoid(x) = 1 / (1 + exp(-x))
inline float sigmoid(float x) { return 1.f / (1.f + exp(-1 * x)); }
TEST(trt, run_fc_static) {
TrtEngine engine(0);
auto net = ConstructFCNetwork(
engine.GetTrtBuilder(), nvinfer1::Dims3{1, 28, 28}, true);
BuildOptions build_options;
build_options.max_batch = 4;
build_options.workspace = 1024;
engine.Build(std::move(net), build_options);
InferenceOptions inference_options;
inference_options.batch = 1;
phi::GPUPlace place;
phi::GPUContext context;
context.PartialInitWithoutAllocator();
context.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place, context.stream())
.get());
context.PartialInitWithAllocator();
phi::DenseTensorMeta meta(
phi::DataType::FLOAT32,
phi::make_ddim({inference_options.batch, 1, 28, 28}));
phi::DenseTensor input;
input.set_meta(meta);
context.Alloc<float>(&input, input.numel() * sizeof(float));
std::vector<float> host_data(inference_options.batch * 1 * 28 * 28, 0);
for (size_t i = 0; i < host_data.size(); ++i) {
host_data[i] = i % 100 * 0.016f;
}
paddle::memory::Copy(place,
input.data<float>(),
phi::CPUPlace(),
host_data.data(),
sizeof(float) * host_data.size(),
context.stream());
std::unordered_map<std::string, phi::DenseTensor*> inputs;
inputs.emplace(std::make_pair(model_input, &input));
engine.PrepareOutputHandle("output_0");
engine.SetUpInference(inference_options, inputs);
engine.GetEngineInfo();
engine.Run(context);
cudaStreamSynchronize(context.stream());
}
TEST(trt, run_conv_static) {
TrtEngine engine(0);
auto net = ConstructConvNetwork(
engine.GetTrtBuilder(), nvinfer1::Dims3{3, 28, 28}, true);
BuildOptions build_options;
build_options.max_batch = 4;
build_options.workspace = 1024;
engine.Build(std::move(net), build_options);
InferenceOptions inference_options;
inference_options.batch = 1;
phi::GPUPlace place;
phi::GPUContext context;
context.PartialInitWithoutAllocator();
context.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place, context.stream())
.get());
context.PartialInitWithAllocator();
phi::DenseTensorMeta meta(
phi::DataType::FLOAT32,
phi::make_ddim({inference_options.batch, 3, 28, 28}));
phi::DenseTensor input;
input.set_meta(meta);
context.Alloc<float>(&input, input.numel() * sizeof(float));
std::vector<float> host_data(inference_options.batch * 3 * 28 * 28, 0);
for (size_t i = 0; i < host_data.size(); ++i) {
host_data[i] = i % 100 * 0.016f;
}
paddle::memory::Copy(place,
input.data<float>(),
phi::CPUPlace(),
host_data.data(),
sizeof(float) * host_data.size(),
context.stream());
std::unordered_map<std::string, phi::DenseTensor*> inputs;
inputs.emplace(std::make_pair(model_input, &input));
engine.PrepareOutputHandle("output_0");
engine.SetUpInference(inference_options, inputs);
engine.GetEngineInfo();
engine.Run(context);
cudaStreamSynchronize(context.stream());
}
TEST(trt, run_static) {
TrtEngine static_trt_engine(0);
auto net = ConstructNetwork(
static_trt_engine.GetTrtBuilder(), nvinfer1::Dims3{3, 28, 28}, true);
BuildOptions static_build_options;
static_build_options.max_batch = 4;
static_trt_engine.Build(std::move(net), static_build_options);
InferenceOptions inference_options;
inference_options.batch = 2;
phi::GPUPlace place;
phi::GPUContext context;
context.PartialInitWithoutAllocator();
context.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place, context.stream())
.get());
context.PartialInitWithAllocator();
phi::DenseTensorMeta meta(
phi::DataType::FLOAT32,
phi::make_ddim({inference_options.batch, 3, 28, 28}));
phi::DenseTensor input;
input.set_meta(meta);
context.Alloc<float>(&input, input.numel() * sizeof(float));
std::vector<float> host_data(inference_options.batch * 3 * 28 * 28, 0);
for (size_t i = 0; i < host_data.size(); ++i) {
host_data[i] = i % 100 * 0.016f;
}
paddle::memory::Copy(place,
input.data<float>(),
phi::CPUPlace(),
host_data.data(),
sizeof(float) * host_data.size(),
context.stream());
std::unordered_map<std::string, phi::DenseTensor*> inputs;
inputs.emplace(std::make_pair(model_input, &input));
static_trt_engine.PrepareOutputHandle("output_0");
static_trt_engine.PrepareOutputHandle("output_1");
static_trt_engine.SetUpInference(inference_options, inputs);
static_trt_engine.GetEngineInfo();
static_trt_engine.Run(context);
phi::DenseTensor* output0 = static_trt_engine.GetOutput("output_0");
phi::DenseTensor* output1 = static_trt_engine.GetOutput("output_1");
std::vector<float> output_data1(inference_options.batch * 1 * 28 * 28, 0);
std::vector<float> output_data2(inference_options.batch * 2 * 28 * 28, 0);
paddle::memory::Copy(phi::CPUPlace(),
output_data1.data(),
place,
output0->data<float>(),
sizeof(float) * output_data1.size(),
context.stream());
paddle::memory::Copy(phi::CPUPlace(),
output_data2.data(),
place,
output1->data<float>(),
sizeof(float) * output_data2.size(),
context.stream());
cudaStreamSynchronize(context.stream());
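// The input is laid out as NCHW ({batch, 3, 28, 28}); recover (n, c, h, w)
// from the flat index i. The split layer routes channel 0 to output_0 and
// channels 1-2 to output_1, and every element should equal sigmoid(input).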
for (size_t i = 0; i < host_data.size(); ++i) {
int w = i % 28;
int h = (i / 28) % 28;
int c = i / (28 * 28) % 3;
int n = i / (28 * 28 * 3);
if (c == 0) {
CHECK_NEAR(
sigmoid(host_data[i]), output_data1[n * 28 * 28 + h * 28 + w], 1e-5);
} else {
CHECK_NEAR(sigmoid(host_data[i]),
output_data2[n * 28 * 28 * 2 + (c - 1) * 28 * 28 + h * 28 + w],
1e-5);
}
}
}
TEST(trt, run_dynamic) {
TrtEngine engine(0);
auto net = ConstructNetwork(
engine.GetTrtBuilder(), nvinfer1::Dims4{-1, 3, -1, -1}, false);
BuildOptions build_options;
build_options.max_batch = 4;
build_options.workspace = 32;
// build_options.fp16 = true;
std::vector<int32_t> min_shape{1, 3, 16, 16};
std::vector<int32_t> opt_shape{2, 3, 28, 28};
std::vector<int32_t> max_shape{4, 3, 28, 28};
build_options.shapes[model_input][0] = min_shape;
build_options.shapes[model_input][1] = opt_shape;
build_options.shapes[model_input][2] = max_shape;
engine.Build(std::move(net), build_options);
InferenceOptions inference_options;
inference_options.batch = 2;
phi::GPUPlace place;
phi::GPUContext context;
context.PartialInitWithoutAllocator();
context.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place, context.stream())
.get());
context.PartialInitWithAllocator();
phi::DenseTensorMeta meta(
phi::DataType::FLOAT32,
phi::make_ddim({inference_options.batch, 3, 16, 16}));
phi::DenseTensor input, output, output2;
input.set_meta(meta);
context.Alloc<float>(&input, input.numel() * sizeof(float));
std::vector<float> host_data(inference_options.batch * 3 * 16 * 16, 0);
for (size_t i = 0; i < host_data.size(); ++i) {
host_data[i] = i % 100 * 0.016f;
}
paddle::memory::Copy(place,
input.data<float>(),
phi::CPUPlace(),
host_data.data(),
sizeof(float) * host_data.size(),
context.stream());
std::unordered_map<std::string, phi::DenseTensor*> inputs;
inputs.emplace(std::make_pair(model_input, &input));
engine.PrepareOutputHandle("output_0");
engine.PrepareOutputHandle("output_1");
engine.SetUpInference(inference_options, inputs);
engine.GetEngineInfo();
engine.Run(context);
phi::DenseTensor* output0 = engine.GetOutput("output_0");
phi::DenseTensor* output1 = engine.GetOutput("output_1");
std::vector<float> output_data1(inference_options.batch * 1 * 16 * 16, 0);
std::vector<float> output_data2(inference_options.batch * 2 * 16 * 16, 0);
paddle::memory::Copy(phi::CPUPlace(),
output_data1.data(),
place,
output0->data<float>(),
sizeof(float) * output_data1.size(),
context.stream());
paddle::memory::Copy(phi::CPUPlace(),
output_data2.data(),
place,
output1->data<float>(),
sizeof(float) * output_data2.size(),
context.stream());
cudaStreamSynchronize(context.stream());
for (size_t i = 0; i < host_data.size(); ++i) {
int w = i % 16;
int h = (i / 16) % 16;
int c = i / (16 * 16) % 3;
int n = i / (16 * 16 * 3);
if (c == 0) {
CHECK_NEAR(
sigmoid(host_data[i]), output_data1[n * 16 * 16 + h * 16 + w], 1e-5);
} else {
CHECK_NEAR(sigmoid(host_data[i]),
output_data2[n * 16 * 16 * 2 + (c - 1) * 16 * 16 + h * 16 + w],
1e-5);
}
}
}
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/backends/tensorrt/trt_engine.h"
#include <NvInferRuntime.h>
#include <NvInferRuntimeCommon.h>
#include <glog/logging.h>
#include "paddle/phi/backends/dynload/tensorrt.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"
namespace infrt {
namespace backends {
namespace tensorrt {
// The following two APIs are implemented inline in TensorRT's headers and
// cannot be loaded from the dynamic library, so we provide our own wrappers
// that call the corresponding *_INTERNAL symbols from the dynamic library.
static nvinfer1::IBuilder* createInferBuilder(
nvinfer1::ILogger& logger) { // NOLINT
return static_cast<nvinfer1::IBuilder*>(
::phi::dynload::createInferBuilder_INTERNAL(&logger,
NV_TENSORRT_VERSION));
}
static nvinfer1::IRuntime* createInferRuntime(
nvinfer1::ILogger& logger) { // NOLINT
return static_cast<nvinfer1::IRuntime*>(
::phi::dynload::createInferRuntime_INTERNAL(&logger,
NV_TENSORRT_VERSION));
}
TrtEngine::TrtEngine(int device_id) : device_id_(device_id) {
FreshDeviceId();
logger_.reset(new TrtLogger());
builder_.reset(createInferBuilder(logger_->GetTrtLogger()));
::phi::dynload::initLibNvInferPlugins(&logger_->GetTrtLogger(), "");
}
nvinfer1::IBuilder* TrtEngine::GetTrtBuilder() {
CHECK_NOTNULL(builder_);
return builder_.get();
}
void TrtEngine::Build(TrtUniquePtr<nvinfer1::INetworkDefinition> network,
const BuildOptions& build_options) {
FreshDeviceId();
ModelToBuildEnv(std::move(network), build_options);
CHECK_NOTNULL(engine_);
}
bool TrtEngine::ModelToBuildEnv(
TrtUniquePtr<nvinfer1::INetworkDefinition> network,
const BuildOptions& build) {
CHECK_NOTNULL(builder_);
std::swap(network, network_);
CHECK_NOTNULL(network_);
// ModelToNetwork(network_, logger);
NetworkToEngine(build);
return true;
}
bool TrtEngine::NetworkToEngine(const BuildOptions& build) {
TrtUniquePtr<IBuilderConfig> config{builder_->createBuilderConfig()};
CHECK_NOTNULL(config);
CHECK(SetupNetworkAndConfig(build, *network_, *config));
#if IS_TRT_VERSION_LT(8000)
engine_.reset(builder_->buildEngineWithConfig(*network_, *config));
#else
serialized_engine_.reset(
builder_->buildSerializedNetwork(*network_, *config));
CHECK_NOTNULL(serialized_engine_);
TrtUniquePtr<IRuntime> runtime{createInferRuntime(logger_->GetTrtLogger())};
CHECK_NOTNULL(runtime);
engine_.reset(runtime->deserializeCudaEngine(serialized_engine_->data(),
serialized_engine_->size()));
CHECK_NOTNULL(engine_);
#endif
return true;
}
bool TrtEngine::SetupNetworkAndConfig(const BuildOptions& build,
INetworkDefinition& network,
IBuilderConfig& config) {
builder_->setMaxBatchSize(build.max_batch);
// TODO(wilber): handle one engine - multi execution context case.
IOptimizationProfile* profile{nullptr};
if (!build.shapes.empty()) {
profile = builder_->createOptimizationProfile();
CHECK_NOTNULL(profile);
}
// Set formats and data types of inputs
for (int32_t i = 0; i < network.getNbInputs(); ++i) {
auto* input = network.getInput(i);
if (!build.input_formats.empty()) {
input->setType(build.input_formats[i].first);
input->setAllowedFormats(build.input_formats[i].second);
} else {
switch (input->getType()) {
case DataType::kINT32:
case DataType::kBOOL:
case DataType::kHALF:
// Leave these as is.
break;
case DataType::kFLOAT:
case DataType::kINT8:
// User did not specify a floating-point format. Default to kFLOAT.
input->setType(DataType::kFLOAT);
break;
}
input->setAllowedFormats(1U << static_cast<int>(TensorFormat::kLINEAR));
}
if (profile) {
Dims dims = input->getDimensions();
// TODO(wilber): shape tensor.
const bool is_dynamic_input = std::any_of(
dims.d, dims.d + dims.nbDims, [](int dim) { return dim == -1; });
if (is_dynamic_input) {
is_dynamic_shape_ = true;
auto shape = build.shapes.find(input->getName());
// If no shape is provided
if (shape == build.shapes.end()) {
        // TODO(wilber): add information.
CHECK(false);
}
LOG(INFO) << "Run Paddle-TRT Dynamic Shape mode.";
std::vector<int> profile_dims{};
profile_dims =
shape->second[static_cast<size_t>(OptProfileSelector::kMIN)];
CHECK(profile->setDimensions(input->getName(),
OptProfileSelector::kMIN,
VecToDims(profile_dims)));
profile_dims =
shape->second[static_cast<size_t>(OptProfileSelector::kOPT)];
CHECK(profile->setDimensions(input->getName(),
OptProfileSelector::kOPT,
VecToDims(profile_dims)));
profile_dims =
shape->second[static_cast<size_t>(OptProfileSelector::kMAX)];
CHECK(profile->setDimensions(input->getName(),
OptProfileSelector::kMAX,
VecToDims(profile_dims)));
}
}
}
if (profile && is_dynamic_shape_) {
CHECK(profile->isValid()); // Required optimization profile is invalid
CHECK_NE(config.addOptimizationProfile(profile), -1);
}
// Set formats and data types of outputs
for (int32_t i = 0, n = network.getNbOutputs(); i < n; i++) {
auto* output = network.getOutput(i);
if (!build.output_formats.empty()) {
// int outputFormatIndex = broadcastOutputFormats ? 0 : i;
output->setType(build.output_formats[i].first);
output->setAllowedFormats(build.output_formats[i].second);
} else {
output->setAllowedFormats(1U << static_cast<int>(TensorFormat::kLINEAR));
}
}
config.setMaxWorkspaceSize(static_cast<size_t>(build.workspace) << 20);
if (build.fp16) {
config.setFlag(BuilderFlag::kFP16);
bool support_fp16 = builder_->platformHasFastFp16();
if (support_fp16) {
LOG(INFO) << "Run INFRT-TRT FP16 mode";
} else {
LOG(INFO) << "You specify FP16 mode, but the hardware do not support "
"FP16 speed up, use FP32 instead.";
}
}
if (build.tf32) {
config.setFlag(BuilderFlag::kTF32);
bool support_tf32 = builder_->platformHasTf32();
if (support_tf32) {
LOG(INFO) << "Run INFRT-TRT TF32 mode";
} else {
LOG(INFO) << "You specify TF32 mode, but the hardware do not support "
"TF32 speed up, use FP32 instead.";
}
}
// TODO(wilber): other precision.
// TODO(wilber): precision config.
switch (build.precision_constraints) {
case PrecisionConstraints::kNONE:
// It's the default for TensorRT.
break;
#if IS_TRT_VERSION_GE(8200)
case PrecisionConstraints::kOBEY:
config.setFlag(BuilderFlag::kOBEY_PRECISION_CONSTRAINTS);
break;
case PrecisionConstraints::kPREFER:
config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS);
break;
#endif // IS_TRT_VERSION_GE(8200)
default:
break;
}
// TODO(TRT): DLA config.
// TODO(TRT): int8 config.
// TODO(TRT): support int8
if (build.int8) {
assert(false);
config.setFlag(BuilderFlag::kINT8);
bool support_int8 = builder_->platformHasFastInt8();
if (support_int8) {
LOG(INFO) << "Run INFRT-TRT FP16 mode";
}
}
// TODO(TRT): calib config.
// TODO(TRT): sparse config.
return true;
}
void TrtEngine::PrepareOutputHandle(const std::string& out_name) {
::Tensor t;
outputs_.emplace(out_name, t);
}
::Tensor* TrtEngine::GetOutput(const std::string& name) {
return &outputs_[name];
}
size_t TrtEngine::GetOutputNum() const { return outputs_.size(); }
bool TrtEngine::SetUpInference(
const InferenceOptions& inference,
const std::unordered_map<std::string, ::Tensor*>& inputs) {
// TODO(wilber): now only create one exec_context
FreshDeviceId();
CHECK(engine_ != nullptr);
nvinfer1::IExecutionContext* ec = engine_->createExecutionContext();
CHECK(ec != nullptr);
contexts_.emplace_back(ec);
bindings_.emplace_back(new Bindings());
for (const auto& it : inputs) {
const int bind_index = engine_->getBindingIndex(it.first.c_str());
bindings_.front()->AddBinding(
bind_index, it.first, true, it.second, nvinfer1::DataType::kFLOAT);
}
for (auto& it : outputs_) {
const int bind_index = engine_->getBindingIndex(it.first.c_str());
bindings_.front()->AddBinding(
bind_index, it.first, false, &it.second, nvinfer1::DataType::kFLOAT);
}
return true;
}
void TrtEngine::Run(const ::phi::GPUContext& ctx) {
if (is_dynamic_shape_) {
DynamicRun(ctx);
} else {
StaticRun(ctx);
}
}
void TrtEngine::StaticRun(const ::phi::GPUContext& ctx) {
const int num_bindings = engine_->getNbBindings();
std::vector<void*> buffers(num_bindings, nullptr);
int runtime_batch = -1;
auto input_binds = bindings_.front()->GetInputBindings();
for (auto bind : input_binds) {
const int bind_index = engine_->getBindingIndex(bind.name.c_str());
buffers[bind_index] =
const_cast<void*>(static_cast<const void*>(bind.buffer->data<float>()));
if (runtime_batch != -1) {
CHECK_EQ(runtime_batch,
::phi::vectorize<int64_t>(bind.buffer->dims())[0]);
}
runtime_batch = bind.buffer->dims()[0];
}
auto output_binds = bindings_.front()->GetOutputBindings();
for (auto bind : output_binds) {
const int bind_index = engine_->getBindingIndex(bind.name.c_str());
std::vector<int32_t> ddim;
auto dims = engine_->getBindingDimensions(bind_index);
CHECK_NE(runtime_batch, -1) << "runtime_batch should not be -1.";
ddim.push_back(runtime_batch);
for (int i = 0; i < dims.nbDims; ++i) {
ddim.push_back(dims.d[i]);
}
bind.buffer->Resize(::phi::make_ddim(ddim));
// TODO(wilber): now only support float output.
ctx.Alloc<float>(bind.buffer, sizeof(float) * bind.buffer->numel());
buffers[bind_index] = static_cast<void*>(bind.buffer->data<float>());
}
contexts_.front()->enqueue(
runtime_batch, buffers.data(), ctx.stream(), nullptr);
}
void TrtEngine::DynamicRun(const ::phi::GPUContext& ctx) {
const int num_bindings = engine_->getNbBindings();
std::vector<void*> buffers(num_bindings, nullptr);
auto input_binds = bindings_.front()->GetInputBindings();
for (auto bind : input_binds) {
const int bind_index = engine_->getBindingIndex(bind.name.c_str());
buffers[bind_index] =
const_cast<void*>(static_cast<const void*>(bind.buffer->data<float>()));
nvinfer1::Dims trt_dims;
trt_dims.nbDims = bind.buffer->dims().size();
for (int i = 0; i < trt_dims.nbDims; ++i) {
trt_dims.d[i] = bind.buffer->dims()[i];
}
contexts_.front()->setBindingDimensions(bind_index, trt_dims);
}
CHECK(contexts_.front()->allInputDimensionsSpecified());
auto output_binds = bindings_.front()->GetOutputBindings();
for (auto bind : output_binds) {
const int bind_index = engine_->getBindingIndex(bind.name.c_str());
auto dims = contexts_.front()->getBindingDimensions(bind_index);
std::vector<int32_t> ddim(dims.nbDims);
for (int i = 0; i < dims.nbDims; ++i) {
ddim[i] = dims.d[i];
}
bind.buffer->Resize(::phi::make_ddim(ddim));
ctx.Alloc<float>(bind.buffer, sizeof(float) * bind.buffer->numel());
buffers[bind_index] = static_cast<void*>(bind.buffer->data<float>());
}
contexts_.front()->enqueueV2(buffers.data(), ctx.stream(), nullptr);
}
void TrtEngine::FreshDeviceId() {
int count;
cudaGetDeviceCount(&count);
CHECK_LT(device_id_, count);
::phi::backends::gpu::SetDeviceId(device_id_);
}
void TrtEngine::GetEngineInfo() {
#if IS_TRT_VERSION_GE(8200)
LOG(INFO) << "====== engine info ======";
std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
engine_->createEngineInspector());
infer_inspector->setExecutionContext(contexts_.front().get());
LOG(INFO) << infer_inspector->getEngineInformation(
nvinfer1::LayerInformationFormat::kONELINE);
LOG(INFO) << "====== engine info end ======";
#else
LOG(INFO) << "Inspector needs TensorRT version 8.2 and after.";
#endif
}
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include "paddle/infrt/backends/tensorrt/trt_options.h"
#include "paddle/infrt/backends/tensorrt/trt_utils.h"
#include "paddle/phi/backends/dynload/tensorrt.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
namespace infrt {
namespace backends {
namespace tensorrt {
using namespace nvinfer1; // NOLINT
// The TensorRT programming model is as follows:
// 1. The build phase:
// IBuilder* builder = createInferBuilder(&logger_);
// 2. Create a network definition:
// INetworkDefinition* network = builder->createNetworkV2(...);
// 3. Build network:
// network->AddLayer(...)
// 4. Configure network:
// IBuilderConfig* config = builder->createBuilderConfig();
// config->setMaxWorkspaceSize(...)
// 5. Build a serialized plan and deserialize it into a cuda engine:
// IHostMemory* serialized_model = builder->buildSerializedNetwork(...);
// IRuntime* runtime = createInferRuntime(&logger_);
// ICudaEngine* engine = runtime->deserializeCudaEngine(...);
// 6. Get execution context:
// IExecutionContext* exec_context = engine->createExecutionContext();
// 7. Set input data:
// int32_t input_index = engine->getBindingIndex("input");
// int32_t output_index = engine->getBindingIndex("output");
// void* buffers[2];
// buffers[input_index] = input_buffer;
// buffers[output_index] = output_buffer;
// 8. Perform inference:
// exec_context->enqueueV2(buffers, stream, nullptr);
//
// We have encapsulated this logic; please use the following programming model
// (a fuller usage sketch follows the class declaration below).
//
// TrtEngine trt_engine;
// trt_engine.Build(...);
// trt_engine.SetUpInference(...);
// trt_engine.Run(...);
class TrtEngine {
public:
explicit TrtEngine(int device_id = 0);
TrtEngine(const TrtEngine&) = delete;
TrtEngine& operator=(const TrtEngine&) = delete;
TrtEngine(TrtEngine&&) = default;
TrtEngine& operator=(TrtEngine&&) = default;
nvinfer1::IBuilder* GetTrtBuilder();
// TODO(wilber): Modify signature after infrt-trt ready.
void Build(TrtUniquePtr<nvinfer1::INetworkDefinition> network,
const BuildOptions& build_options);
// TODO(wilber): Modify signature after infrt-trt ready.
void Run(const ::phi::GPUContext& ctx);
// TODO(wilber): How to support multiple execution contexts?
bool SetUpInference(const InferenceOptions& inference,
const std::unordered_map<std::string, ::Tensor*>& inputs);
void GetEngineInfo();
void PrepareOutputHandle(const std::string& out_name);
// TODO(wilber): The output tensor names are: output_0, output_1, ...
::Tensor* GetOutput(const std::string&);
size_t GetOutputNum() const;
private:
void FreshDeviceId();
bool SetupNetworkAndConfig(const BuildOptions& build,
INetworkDefinition& network, // NOLINT
IBuilderConfig& config); // NOLINT
bool NetworkToEngine(const BuildOptions& build);
bool ModelToBuildEnv(TrtUniquePtr<nvinfer1::INetworkDefinition> network,
const BuildOptions& build);
void StaticRun(const ::phi::GPUContext& ctx);
void DynamicRun(const ::phi::GPUContext& ctx);
private:
std::unique_ptr<TrtLogger> logger_{nullptr};
TrtUniquePtr<nvinfer1::IBuilder> builder_{nullptr};
TrtUniquePtr<INetworkDefinition> network_{nullptr};
std::unique_ptr<IHostMemory> serialized_engine_{nullptr};
TrtUniquePtr<nvinfer1::ICudaEngine> engine_{nullptr};
std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> contexts_;
std::vector<std::unique_ptr<Bindings>> bindings_;
int device_id_{0};
bool is_dynamic_shape_{false};
std::unordered_map<std::string, ::Tensor> outputs_;
};
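// A fuller usage sketch (illustrative only, distilled from the unit tests in
// this directory; the network-construction helper and the tensor names
// "input_0"/"output_0" are assumptions, not part of this header):
//
//   TrtEngine engine(/*device_id=*/0);
//   auto net = ConstructNetwork(engine.GetTrtBuilder(), ...);  // hypothetical
//   BuildOptions build_options;
//   build_options.max_batch = 4;
//   engine.Build(std::move(net), build_options);
//
//   InferenceOptions inference_options;
//   inference_options.batch = 2;
//   std::unordered_map<std::string, ::Tensor*> inputs{{"input_0", &input}};
//   engine.PrepareOutputHandle("output_0");
//   engine.SetUpInference(inference_options, inputs);
//   engine.Run(gpu_context);                       // a ::phi::GPUContext
//   ::Tensor* out = engine.GetOutput("output_0");  // device memory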
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInfer.h>
#include <string>
#include <unordered_map>
#include <vector>
namespace infrt {
namespace backends {
namespace tensorrt {
// Build default params
constexpr int32_t max_batch_not_provided{0};
constexpr int32_t default_workspace{16};
// Inference default params
constexpr int32_t default_batch{1};
constexpr int32_t batch_not_provided{0};
enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };
enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };
using ShapeRange =
std::array<std::vector<int32_t>,
nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
struct BuildOptions {
// Set max batch size.
int32_t max_batch{max_batch_not_provided};
// Set workspace size in megabytes (default = 16)
int32_t workspace{default_workspace};
// Enable tf32 precision, in addition to fp32 (default = disabled)
bool tf32{false};
// Enable fp16 precision, in addition to fp32 (default = disabled)
bool fp16{false};
// Enable int8 precision, in addition to fp32 (default = disabled)
bool int8{false};
// Control precision constraints. (default = none)
  // Precision constraints: none, obey, prefer
// none = no constraints
// prefer = meet precision constraints if possible
// obey = meet precision constraints or fail otherwise
PrecisionConstraints precision_constraints{PrecisionConstraints::kNONE};
// Save the serialized engine.
bool save{false};
// Load a serialized engine.
bool load{false};
// Build with dynamic shapes using a profile with the min, max and opt shapes
// provided
std::unordered_map<std::string, ShapeRange> shapes;
// Type and format of each of the input tensors (default = all inputs in
// fp32:chw)
std::vector<IOFormat> input_formats;
// Type and format of each of the output tensors (default = all outputs in
// fp32:chw)
std::vector<IOFormat> output_formats;
};
struct InferenceOptions {
int32_t batch{batch_not_provided};
std::unordered_map<std::string, std::vector<int32_t>> shapes;
};
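// A minimal configuration sketch (illustrative only, mirroring the dynamic
// shape unit test; "input_0" is an assumed tensor name). The three ShapeRange
// slots are indexed by nvinfer1::OptProfileSelector, i.e. kMIN, kOPT, kMAX:
//   BuildOptions build_options;
//   build_options.max_batch = 4;
//   build_options.workspace = 32;  // MiB
//   build_options.shapes["input_0"][0] = {1, 3, 16, 16};  // kMIN
//   build_options.shapes["input_0"][1] = {2, 3, 28, 28};  // kOPT
//   build_options.shapes["input_0"][2] = {4, 3, 28, 28};  // kMAX
//
//   InferenceOptions inference_options;
//   inference_options.batch = 2;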
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <NvInferRuntimeCommon.h>
#include <glog/logging.h>
#include <algorithm>
#include <cassert>
#include <functional>
#include <memory>
#include <unordered_map>
#include "paddle/phi/core/dense_tensor.h"
namespace infrt {
namespace backends {
namespace tensorrt {
#define IS_TRT_VERSION_GE(version) \
((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) >= version)
#define IS_TRT_VERSION_LT(version) \
((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) < version)
#define TRT_VERSION \
NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD
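// For example (illustrative arithmetic only): with TensorRT 8.2.1.8,
// TRT_VERSION = 8 * 1000 + 2 * 100 + 1 * 10 + 8 = 8218, so
// IS_TRT_VERSION_GE(8200) holds and IS_TRT_VERSION_LT(8000) does not.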
inline nvinfer1::Dims VecToDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
assert(false);
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims;
dims.nbDims = std::min(static_cast<int>(vec.size()), limit);
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
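// A minimal usage sketch of VecToDims (illustrative only):
//   std::vector<int> shape{1, 3, 28, 28};
//   nvinfer1::Dims dims = VecToDims(shape);
//   // dims.nbDims == 4 and dims.d[0..3] == {1, 3, 28, 28}.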
template <typename T>
struct TrtDestroyer {
void operator()(T* t) { t->destroy(); }
};
template <typename T>
using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
class TrtLogger : public nvinfer1::ILogger {
public:
void log(nvinfer1::ILogger::Severity severity,
const char* msg) noexcept override {
switch (severity) {
case Severity::kVERBOSE:
VLOG(3) << msg;
break;
case Severity::kINFO:
VLOG(2) << msg;
break;
case Severity::kWARNING:
LOG(WARNING) << msg;
break;
case Severity::kINTERNAL_ERROR:
case Severity::kERROR:
LOG(ERROR) << msg;
break;
default:
break;
}
}
nvinfer1::ILogger& GetTrtLogger() noexcept { return *this; }
~TrtLogger() override = default;
};
struct Binding {
bool is_input{false};
nvinfer1::DataType data_type{nvinfer1::DataType::kFLOAT};
::Tensor* buffer{nullptr};
std::string name;
};
class Bindings {
public:
Bindings() = default;
void AddBinding(int32_t b,
const std::string& name,
bool is_input,
::Tensor* buffer,
nvinfer1::DataType data_type) {
while (bindings_.size() <= static_cast<size_t>(b)) {
bindings_.emplace_back();
}
names_[name] = b;
bindings_[b].buffer = buffer;
bindings_[b].is_input = is_input;
bindings_[b].data_type = data_type;
bindings_[b].name = name;
}
std::vector<Binding> GetInputBindings() {
return GetBindings([](const Binding& b) -> bool { return b.is_input; });
}
std::vector<Binding> GetOutputBindings() {
return GetBindings([](const Binding& b) -> bool { return !b.is_input; });
}
std::vector<Binding> GetBindings() {
return GetBindings([](const Binding& b) -> bool { return true; });
}
std::vector<Binding> GetBindings(
std::function<bool(const Binding& b)> predicate) {
std::vector<Binding> bindings;
for (const auto& b : bindings_) {
if (predicate(b)) {
bindings.push_back(b);
}
}
return bindings;
}
private:
std::unordered_map<std::string, int32_t> names_;
std::vector<Binding> bindings_;
};
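// A minimal usage sketch of Bindings (illustrative only; `input` and `output`
// are assumed ::Tensor instances, and in the real code path the binding
// indices come from ICudaEngine::getBindingIndex):
//   Bindings bindings;
//   bindings.AddBinding(
//       0, "input_0", /*is_input=*/true, &input, nvinfer1::DataType::kFLOAT);
//   bindings.AddBinding(
//       1, "output_0", /*is_input=*/false, &output, nvinfer1::DataType::kFLOAT);
//   auto ins = bindings.GetInputBindings();    // one element: "input_0"
//   auto outs = bindings.GetOutputBindings();  // one element: "output_0"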
} // namespace tensorrt
} // namespace backends
} // namespace infrt
core_gather_headers()
set(core_includes
"${core_includes};infrt/common/dtype.def"
CACHE INTERNAL "")
gather_srcs(
infrt_src
SRCS
dtype.cc
global.cc
target.cc
type.cc
shared.cc
object.cc
string.cc
buffer.cc
memory.cc)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/buffer.h"
#include <stdarg.h>
#include <stdio.h>
#include <cmath>
namespace infrt {
void Buffer::Resize(uint32_t size) {
if (size_ > 0) {
Free();
size_ = 0;
}
if (size_ != size) {
data_.memory = reinterpret_cast<uint8_t*>(Malloc(size));
size_ = size;
}
}
void Buffer::Resize(uint32_t alignment, uint32_t size) {
if (size_ > 0) {
Free();
size_ = 0;
}
if (size_ != size) {
data_.memory = reinterpret_cast<uint8_t*>(AlignedAlloc(alignment, size));
size_ = size;
}
}
void Buffer::SetTarget(const infrt::common::Target& target) {
target_ = target;
memory_mng_cache_ = MemoryManager::Global().RetrieveSafely(target_.arch);
}
void Buffer::ResizeLazy(uint32_t size) {
if (size <= size_) return;
Resize(size);
}
void Buffer::ResizeLazy(uint32_t alignment, uint32_t size) {
if (size <= size_) return;
Resize(alignment, size);
}
void Buffer::Resize(uint32_t size, const infrt::common::Target& target) {
if (target.arch != target_.arch) {
Free();
SetTarget(target);
}
Resize(size);
}
void Buffer::Resize(uint32_t alignment,
uint32_t size,
const infrt::common::Target& target) {
if (target.arch != target_.arch) {
Free();
SetTarget(target);
}
Resize(alignment, size);
}
void Buffer::ResizeLazy(uint32_t size, const infrt::common::Target& target) {
if (target.arch != target_.arch) {
Free();
SetTarget(target);
}
ResizeLazy(size);
}
void Buffer::ResizeLazy(uint32_t alignment,
uint32_t size,
const infrt::common::Target& target) {
if (target.arch != target_.arch) {
Free();
SetTarget(target);
}
ResizeLazy(alignment, size);
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <memory>
#include "paddle/infrt/common/macros.h"
#include "paddle/infrt/common/memory.h"
#include "paddle/infrt/common/target.h"
namespace infrt {
#ifdef __cplusplus
extern "C" {
#endif
#define INFRT_ALWAYS_INLINE __attribute__((always_inline)) inline
//! Code for the primitive types supported in INFRT.
typedef enum infrt_type_code_t {
infrt_type_unk = -1, //! Unknown type
infrt_type_int = 0, //! signed int
infrt_type_uint = 1, //! unsigned int
infrt_type_float = 2, //! floating point
infrt_type_handle = 3 //! void*
} infrt_type_code_t;
#ifndef INFRT_ATTRIBUTE_ALIGN
#define INFRT_ATTRIBUTE_ALIGN(n) __attribute__((aligned(n)))
#endif
/**
 * A runtime tag for a type in the INFRT system.
*/
typedef struct infrt_type_t {
#if __cplusplus >= 201103L
INFRT_ATTRIBUTE_ALIGN(1) infrt_type_code_t code;
#else
uint8_t code;
#endif
//! Number of bits.
uint8_t bits;
//! Number of elements in a vector, 1 for scalar.
uint16_t lanes;
  //! Number of '*'s, e.g. for `float*` num_asterisks is 1, and for `float**`
  //! it is 2.
uint8_t num_asterisks{0};
#ifdef __cplusplus
INFRT_ALWAYS_INLINE infrt_type_t()
: code(infrt_type_int), bits(0), lanes(0) {}
INFRT_ALWAYS_INLINE infrt_type_t(infrt_type_code_t code,
uint8_t bits,
uint16_t lanes = 1,
uint8_t num_asterisks = 0)
: code(code), bits(bits), lanes(lanes), num_asterisks(num_asterisks) {}
INFRT_ALWAYS_INLINE bool operator==(const infrt_type_t& other) const {
return code == other.code && bits == other.bits && lanes == other.lanes;
}
INFRT_ALWAYS_INLINE bool operator!=(const infrt_type_t& other) const {
return !(*this == other);
}
INFRT_ALWAYS_INLINE uint16_t bytes() const { return (bits + 7) / 8; }
#endif // __cplusplus
} infrt_type_t;
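// A minimal usage sketch (illustrative only):
//   infrt_type_t f32(infrt_type_float, /*bits=*/32);         // scalar float
//   // f32.bytes() == 4
//   infrt_type_t f32x4(infrt_type_float, 32, /*lanes=*/4);   // 4-lane vector
//   infrt_type_t f32_ptr(infrt_type_float, 32, /*lanes=*/1,
//                        /*num_asterisks=*/1);               // float*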
//! Helps to define the size of a dimension. Due to the polyhedral
//! representation, there is no need to record the extent or the min
//! (which defaults to 0).
typedef int infrt_dimension_t;
//! Helps to tell the kind of device.
typedef enum infrt_device_kind_t {
infrt_unk_device = -1, // Undefined device.
infrt_x86_device = 0, // X86 device
infrt_opencl_device = 1, // OpenCL device
infrt_arm_device = 2 // ARM device
} infrt_device_kind_t;
struct infrt_buffer_t;
/**
* All INFRT backends implementation should provide an interface to be used.
*/
struct infrt_device_interface_impl_t;
struct infrt_device_interface_t {
int (*malloc)(void* context, struct infrt_buffer_t* buf);
int (*free)(void* context, struct infrt_buffer_t* buf);
int (*sync)(void* context, struct infrt_buffer_t* buf);
int (*release)(void* context,
const struct infrt_device_interface_t* device_interface);
int (*copy_to_host)(void* context, struct infrt_buffer_t* buf);
int (*copy_to_device)(void* context, struct infrt_buffer_t* buf);
int (*buffer_copy)(void* context,
struct infrt_buffer_t* src,
struct infrt_buffer_t* dst);
struct infrt_device_interface_impl_t* impl;
};
//! The raw representation of a buffer, used in the generated code/lib.
#define INFRT_BUFFER_MAX_DIMS 8
typedef struct infrt_buffer_t {
//! Tell which kind of device this buffer locates.
infrt_device_kind_t device;
//! The interface used to operate on device.
const struct infrt_device_interface_t* device_interface;
//! A pointer to the memory in host.
uint8_t* memory;
//! Extra flags.
uint64_t flag;
//! Data type.
infrt_type_t type;
//! Number of dimensions.
int32_t dimensions;
infrt_dimension_t dims[INFRT_BUFFER_MAX_DIMS];
//! Allocate and deallocate lazily, default true.
char lazy;
  //! The actual memory size (in bytes).
uint64_t memory_size;
uint16_t align;
#ifdef __cplusplus
infrt_buffer_t()
: device(infrt_unk_device),
device_interface(NULL),
memory(NULL),
flag(0UL),
type(infrt_type_t()),
dimensions(0),
lazy(true),
memory_size(0),
align(0) {}
static void delete_(struct infrt_buffer_t* x) { delete x; }
~infrt_buffer_t() {}
// NOTE the buffer should be resized first.
static void alloc(struct infrt_buffer_t*);
  //! Set the shape of the buffer. NOTE this just records the shape; it does
  //! not allocate the memory.
INFRT_ALWAYS_INLINE void resize(const infrt_dimension_t* dims,
int dimensions) {
this->dimensions = dimensions;
memcpy(this->dims, dims, dimensions * sizeof(infrt_dimension_t));
}
INFRT_ALWAYS_INLINE uint64_t num_elements() const {
uint64_t res = 1;
for (int i = 0; i < dimensions; i++) {
res *= dims[i];
}
return res;
}
INFRT_ALWAYS_INLINE int device_sync(void* ctx = NULL) {
if (device_interface && device_interface->sync) {
return device_interface->sync(ctx, this);
}
return 0;
}
INFRT_ALWAYS_INLINE uint8_t* begin() const { return 0; }
INFRT_ALWAYS_INLINE uint8_t* end() const {
return memory + num_elements() * type.bytes();
}
#endif // __cplusplus
} infrt_buffer_t;
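// A minimal usage sketch (illustrative only); resize() only records the shape,
// the payload memory is allocated separately:
//   infrt_buffer_t buf;
//   buf.type = infrt_type_t(infrt_type_float, 32);
//   infrt_dimension_t dims[] = {2, 3};
//   buf.resize(dims, 2);
//   // buf.num_elements() == 6, so the payload needs
//   // buf.num_elements() * buf.type.bytes() == 24 bytes.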
#ifdef __cplusplus
struct infrt_device_interface_impl_t {
int (*malloc)(void* context, struct infrt_buffer_t* buf);
int (*free)(void* context, struct infrt_buffer_t* buf);
int (*sync)(void* context, struct infrt_buffer_t* buf);
int (*release)(void* context);
int (*copy_to_host)(void* context, struct infrt_buffer_t* buf);
int (*copy_to_device)(void* context, struct infrt_buffer_t* buf);
int (*buffer_copy)(void* context,
struct infrt_buffer_t* src,
struct infrt_buffer_t* dst);
};
// The device implementations
extern struct infrt_device_interface_t* infrt_x86_device_interface();
#endif // __cplusplus
#ifdef __cplusplus
} // extern "C"
#endif
#define INFRT_LOG(fmt, ...) \
do { \
fprintf(stderr, \
"%s:%d:%s(): " fmt, \
__FILE__, \
__LINE__, \
__func__, \
__VA_ARGS__); \
} while (0)
#define INFRT_CHECK(cond) \
if (!(cond)) { \
INFRT_LOG("check %s failed", #cond); \
abort(); \
}
/**
* Buffer helps to hold the memory, and offers a set of methods to help manage
* the memory.
*/
struct Buffer final {
Buffer() = default;
explicit Buffer(const common::Target& target) { SetTarget(target); }
  //! Resize the memory held by this buffer *exactly* to \p size.
void Resize(uint32_t size);
void Resize(uint32_t alignment, uint32_t size);
//! Lazily resize the memory.
void ResizeLazy(uint32_t size);
void ResizeLazy(uint32_t alignment, uint32_t size);
//! Resize the memory to \p size in target \p target.
void Resize(uint32_t size, const common::Target& target);
void Resize(uint32_t alignment, uint32_t size, const common::Target& target);
//! Lazily resize the memory to \p size in target \p target.
void ResizeLazy(uint32_t size, const common::Target& target);
void ResizeLazy(uint32_t alignment,
uint32_t size,
const common::Target& target);
void SetTarget(const common::Target& target);
const infrt_buffer_t* data() const { return &data_; }
infrt_buffer_t* data() { return &data_; }
//! Free all the memory owned by this buffer.
void Free() {
if (!data_.memory) return;
memory_mng_cache_->free(data_.memory);
}
private:
inline void* Malloc(uint32_t size) INFRT_RESULT_SHOULD_USE {
CHECK(memory_mng_cache_) << "Should set target first";
return memory_mng_cache_->malloc(size);
}
inline void* AlignedAlloc(uint32_t alignment,
uint32_t size) INFRT_RESULT_SHOULD_USE {
CHECK(memory_mng_cache_) << "Should set target first";
return memory_mng_cache_->aligned_alloc(alignment, size);
}
private:
infrt_buffer_t data_;
//! The place where this buffer locates.
common::Target target_;
//! Number of bytes of this buffer.
uint32_t size_{};
//! Hold the corresponding memory manager for speed.
MemoryInterface* memory_mng_cache_{};
};
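// A minimal usage sketch (illustrative only; assumes a host-side
// common::Target instance named `x86_target` whose arch is X86):
//   Buffer buf;
//   buf.SetTarget(x86_target);  // must be set before any allocation
//   buf.Resize(256);            // allocates exactly 256 bytes
//   buf.ResizeLazy(128);        // no-op, 128 <= current size
//   buf.ResizeLazy(512);        // grows the buffer to 512 bytes
//   buf.Free();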
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/infrt/common/macros.h"
#include "paddle/infrt/common/shared.h"
#include "paddle/infrt/common/target.h"
#include "paddle/infrt/common/type.h"
namespace infrt {
// export some general concepts.
using common::make_shared;
using common::Object;
using common::ref_count;
using common::Shared;
// Type related.
using common::Bool;
using common::Float;
using common::Int;
using common::UInt;
using common::Void;
using common::type_of;
using common::Target;
using common::Type;
using common::UnkTarget;
template <typename T>
T& Reference(const T* x) {
return *const_cast<T*>(x);
}
static void CheckVarNameValid(const std::string& name) {
CHECK(!name.empty());
CHECK(name.find(' ') == std::string::npos && //
name.find('.') == std::string::npos && //
name.find('/') == std::string::npos && //
name.find('\t') == std::string::npos && //
name.find('\n') == std::string::npos && //
name.find('\r') == std::string::npos)
<< "Some invalid character found";
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/dtype.h"
namespace infrt {
const char* DType::name() const {
switch (kind_) {
case Kind::Unk:
return "Unk";
break;
#define INFRT_DTYPE(enum__, value__) \
case Kind::enum__: \
return #enum__; \
break;
#include "paddle/infrt/common/dtype.def"
#undef INFRT_DTYPE
}
return "";
}
size_t DType::GetHostSize() const {
switch (kind_) {
#define INFRT_DTYPE(enum__, value__) \
case DType::Kind::enum__: \
return sizeof(DTypeInternal<DType::Kind::enum__>::type);
#include "paddle/infrt/common/dtype.def" // NOLINT
#undef INFRT_DTYPE
case Kind::Unk:
return 0;
break;
}
return 0;
}
} // namespace infrt
// Define all INFRT dtypes
// DTYPE(ENUM, VALUE)
#ifdef INFRT_DTYPE
INFRT_DTYPE(UI8, 1)
INFRT_DTYPE(UI16, 2)
INFRT_DTYPE(UI32, 3)
INFRT_DTYPE(UI64, 4)
INFRT_DTYPE(I1, 5)
INFRT_DTYPE(I8, 6)
INFRT_DTYPE(I16, 7)
INFRT_DTYPE(I32, 8)
INFRT_DTYPE(I64, 9)
INFRT_DTYPE(F32, 10)
INFRT_DTYPE(F64, 11)
INFRT_DTYPE(STRING, 12)
#endif
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>
namespace infrt {
class DType {
public:
enum class Kind : uint8_t {
Unk = 0,
// Automatically generate the enum definition
#define INFRT_DTYPE(enum__, value__) enum__ = value__,
#include "paddle/infrt/common/dtype.def"
#undef INFRT_DTYPE
BOOL = I1,
};
DType() = default;
explicit constexpr DType(Kind kind) : kind_(kind) { assert(IsValid()); }
DType(const DType&) = default;
DType& operator=(const DType&) = default;
bool operator==(DType other) const { return kind_ == other.kind_; }
bool operator!=(DType other) const { return !(*this == other); }
constexpr Kind kind() const { return kind_; }
bool IsValid() const { return kind_ != Kind::Unk; }
bool IsInvalid() const { return !IsValid(); }
const char* name() const;
size_t GetHostSize() const;
private:
Kind kind_{Kind::Unk};
};
template <typename T>
constexpr DType GetDType();
template <DType::Kind kind>
struct DTypeInternal;
#define INFRT_IMPL_GET_DTYPE(cpp_type__, enum__) \
template <> \
inline constexpr DType GetDType<cpp_type__>() { \
return DType{DType::Kind::enum__}; \
} \
template <> \
struct DTypeInternal<DType::Kind::enum__> { \
using type = cpp_type__; \
};
INFRT_IMPL_GET_DTYPE(bool, I1);
INFRT_IMPL_GET_DTYPE(int8_t, I8);
INFRT_IMPL_GET_DTYPE(int16_t, I16);
INFRT_IMPL_GET_DTYPE(int32_t, I32);
INFRT_IMPL_GET_DTYPE(int64_t, I64);
INFRT_IMPL_GET_DTYPE(uint8_t, UI8);
INFRT_IMPL_GET_DTYPE(uint16_t, UI16);
INFRT_IMPL_GET_DTYPE(uint32_t, UI32);
INFRT_IMPL_GET_DTYPE(uint64_t, UI64);
INFRT_IMPL_GET_DTYPE(float, F32);
INFRT_IMPL_GET_DTYPE(double, F64);
INFRT_IMPL_GET_DTYPE(std::string, STRING);
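// A minimal usage sketch (illustrative only):
//   DType f32 = GetDType<float>();
//   // f32.kind() == DType::Kind::F32, f32.name() == "F32",
//   // f32.GetHostSize() == sizeof(float) == 4.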
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/global.h"
namespace infrt {
Global::Global() {}
mlir::MLIRContext* Global::context = nullptr;
mlir::MLIRContext* Global::getMLIRContext() {
if (nullptr == context) {
context = new mlir::MLIRContext();
}
return context;
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mlir/IR/MLIRContext.h>
#include "paddle/infrt/tensor/dense_host_tensor.h"
namespace infrt {
// global variables
class Global {
private:
static mlir::MLIRContext *context;
Global();
public:
static mlir::MLIRContext *getMLIRContext();
}; // class Global
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#if !defined(NDEBUG)
#define INFRT_DEBUG
#endif
#define INFRT_DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&) = delete; \
void operator=(const TypeName&) = delete
#ifndef INFRT_NOT_IMPLEMENTED
#define INFRT_NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented";
#endif
#define INFRT_RESULT_SHOULD_USE __attribute__((warn_unused_result))
/**
 * A trick to force the registration code to run.
*
* usage:
*
* INFRT_REGISTER_HELPER(some_key) {
* // register methods
* }
*
* INFRT_USE_REGISTER(some_key);
*/
#define INFRT_REGISTER_HELPER(symbol__) bool __infrt__##symbol__##__registrar()
#define INFRT_USE_REGISTER(symbol__) \
extern bool __infrt__##symbol__##__registrar(); \
[[maybe_unused]] static bool __infrt_extern_registrar_##symbol__ = \
__infrt__##symbol__##__registrar();
#if __cplusplus >= 201703L
#define INFRT_NODISCARD [[nodiscard]]
#else
#define INFRT_NODISCARD
#endif
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/memory.h"
namespace infrt {
using infrt::common::Target;
namespace {
class X86MemoryMng : public MemoryInterface {
public:
void* malloc(size_t nbytes) override { return ::malloc(nbytes); }
void free(void* data) override {
if (!data) return;
::free(data);
}
void* aligned_alloc(size_t alignment, size_t nbytes) override {
return ::aligned_alloc(alignment, nbytes);
}
};
} // namespace
MemoryManager::MemoryManager() {
Register(Target::Arch::Unk, new X86MemoryMng);
Register(Target::Arch::X86, new X86MemoryMng);
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <memory>
#include <unordered_map>
#include "paddle/infrt/common/macros.h"
#include "paddle/infrt/common/target.h"
namespace infrt {
class MemoryInterface {
public:
virtual void* malloc(size_t nbytes) = 0;
virtual void free(void* data) = 0;
virtual void* aligned_alloc(size_t alignment, size_t nbytes) {
return nullptr;
}
virtual ~MemoryInterface() {}
};
/**
 * MemoryManager holds a map of MemoryInterface instances, one per architecture.
*/
class MemoryManager final {
public:
using key_t = common::Target::Arch;
static MemoryManager& Global() {
static auto* x = new MemoryManager;
return *x;
}
MemoryInterface* Retrieve(key_t key) INFRT_RESULT_SHOULD_USE {
auto it = memory_mngs_.find(key);
if (it != memory_mngs_.end()) return it->second.get();
return nullptr;
}
MemoryInterface* RetrieveSafely(key_t key) {
auto* res = Retrieve(key);
CHECK(res) << "no MemoryInterface for architecture [" << key << "]";
return res;
}
MemoryInterface* Register(key_t key, MemoryInterface* item) {
CHECK(!memory_mngs_.count(key)) << "Duplicate register [" << key << "]";
memory_mngs_[key].reset(item);
return item;
}
private:
MemoryManager();
std::unordered_map<common::Target::Arch, std::unique_ptr<MemoryInterface>>
memory_mngs_;
INFRT_DISALLOW_COPY_AND_ASSIGN(MemoryManager);
};
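// A minimal usage sketch (illustrative only):
//   MemoryInterface* mem =
//       MemoryManager::Global().RetrieveSafely(common::Target::Arch::X86);
//   void* p = mem->malloc(64);
//   mem->free(p);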
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/object.h"
namespace infrt {
namespace common {} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstring>
#include <iostream>
#include "paddle/infrt/common/shared.h"
namespace infrt {
namespace common {
template <typename T>
class Shared;
/**
 * Object is the basic element in INFRT; with the `Shared` wrapper, an object
 * can be shared across the system.
*/
struct Object {
//! Get the type representation of this object.
virtual const char* type_info() const = 0;
virtual ~Object() {}
//! Cast to a derived type.
template <typename T>
T* as() {
return static_cast<T*>(this);
}
//! Cast to a derived type.
template <typename T>
const T* as() const {
return static_cast<const T*>(this);
}
//! Type safe cast.
template <typename T>
T* safe_as() {
if (std::strcmp(type_info(), T::__type_info__) == 0) {
return static_cast<T*>(this);
}
return nullptr;
}
//! Type safe cast.
template <typename T>
const T* safe_as() const {
if (std::strcmp(type_info(), T::__type_info__) == 0) {
return static_cast<const T*>(this);
}
return nullptr;
}
//! Check if the type is right.
template <typename T>
bool is_type() const {
if (std::strcmp(type_info(), T::__type_info__) == 0) {
return true;
}
return false;
}
  //! The reference count, which makes all derived types shareable.
mutable RefCount __ref_count__;
};
using object_ptr = Object*;
using shared_object = Shared<Object>;
} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/shared.h"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <atomic>
#include <string>
#include <type_traits>
namespace infrt {
namespace common {
class RefCount {
public:
using value_type = int32_t;
RefCount() = default;
value_type Inc() { return ++count_; }
value_type Dec() { return --count_; }
bool is_zero() const { return 0 == count_; }
std::string to_string() { return std::to_string(count_.load()); }
int32_t val() const { return count_; }
private:
std::atomic<value_type> count_{0};
};
class Object;
/**
* The templated methods are used to unify the way to get the RefCount instance
* in client classes.
*/
template <typename T>
RefCount& ref_count(const T* t) {
static_assert(std::is_base_of<Object, T>::value, "T is not a Object");
return t->__ref_count__;
}
template <typename T>
void Destroy(const T* t) {
delete t;
}
template <typename T>
struct Shared {
using object_ptr = T*;
Shared() = default;
explicit Shared(T* p) : p_(p) {
if (p) IncRef(p);
}
Shared(const Shared& other) : p_(other.p_) { IncRef(p_); }
Shared(Shared&& other) : p_(other.p_) { other.p_ = nullptr; }
Shared<T>& operator=(const Shared<T>& other);
//! Reset to another pointer \p x.
void Reset(T* x = nullptr);
//! Access the pointer in various ways.
// @{
inline T* get() const { return p_; }
inline T& operator*() const { return *p_; }
inline T* operator->() const { return p_; }
inline T* self() { return p_; }
inline const T* self() const { return p_; }
// @}
inline bool same_as(const Shared& other) { return p_ == other.p_; }
inline bool defined() const { return p_; }
inline bool operator<(const Shared& other) const { return p_ < other.p_; }
inline Shared<T>& operator=(T* x);
inline bool operator==(const Shared& other) const { return p_ == other.p_; }
~Shared();
private:
//! Increase the share count.
void IncRef(T* p);
//! Decrease the share count.
void DecRef(T* p);
protected:
T* p_{};
};
template <typename T>
void Shared<T>::IncRef(T* p) {
if (p) {
ref_count(p).Inc();
}
}
template <typename T>
void Shared<T>::DecRef(T* p) {
if (p) {
if (ref_count(p).Dec() == 0) {
Destroy(p);
}
}
}
template <typename T>
Shared<T>& Shared<T>::operator=(const Shared<T>& other) {
if (other.p_ == p_) return *this;
  // `other` may live inside something owned by `*this`, so be careful to
  // incref `other` before we decref ourselves.
T* tmp = other.p_;
IncRef(tmp);
DecRef(p_);
p_ = tmp;
return *this;
}
template <typename T, typename... Args>
T* make_shared(Args&&... args) {
return new T(args...);
}
template <typename T>
Shared<T>& Shared<T>::operator=(T* x) {
if (p_ == x) return *this;
T* tmp = x;
IncRef(tmp);
DecRef(p_);
p_ = tmp;
return *this;
}
template <typename T>
Shared<T>::~Shared() {
DecRef(p_);
p_ = nullptr;
}
template <typename T>
void Shared<T>::Reset(T* x) {
if (x) IncRef(x);
DecRef(p_);
p_ = x;
}
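// A minimal usage sketch (illustrative only; `Foo` is an assumed type derived
// from Object, which supplies the embedded __ref_count__):
//   struct Foo : public Object {
//     const char* type_info() const override { return "Foo"; }
//   };
//   Shared<Foo> a(make_shared<Foo>());  // ref count: 1
//   Shared<Foo> b = a;                  // ref count: 2
//   a.Reset();                          // ref count: 1
//   // When b is destroyed the count reaches 0 and the Foo is deleted.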
} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/string.h"
#include <stdarg.h>
#include <cstring>
namespace infrt {
namespace infrt {
std::string StringFormat(const std::string &fmt_str, ...) {
  /* Reserve twice the length of fmt_str */
int final_n, n = (static_cast<int>(fmt_str.size())) * 2;
std::unique_ptr<char[]> formatted;
va_list ap;
while (1) {
formatted.reset(
new char[n]); /* Wrap the plain char array into the unique_ptr */
std::strcpy(&formatted[0], fmt_str.c_str()); // NOLINT
va_start(ap, fmt_str);
final_n = vsnprintf(&formatted[0], n, fmt_str.c_str(), ap);
va_end(ap);
if (final_n < 0 || final_n >= n)
n += abs(final_n - n + 1);
else
break;
}
return std::string(formatted.get());
}
std::string Trim(const std::string &s, const char *empty) {
if (s.empty()) return s;
auto start = s.find_first_not_of(empty);
if (start == std::string::npos) return "";
auto end = s.find_last_not_of(empty);
return s.substr(start, end - start + 1);
}
std::string Uppercase(const std::string &x) {
auto res = x;
for (auto &c : res) {
c = toupper(c);
}
return res;
}
bool Startswith(const std::string &x, const std::string &str) {
return x.find(str) == 0;
}
bool Endswith(const std::string &x, const std::string &str) {
if (x.length() >= str.length()) {
return std::equal(str.rbegin(), str.rend(), x.rbegin());
}
return false;
}
std::vector<std::string> Split(const std::string &str,
const std::string &splitter) {
std::vector<std::string> results;
std::string::size_type pos1, pos2;
pos2 = str.find(splitter);
pos1 = 0;
while (std::string::npos != pos2) {
results.push_back(str.substr(pos1, pos2 - pos1));
pos1 = pos2 + splitter.size();
pos2 = str.find(splitter, pos1);
}
if (pos1 != str.length()) {
results.push_back(str.substr(pos1));
}
return results;
}
void Replace(std::string *s, const std::string &from, const std::string &to) {
size_t pos = 0;
while ((pos = s->find(from, pos)) != std::string::npos) {
s->replace(pos, from.size(), to);
pos += to.length();
}
}
size_t Count(std::string *s, const std::string &sub) {
size_t pos = 0;
size_t times = 0;
while ((pos = s->find(sub, pos)) != std::string::npos) {
if ((pos == 0 || !IsPrefix(s->at(pos - 1))) &&
(pos + sub.length() == s->size() ||
!IsSuffix(s->at(pos + sub.length())))) {
pos += sub.length();
times++;
} else {
pos++;
}
}
return times;
}
bool IsPrefix(const char &c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');
}
bool IsSuffix(const char &c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_') ||
(c >= '0' && c <= '9') || (c == '\'');
}
std::string TransValidVarName(std::string name) {
Replace(&name, ".", "__");
Replace(&name, "/", "___");
name.erase(0, name.find_first_not_of("_"));
return name;
}
} // namespace infrt
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <sstream>
#include <string>
#include <vector>
namespace infrt {
namespace infrt {
//! Get the content of a stream.
template <typename T>
std::string GetStreamCnt(const T& x);
/**
 * Construct a formatted string from a printf-style format and arguments.
 * @param fmt_str The format.
 * @param ... The parameters of the format.
 * @return The formatted string.
 */
std::string StringFormat(const std::string& fmt_str, ...);
/**
 * Join multiple fields into a single string, similar to Python's str.join.
 */
template <typename T = std::string>
std::string Join(const std::vector<T>& fields, const std::string& splitter) {
if (fields.empty()) return "";
std::stringstream ss;
for (int i = 0; i < fields.size() - 1; i++) ss << fields[i] << splitter;
ss << fields.back();
return ss.str();
}
std::vector<std::string> Split(const std::string& str,
const std::string& splitter);
std::string Trim(const std::string& s, const char* empty = " \n\r\t");
//! Convert a string to uppercase.
std::string Uppercase(const std::string& x);
//! Replace every occurrence of substring 'from' in string s with 'to'.
void Replace(std::string* s, const std::string& from, const std::string& to);
//! Count how many times substring 'sub' appears in string s as a standalone
//! identifier; occurrences embedded in a longer identifier are skipped.
size_t Count(std::string* s, const std::string& sub);
//! Tell whether a char can be the leading character of a tensor's name.
bool IsPrefix(const char& c);
//! Tell whether a char can appear in the rest of a tensor's name.
bool IsSuffix(const char& c);
//! Tell if a string \p x starts with \p str.
bool Startswith(const std::string& x, const std::string& str);
//! Tell if a string \p x ends with \p str.
bool Endswith(const std::string& x, const std::string& str);
template <typename T>
std::string GetStreamCnt(const T& x) {
std::stringstream os;
os << x;
return os.str();
}
std::string TransValidVarName(std::string name);
} // namespace infrt
} // namespace infrt
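As a quick illustration of the string helpers declared above, here is a minimal, self-contained sketch (the main() function and the sample strings are ours, not part of the removed code; the helpers live in the nested infrt::infrt namespace as declared in this header):

#include <iostream>
#include "paddle/infrt/common/string.h"

int main() {
  using namespace infrt::infrt;

  // printf-style formatting backed by vsnprintf.
  std::string msg = StringFormat("%d kernels in %s", 3, "phi");  // "3 kernels in phi"

  // Splitting and joining round-trip.
  std::vector<std::string> parts = Split("a,b,c", ",");          // {"a", "b", "c"}
  std::string joined = Join(parts, " | ");                       // "a | b | c"

  std::string trimmed = Trim("  name\n");                        // "name"

  // Count() only counts stand-alone identifier occurrences, so "x_1" is skipped.
  std::string code = "x + x_1 + x";
  size_t times = Count(&code, "x");                              // 2

  std::cout << msg << "; " << joined << "; " << trimmed << "; " << times << "\n";
  return 0;
}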
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/target.h"
#include <glog/logging.h>
namespace infrt {
namespace common {
bool Target::operator==(const Target &other) const {
return os == other.os && //
arch == other.arch && //
bits == other.bits && //
features == other.features;
}
int Target::max_num_threads() const {
CHECK(arch == Arch::NVGPU)
<< "The target is not NVGPU! Cannot get max number of threads.";
return 1024;
}
std::vector<Target::Lib> Target::get_target_libs() const { return libs; }
int Target::get_target_bits() const {
switch (bits) {
case Bit::k32:
return 32;
case Bit::k64:
return 64;
case Bit::Unk:
return 0;
default:
LOG(FATAL) << "Not supported Bit";
}
return -1;
}
std::ostream &operator<<(std::ostream &os, const Target &target) {
os << "Target<";
switch (target.os) {
case Target::OS::Linux:
os << "linux";
break;
case Target::OS::Windows:
os << "windows";
break;
case Target::OS::Unk:
os << "unk";
break;
}
os << ",";
switch (target.arch) {
case Target::Arch::X86:
os << "x86";
break;
case Target::Arch::ARM:
os << "arm";
break;
case Target::Arch::NVGPU:
os << "nvgpu";
break;
case Target::Arch::Unk:
os << "unk";
break;
}
os << ",";
switch (target.bits) {
case Target::Bit::k32:
os << "32";
break;
case Target::Bit::k64:
os << "64";
break;
case Target::Bit::Unk:
os << "unk";
break;
}
os << ">";
return os;
}
std::ostream &operator<<(std::ostream &os, Target::Arch arch) {
switch (arch) {
case Target::Arch::Unk:
os << "Unk";
break;
case Target::Arch::X86:
os << "X86";
break;
case Target::Arch::ARM:
os << "ARM";
break;
case Target::Arch::NVGPU:
os << "NVGPU";
break;
}
return os;
}
} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ostream>
#include <vector>
namespace infrt {
namespace common {
struct Target {
/**
* The operating system used by the target. Determines which system calls to
* generate.
*/
enum class OS : int {
Unk = -1,
Linux,
Windows,
};
/**
* The architecture used by the target. Determines the instruction set to use.
*/
enum class Arch : int {
Unk = -1,
X86,
ARM,
NVGPU,
};
enum class Bit : int {
Unk = -1,
k32,
k64,
};
OS os{OS::Unk};
Arch arch{Arch::Unk};
Bit bits{Bit::Unk};
enum class Feature : int {
JIT = 0,
Debug,
};
/**
* The library used by the target.
*/
enum class Lib : int {
Unk = -1,
MKL,
};
std::vector<Feature> features;
std::vector<Lib> libs;
explicit Target(OS o = OS::Linux,
Arch a = Arch::Unk,
Bit b = Bit::Unk,
const std::vector<Feature>& features = {},
const std::vector<Lib>& libs = {})
: os(o), arch(a), bits(b), features(features), libs(libs) {}
bool defined() const {
return os != OS::Unk && arch != Arch::Unk && bits != Bit::Unk;
}
int max_num_threads() const;
int get_target_bits() const;
std::vector<Lib> get_target_libs() const;
bool operator==(const Target& other) const;
bool operator!=(const Target& other) const { return !(*this == other); }
friend std::ostream& operator<<(std::ostream& os, const Target& target);
};
static const Target& UnkTarget() {
static Target target(
Target::OS::Unk, Target::Arch::Unk, Target::Bit::Unk, {}, {});
return target;
}
static const Target& DefaultHostTarget() {
static Target target(
Target::OS::Linux, Target::Arch::X86, Target::Bit::k64, {}, {});
return target;
}
static const Target& DefaultNVGPUTarget() {
static Target target(
Target::OS::Linux, Target::Arch::NVGPU, Target::Bit::k64, {}, {});
return target;
}
std::ostream& operator<<(std::ostream& os, Target::Arch arch);
} // namespace common
} // namespace infrt
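A short usage sketch for the Target description above (our own example; it relies only on the constructors, singletons, and stream operators declared in this header and defined in target.cc):

#include <iostream>
#include "paddle/infrt/common/target.h"

int main() {
  using infrt::common::Target;

  // Prebuilt singletons declared above.
  const Target& host = infrt::common::DefaultHostTarget();  // linux / x86 / 64-bit
  const Target& gpu = infrt::common::DefaultNVGPUTarget();  // linux / nvgpu / 64-bit

  std::cout << host << "\n";                    // Target<linux,x86,64>
  std::cout << host.get_target_bits() << "\n";  // 64
  std::cout << gpu.max_num_threads() << "\n";   // 1024 (CHECK-fails for non-NVGPU targets)

  // A custom target assembled from the enum fields.
  Target arm32(Target::OS::Linux, Target::Arch::ARM, Target::Bit::k32);
  std::cout << std::boolalpha << arm32.defined() << "\n";  // true
  return 0;
}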
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/type.h"
#include <utility>
namespace infrt {
namespace common {
struct Type::Storage {
Storage() = default;
Storage(type_t t, int b, int w) : type_(t), bits_(b), lanes_(w) {}
type_t type_{type_t::Unk};
cpp_type_t cpp_type_{cpp_type_t::None};
//! How many bits per element.
int bits_{};
//! How many elements(if a vector type), for scalar types, it should be 1.
int lanes_{1};
//! Name of the customized type.
std::string customized_type_;
};
Type::~Type() {}
std::ostream &operator<<(std::ostream &os, const Type &t) {
if (t.is_cpp_const()) os << "const ";
switch (t.type()) {
case Type::type_t::Int:
if (t.bits() == 1) {
os << "bool";
} else {
os << "int" << t.bits();
}
break;
case Type::type_t::UInt:
os << "uint" << t.bits();
break;
case Type::type_t::Float:
os << "float" << t.bits();
break;
case Type::type_t::Void:
os << "void";
break;
case Type::type_t::Customized:
os << t.customized_type();
break;
case Type::type_t::String:
os << "string";
break;
case Type::type_t::Unk:
os << "unk";
break;
}
if (t.lanes() > 1) os << "<" << t.lanes() << ">";
if (t.is_cpp_handle()) os << "*";
if (t.is_cpp_handle2()) os << "**";
return os;
}
std::ostream &operator<<(std::ostream &os, Type::type_t t) {
switch (t) {
case Type::type_t::String:
os << "String";
break;
case Type::type_t::Void:
os << "Void";
break;
case Type::type_t::UInt:
os << "UInt";
break;
case Type::type_t::Int:
os << "Int";
break;
case Type::type_t::Float:
os << "Float";
break;
case Type::type_t::Unk:
os << "Unk";
break;
case Type::type_t::Customized:
os << "Customized";
}
return os;
}
Type &Type::set_cpp_handle(bool x) {
auto &v = (*reinterpret_cast<uint8_t *>(&GetStorage().cpp_type_));
// unset the other handle-related bits.
v &= ~static_cast<uint8_t>(cpp_type_t::Handle);
v &= ~static_cast<uint8_t>(cpp_type_t::HandleHandle);
if (x)
v |= static_cast<uint8_t>(cpp_type_t::Handle);
else
v &= ~static_cast<uint8_t>(cpp_type_t::Handle);
return *this;
}
Type &Type::set_cpp_handle2(bool x) {
auto &v = (*reinterpret_cast<uint8_t *>(&GetStorage().cpp_type_));
// unset the other handle-related bits.
v &= ~static_cast<uint8_t>(cpp_type_t::Handle);
v &= ~static_cast<uint8_t>(cpp_type_t::HandleHandle);
if (x)
v |= static_cast<uint8_t>(cpp_type_t::HandleHandle);
else
v &= ~static_cast<uint8_t>(cpp_type_t::HandleHandle);
return *this;
}
Type Type::VectorOf(int w) const {
CheckTypeValid();
// Keep the element type and bit-width; only the lane count changes.
return Type(type(), bits(), w);
}
Type::Type(const Type &other) {
if (other.storage_) storage_.reset(new Storage(*other.storage_));
}
Type Type::ElementOf() const {
CheckTypeValid();
auto type = *this;
type.storage_->lanes_ = 1;
return type;
}
void Type::CheckTypeValid() const { CHECK_NE(GetStorage().type_, type_t::Unk); }
Type Type::PointerOf() const {
CheckTypeValid();
auto x = *this;
CHECK(!x.is_cpp_handle2()) << "Not support three level of PointerOf";
if (x.is_cpp_handle())
x.set_cpp_handle2();
else
x.set_cpp_handle();
return x;
}
Type Type::ConstOf() const {
CheckTypeValid();
auto x = *this;
x.set_cpp_const();
return x;
}
Type Type::IgnoreConst() const {
CheckTypeValid();
auto x = *this;
x.set_cpp_const(false);
return x;
}
Type Type::with_bits(int x) const {
CHECK(is_primitive());
Type type = *this;
type.GetStorage().bits_ = x;
return type;
}
Type Type::with_type(Type::type_t x) const {
Type type = *this;
type.GetStorage().type_ = x;
return type;
}
Type Type::with_lanes(int x) const {
CHECK(valid());
Type type = *this;
type.GetStorage().lanes_ = x;
return type;
}
Type Type::with_cpp_const(bool x) const {
Type type = *this;
type.set_cpp_const(x);
return type;
}
Type &Type::set_cpp_const(bool is_const) {
uint8_t &data = *reinterpret_cast<uint8_t *>(&GetStorage().cpp_type_);
if (is_const) {
data |= static_cast<uint8_t>(cpp_type_t::Const);
} else {
data &= ~(static_cast<uint8_t>(cpp_type_t::Const));
}
return *this;
}
Type &Type::set_customized_type(const std::string &t) {
GetStorage().type_ = type_t::Customized;
GetStorage().customized_type_ = t;
return *this;
}
bool Type::valid() const {
if (is_unk()) return false;
if (is_customized()) {
return !GetStorage().customized_type_.empty();
}
if (is_primitive()) {
return bits() != 0;
}
return true;
}
Type::Type(Type::type_t t, int b, int w) : storage_(new Storage(t, b, w)) {}
bool Type::is_primitive() const {
return !is_unk() && type() != type_t::Customized;
}
bool Type::is_customized() const {
return !is_unk() && type() == type_t::Customized;
}
bool Type::is_unk() const { return type() == type_t::Unk; }
bool Type::is_bool() const { return type() == type_t::UInt && bits() == 1; }
bool Type::is_void() const { return type() == type_t::Void; }
bool Type::is_vector() const { return lanes() > 1; }
bool Type::is_scalar() const { return lanes() == 1; }
bool Type::is_float(int bits) const {
return type() == type_t::Float && (bits < 0 || bits == this->bits());
}
bool Type::is_uint(int bits) const {
return type() == type_t::UInt && (bits < 0 || bits == this->bits());
}
bool Type::is_int(int bits) const {
return type() == type_t::Int && (bits < 0 || bits == this->bits());
}
bool Type::is_integer(int bits) const {
return (type() == type_t::Int || type() == type_t::UInt) &&
(bits < 0 || bits == this->bits());
}
bool Type::is_index_type() {
return is_int() && lanes() == 1 && (bits() == 32 || bits() == 64);
}
bool Type::is_cpp_handle() const {
return static_cast<uint8_t>(GetStorage().cpp_type_) &
static_cast<uint8_t>(cpp_type_t::Handle);
}
bool Type::is_cpp_handle2() const {
return static_cast<uint8_t>(GetStorage().cpp_type_) &
static_cast<uint8_t>(cpp_type_t::HandleHandle);
}
bool Type::is_cpp_const() const {
return static_cast<uint8_t>(cpp_type_t::Const) &
static_cast<uint8_t>(GetStorage().cpp_type_);
}
const std::string &Type::customized_type() const {
return GetStorage().customized_type_;
}
bool Type::is_customized_type() const {
return !GetStorage().customized_type_.empty();
}
Type::type_t Type::type() const { return GetStorage().type_; }
int Type::bits() const { return GetStorage().bits_; }
int Type::lanes() const { return GetStorage().lanes_; }
Type::cpp_type_t Type::cpp_type() const { return GetStorage().cpp_type_; }
bool Type::operator==(const Type &other) const {
return type() == other.type() && bits() == other.bits() &&
lanes() == other.lanes() &&
GetStorage().cpp_type_ == other.GetStorage().cpp_type_ &&
customized_type() == other.customized_type();
}
bool Type::is_string() const { return type() == type_t::String; }
Type &Type::operator=(const Type &other) {
if (other.storage_)
storage_.reset(new Storage(*other.storage_));
else
storage_.reset();
return *this;
}
Type::Storage &Type::GetStorage() { return *storage_; }
const Type::Storage &Type::GetStorage() const { return *storage_; }
Type::Type() : storage_(new Storage) {}
Type::Type(Type &&other) : storage_(std::move(other.storage_)) {}
const Type &F16() {
static auto t = Float(16);
return t;
}
const Type &F32() {
static auto t = Float(32);
return t;
}
const Type &F64() {
static auto t = Float(64);
return t;
}
const Type &I8() {
static auto t = Int(8);
return t;
}
const Type &I16() {
static auto t = Int(16);
return t;
}
const Type &I32() {
static auto t = Int(32);
return t;
}
const Type &I64() {
static auto t = Int(64);
return t;
}
const Type &UI8() {
static auto t = UInt(8);
return t;
}
const Type &UI16() {
static auto t = UInt(16);
return t;
}
const Type &UI32() {
static auto t = UInt(32);
return t;
}
const Type &UI64() {
static auto t = UInt(64);
return t;
}
const Type &I1() {
static auto t = Int(1);
return t;
}
const Type &UI1() {
static auto t = UInt(1);
return t;
}
} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <memory>
#include <string>
#include "paddle/infrt/common/macros.h"
//! Many of the concepts here are borrowed from the Halide project.
namespace infrt {
namespace common {
/**
 * Types in the INFRT type system. They can be ints, unsigned ints, or floats
 * of various bit-widths.
 * They can also be vectors of the same element type (by setting the `lanes`
 * field to something larger than one).
 * NOTE: Front-end code other than vectorize shouldn't use vector types.
 */
struct Type {
enum class type_t {
Unk = -1,
Int,
UInt,
Float,
String,
Void,
// Mixing Customized with the other primitive types is awkward; a larger
// refactor is needed here.
Customized, // Customized type
};
//! Type decorators in C++; the decorators are bit flags and can be combined.
enum class cpp_type_t : uint8_t {
None = 0, // None information.
Const = 1, // const.
Handle = 1 << 1, // pointer type, such as `infrt_buffer_t*`.
HandleHandle = 1 << 2, // pointer of pointer, such as `infrt_buffer_t**`.
};
Type();
Type(type_t t, int b, int w);
Type(const Type& other);
explicit Type(Type&& other);
Type& operator=(const Type& other);
INFRT_NODISCARD bool is_primitive() const;
INFRT_NODISCARD bool is_customized() const;
INFRT_NODISCARD bool valid() const;
//! Some helper functions to check a type.
// @{
INFRT_NODISCARD bool is_unk() const;
INFRT_NODISCARD bool is_void() const;
INFRT_NODISCARD bool is_bool() const;
INFRT_NODISCARD bool is_vector() const;
INFRT_NODISCARD bool is_scalar() const;
INFRT_NODISCARD bool is_float(int bits = -1) const;
INFRT_NODISCARD bool is_int(int bits = -1) const;
INFRT_NODISCARD bool is_integer(int bits = -1) const;
INFRT_NODISCARD bool is_uint(int bits = -1) const;
INFRT_NODISCARD bool is_string() const;
INFRT_NODISCARD bool is_index_type();
// @}
Type& set_cpp_handle(bool x = true);
INFRT_NODISCARD bool is_cpp_handle() const;
Type& set_cpp_handle2(bool x = true);
INFRT_NODISCARD bool is_cpp_handle2() const;
Type& set_cpp_const(bool is_const = true);
INFRT_NODISCARD bool is_cpp_const() const;
Type& set_customized_type(const std::string& t);
const std::string& customized_type() const;
INFRT_NODISCARD bool is_customized_type() const;
// Get a new type with bits set to \p x.
Type with_bits(int x) const;
// Get a new type with type set to \p x.
Type with_type(type_t x) const;
// Get a new type with lanes set to \p x.
Type with_lanes(int x) const;
// Get a new type with cpp_const set to \p x.
Type with_cpp_const(bool x = true) const;
//! Getters
// @{
type_t type() const;
int bits() const;
int lanes() const;
cpp_type_t cpp_type() const;
// @}
//! Compare two types for equality.
bool operator==(const Type& other) const;
//! Compare two types for inequality.
bool operator!=(const Type& other) const { return !(*this == other); }
//! Generate a vector of this type, with `w` elements.
Type VectorOf(int w) const;
//! Generate a element type of this type.
Type ElementOf() const;
//! Generate the address type.
Type PointerOf() const;
//! Ignore const.
Type IgnoreConst() const;
//! Add const.
Type ConstOf() const;
friend std::ostream& operator<<(std::ostream& os, const Type& t);
~Type();
private:
void CheckTypeValid() const;
struct Storage;
Storage& GetStorage();
const Storage& GetStorage() const;
std::unique_ptr<Storage> storage_;
}; // struct Type
inline Type Void() { return Type(Type::type_t::Void, 1, 0); }
inline Type Int(int bits, int lanes = 1) {
return Type(Type::type_t::Int, bits, lanes);
}
inline Type UInt(int bits, int lanes = 1) {
return Type(Type::type_t::UInt, bits, lanes);
}
inline Type Float(int bits, int lanes = 1) {
return Type(Type::type_t::Float, bits, lanes);
}
inline Type Bool(int lanes = 1) { return Type(Type::type_t::UInt, 1, lanes); }
inline Type String() { return Type(Type::type_t::String, 1, 1); }
//! Builtin native types as global singletons.
// @{
const Type& F16();
const Type& F32();
const Type& F64();
const Type& I8();
const Type& I16();
const Type& I32();
const Type& I64();
const Type& UI8();
const Type& UI16();
const Type& UI32();
const Type& UI64();
const Type& I1();
const Type& UI1();
// @}
template <typename T>
Type type_of();
template <>
inline Type type_of<float>() {
return F32();
}
template <>
inline Type type_of<double>() {
return F64();
}
template <>
inline Type type_of<unsigned char>() {
return UI8();
}
template <>
inline Type type_of<int16_t>() {
return I16();
}
template <>
inline Type type_of<int32_t>() {
return I32();
}
template <>
inline Type type_of<uint32_t>() {
return UI32();
}
template <>
inline Type type_of<bool>() {
return UI1();
}
template <>
inline Type type_of<char>() {
return I8();
}
template <>
inline Type type_of<int64_t>() {
return I64();
}
template <>
inline Type type_of<uint64_t>() {
return UI64();
}
template <>
inline Type type_of<signed char>() {
return I8();
}
template <>
inline Type type_of<void>() {
return Void();
}
template <>
inline Type type_of<int8_t*>() {
Type x = Int(8);
x.set_cpp_handle();
return x;
}
template <>
inline Type type_of<void*>() {
Type x = type_of<void>();
x.set_cpp_handle();
return x;
}
template <>
inline Type type_of<void**>() {
Type x = type_of<void>();
x.set_cpp_handle2();
return x;
}
template <>
inline Type type_of<float*>() {
Type x = type_of<float>();
x.set_cpp_handle();
return x;
}
template <>
inline Type type_of<double*>() {
Type x = type_of<double>();
x.set_cpp_handle();
return x;
}
std::ostream& operator<<(std::ostream& os, Type::type_t t);
} // namespace common
} // namespace infrt
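A compact sketch of how the Type helpers above compose (again our own example, using only functions declared in this header and defined in type.cc):

#include <cstdint>
#include <iostream>
#include "paddle/infrt/common/type.h"

int main() {
  using namespace infrt::common;

  Type f32 = F32();                // 32-bit float, 1 lane
  Type f32x4 = f32.VectorOf(4);    // same element type, 4 lanes
  Type f32_ptr = f32.PointerOf();  // float32*
  Type cf32 = f32.ConstOf();       // const float32

  std::cout << f32 << "\n";        // float32
  std::cout << f32x4 << "\n";      // float32<4>
  std::cout << f32_ptr << "\n";    // float32*
  std::cout << cf32 << "\n";       // const float32

  // type_of<T>() maps C++ types onto the INFRT type system.
  std::cout << std::boolalpha
            << (type_of<int32_t>() == I32()) << " "       // true
            << type_of<void*>().is_cpp_handle() << "\n";  // true
  return 0;
}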
core_gather_headers()
gather_srcs(
infrt_src
SRCS
dialect.cc
init_dialects.cc
tensor_shape.cc
dense_tensor.cc
mlir_loader.cc
diagnostic_utils.cc)
mlir_tablegen_on(tensor_shape DIALECT ts)
mlir_tablegen_on(dense_tensor DIALECT dt)
# TODO(Superjomn) add a cmake function cc_executable to encapsulate the following code
add_executable(infrtopt opt.cc)
target_link_libraries(infrtopt infrt)
add_executable(print-ir print_ir.cc)
target_link_libraries(print-ir infrt ${mlir_libs})
cc_test_tiny(test_infrt_mlir_loader SRCS mlir_loader_test.cc DEPS infrt
${MLIR_IR_LIBS})
add_subdirectory(infrt)
add_subdirectory(pd)
add_subdirectory(tensorrt)
if(INFRT_WITH_PHI)
add_subdirectory(phi)
endif()
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mlir/IR/Dialect.h>
#include <mlir/IR/OpDefinition.h>
#include <mlir/Interfaces/SideEffectInterfaces.h>
#include <string>
#include "paddle/infrt/dialect/dense_tensor_dialect.hpp.inc"
#include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
#define GET_OP_CLASSES
#include "paddle/infrt/dialect/dense_tensor.hpp.inc"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <llvm/Support/SourceMgr.h>
#include <mlir/IR/Diagnostics.h>
#include <memory>
namespace infrt {
namespace dialect {
/**
 * A scoped diagnostic handler that helps debug MLIR processing.
 */
class MyScopedDiagnosicHandler : public mlir::SourceMgrDiagnosticHandler {
public:
MyScopedDiagnosicHandler(mlir::MLIRContext* ctx, bool propagate);
mlir::LogicalResult handler(mlir::Diagnostic* diag);
~MyScopedDiagnosicHandler();
private:
class Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace dialect
} // namespace infrt
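A minimal usage sketch for the scoped handler above (our own illustration; the MLIRContext setup and the parsing call are assumed context, and our reading of the propagate flag is an assumption, since its behavior lives in the Impl class defined in the .cc file):

#include <mlir/IR/MLIRContext.h>
#include "paddle/infrt/dialect/diagnostic_utils.h"

void ParseWithDiagnostics() {
  mlir::MLIRContext context;
  // While the handler is in scope, diagnostics emitted on `context` are routed
  // through MyScopedDiagnosicHandler::handler(). We assume propagate=false means
  // the diagnostics are reported by this handler rather than re-emitted downstream.
  infrt::dialect::MyScopedDiagnosicHandler diag_handler(&context, /*propagate=*/false);

  // ... load dialects and run mlir::parseSourceFile / pass pipelines here; parse
  // errors will surface through the scoped handler while it is alive.
}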
add_subdirectory(common)
add_subdirectory(ir)
add_subdirectory(pass)