未验证 提交 1b84c0bf 编写于 作者: W Wilber 提交者: GitHub

Lite subgraph refine predictor (#27167)

上级 2e597696
...@@ -34,7 +34,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) ...@@ -34,7 +34,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite) set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)
if(NOT LITE_GIT_TAG) if(NOT LITE_GIT_TAG)
set(LITE_GIT_TAG dfdfa6440c83bf0b415f9f5a9ff84842ce0bb0fa) set(LITE_GIT_TAG 6d2b2a4028a58715b01887b04eb9bff8432eb184)
endif() endif()
if(NOT CUDA_ARCH_NAME) if(NOT CUDA_ARCH_NAME)
......
...@@ -218,6 +218,10 @@ struct Argument { ...@@ -218,6 +218,10 @@ struct Argument {
DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t); DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t);
// Only used in paddle-lite subgraph.
DECL_ARGUMENT_FIELD(cpu_math_library_num_threads, CpuMathLibraryNumThreads,
int);
private: private:
std::unordered_set<std::string> valid_fields_; std::unordered_set<std::string> valid_fields_;
}; };
......
...@@ -150,6 +150,8 @@ void IRPassManager::CreatePasses(Argument *argument, ...@@ -150,6 +150,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("use_xpu", new bool(argument->use_xpu())); pass->Set("use_xpu", new bool(argument->use_xpu()));
pass->Set("xpu_l3_workspace_size", pass->Set("xpu_l3_workspace_size",
new int(argument->xpu_l3_workspace_size())); new int(argument->xpu_l3_workspace_size()));
pass->Set("cpu_math_library_num_threads",
new int(argument->cpu_math_library_num_threads()));
} }
disable_logs_ = argument->disable_logs(); disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") { if (pass_name == "fc_fuse_pass") {
......
...@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine( ...@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine(
bool enable_int8 = Get<bool>("enable_int8"); bool enable_int8 = Get<bool>("enable_int8");
bool use_xpu = Get<bool>("use_xpu"); bool use_xpu = Get<bool>("use_xpu");
int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size"); int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
lite_api::TargetType target_type; lite_api::TargetType target_type;
if (use_gpu) { if (use_gpu) {
...@@ -263,11 +264,12 @@ void LiteSubgraphPass::SetUpEngine( ...@@ -263,11 +264,12 @@ void LiteSubgraphPass::SetUpEngine(
// Notice: The ordering here determines the device where the // Notice: The ordering here determines the device where the
// input tensor of the Lite engine is located, and then affects // input tensor of the Lite engine is located, and then affects
// whether tensor sharing is feasible. // whether tensor sharing is feasible.
paddle::lite::Place({target_type, precision_type}), paddle::lite_api::Place({target_type, precision_type}),
paddle::lite::Place({target_type, PRECISION(kInt64)}), paddle::lite_api::Place({target_type, PRECISION(kInt64)}),
paddle::lite::Place({target_type, PRECISION(kFloat)}), paddle::lite_api::Place({target_type, PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kHost), PRECISION(kFloat)}),
}; };
config.cpu_math_library_num_threads = cpu_math_library_num_threads;
config.xpu_l3_workspace_size = xpu_l3_workspace_size; config.xpu_l3_workspace_size = xpu_l3_workspace_size;
if (dump_model) { if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model); lite::StrToBinaryFile("./model.bin", config.model);
......
...@@ -461,6 +461,8 @@ void AnalysisPredictor::PrepareArgument() { ...@@ -461,6 +461,8 @@ void AnalysisPredictor::PrepareArgument() {
} }
if (config_.lite_engine_enabled()) { if (config_.lite_engine_enabled()) {
argument_.SetCpuMathLibraryNumThreads(
config_.cpu_math_library_num_threads());
argument_.SetLitePrecisionMode(config_.lite_precision_mode_); argument_.SetLitePrecisionMode(config_.lite_precision_mode_);
argument_.SetLitePassesFilter(config_.lite_passes_filter_); argument_.SetLitePassesFilter(config_.lite_passes_filter_);
argument_.SetLiteOpsFilter(config_.lite_ops_filter_); argument_.SetLiteOpsFilter(config_.lite_ops_filter_);
......
...@@ -20,8 +20,12 @@ ...@@ -20,8 +20,12 @@
#define LITE_WITH_XPU 1 #define LITE_WITH_XPU 1
#endif #endif
#ifndef PADDLE_WITH_ARM
#define LITE_WITH_X86 1
#endif
#include "paddle/fluid/inference/lite/engine.h" #include "paddle/fluid/inference/lite/engine.h"
#include "lite/api/paddle_use_passes.h" #include <utility>
namespace paddle { namespace paddle {
namespace inference { namespace inference {
...@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const { ...@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
return engines_.at(name).get() != nullptr; return engines_.at(name).get() != nullptr;
} }
paddle::lite::Predictor* EngineManager::Get(const std::string& name) const { paddle::lite_api::PaddlePredictor* EngineManager::Get(
const std::string& name) const {
return engines_.at(name).get(); return engines_.at(name).get();
} }
paddle::lite::Predictor* EngineManager::Create(const std::string& name, paddle::lite_api::PaddlePredictor* EngineManager::Create(
const EngineConfig& cfg) { const std::string& name, const EngineConfig& cfg) {
if (cfg.valid_places.front().target == TARGET(kCUDA)) { // config info for predictor.
#ifdef PADDLE_WITH_CUDA paddle::lite_api::CxxConfig lite_cxx_config;
paddle::lite::Env<TARGET(kCUDA)>::Init(); lite_cxx_config.set_model_buffer(cfg.model.c_str(), cfg.model.size(),
cfg.param.c_str(), cfg.param.size());
lite_cxx_config.set_valid_places(cfg.valid_places);
#ifdef PADDLE_WITH_ARM
set_threads.set_threads(cfg.cpu_math_library_num_threads);
#else
lite_cxx_config.set_x86_math_library_num_threads(
cfg.cpu_math_library_num_threads);
#endif #endif
} else if (cfg.valid_places.front().target == TARGET(kXPU)) {
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
paddle::lite::TargetWrapper<TARGET(kXPU)>::workspace_l3_size_per_thread = lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
cfg.xpu_l3_workspace_size; cfg.xpu_l3_workspace_size);
#endif #endif
}
auto* p = new paddle::lite::Predictor(); // create predictor
p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes, std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
cfg.model_type, cfg.model_from_memory); paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
engines_[name].reset(p); engines_[name] = std::move(p);
return p; return engines_[name].get();
} }
void EngineManager::DeleteAll() { void EngineManager::DeleteAll() {
for (auto& item : engines_) { for (auto& item : engines_) {
item.second.reset(nullptr); item.second.reset();
} }
} }
......
...@@ -23,12 +23,9 @@ ...@@ -23,12 +23,9 @@
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wall" #pragma GCC diagnostic ignored "-Wall"
#include "lite/api/cxx_api.h" #include "lite/api/cxx_api.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h" #include "lite/api/paddle_place.h"
#include "lite/core/context.h" #include "lite/api/paddle_use_passes.h"
#include "lite/core/device_info.h"
#include "lite/core/memory.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
namespace paddle { namespace paddle {
...@@ -38,25 +35,33 @@ namespace lite { ...@@ -38,25 +35,33 @@ namespace lite {
struct EngineConfig { struct EngineConfig {
std::string model; std::string model;
std::string param; std::string param;
paddle::lite::Place prefer_place; std::vector<paddle::lite_api::Place> valid_places;
std::vector<paddle::lite::Place> valid_places;
std::vector<std::string> neglected_passes; std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf}; lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool model_from_memory{true}; bool model_from_memory{true};
// for xpu
size_t xpu_l3_workspace_size; size_t xpu_l3_workspace_size;
// for x86 or arm
int cpu_math_library_num_threads{1};
// for cuda
bool use_multi_stream{false};
}; };
class EngineManager { class EngineManager {
public: public:
bool Empty() const; bool Empty() const;
bool Has(const std::string& name) const; bool Has(const std::string& name) const;
paddle::lite::Predictor* Get(const std::string& name) const; paddle::lite_api::PaddlePredictor* Get(const std::string& name) const;
paddle::lite::Predictor* Create(const std::string& name, paddle::lite_api::PaddlePredictor* Create(const std::string& name,
const EngineConfig& cfg); const EngineConfig& cfg);
void DeleteAll(); void DeleteAll();
private: private:
std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>> std::unordered_map<std::string,
std::shared_ptr<paddle::lite_api::PaddlePredictor>>
engines_; engines_;
}; };
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/inference/lite/tensor_utils.h" #include "paddle/fluid/inference/lite/tensor_utils.h"
#include <functional>
#include <map> #include <map>
#include <memory> #include <memory>
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
...@@ -144,16 +145,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data, ...@@ -144,16 +145,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
} }
} }
// GetLiteTensorDataPtr: returns an untyped pointer to the mutable data buffer
// of `src`, obtained via src->mutable_data<T>(target_type) where T is the C++
// element type matching `precision_type` (float, int8_t, int32_t or int64_t).
// Any other precision raises an Unimplemented error through PADDLE_THROW.
void InitDstTensor(paddle::lite::Tensor* dst, const framework::LoDTensor& src) { void* GetLiteTensorDataPtr(paddle::lite_api::Tensor* src,
PrecisionType precision_type,
TargetType target_type) {
void* res{nullptr};
// mutable_data is templated on the element type, so the runtime precision
// enum has to be dispatched to the matching instantiation.
switch (precision_type) {
case PrecisionType::kFloat:
res = static_cast<void*>(src->mutable_data<float>(target_type));
break;
case PrecisionType::kInt8:
res = static_cast<void*>(src->mutable_data<int8_t>(target_type));
break;
case PrecisionType::kInt32:
res = static_cast<void*>(src->mutable_data<int32_t>(target_type));
break;
case PrecisionType::kInt64:
res = static_cast<void*>(src->mutable_data<int64_t>(target_type));
break;
default:
// Every precision not listed above is rejected explicitly.
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported precision type. Now only supports FP32, INT8, INT32 and "
"INT64."));
break;
}
return res;
}
// Returns the number of elements held by `tensor`, computed as the product of
// every dimension reported by tensor.shape(). An empty shape yields 1.
int64_t GetLiteTensorNumel(const paddle::lite_api::Tensor& tensor) {
  const auto shape = tensor.shape();
  // std::accumulate takes its accumulator type from the initial value, so the
  // seed must be int64_t; a plain `1` would truncate the running product to
  // int and overflow for large tensors.
  return std::accumulate(shape.begin(), shape.end(), static_cast<int64_t>(1),
                         std::multiplies<int64_t>());
}
void InitDstTensor(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src) {
// Currently, Lite needs to explicitly specify the target type of // Currently, Lite needs to explicitly specify the target type of
// the input tensor. // the input tensor.
constexpr int empty_size = 0; constexpr int empty_size = 0;
dst->mutable_data(GetLiteTargetType(src.place()), empty_size); dst->Resize({empty_size});
dst->set_precision(GetLitePrecisionType(src.type())); GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
SetLoD(dst->mutable_lod(), src.lod()); GetLiteTargetType(src.place()));
dst->SetPrecision(GetLitePrecisionType(src.type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src.lod());
dst->SetLoD(lite_lod);
} }
void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) { void InitDstTensor(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src) {
constexpr framework::proto::VarType::Type dtype = constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32; framework::proto::VarType_Type_FP32;
dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()), dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()),
...@@ -162,7 +202,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) { ...@@ -162,7 +202,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
} }
template <> template <>
void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src, void TensorCopyAsync(paddle::lite_api::Tensor* dst,
const framework::LoDTensor& src,
const platform::DeviceContext& ctx) { const platform::DeviceContext& ctx) {
InitDstTensor(dst, src); InitDstTensor(dst, src);
const platform::Place& src_place = src.place(); const platform::Place& src_place = src.place();
...@@ -171,52 +212,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src, ...@@ -171,52 +212,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type()); static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type());
dst->Resize(framework::vectorize(src.dims())); dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data<void>(); const void* src_data = src.data<void>();
void* dst_data = dst->mutable_data(bytes); void* dst_data{nullptr};
dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << src.type(); << ", dst = " << dst << ", src_type = " << src.type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << dst->memory_size(); VLOG(3) << "[Lite memory size] Bytes = " << bytes;
} }
template <> template <>
void TensorCopyAsync(framework::LoDTensor* dst, const paddle::lite::Tensor& src, void TensorCopyAsync(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src,
const platform::DeviceContext& ctx) { const platform::DeviceContext& ctx) {
dst->Resize(paddle::framework::make_ddim(src.dims().Vectorize())); dst->Resize(paddle::framework::make_ddim(src.shape()));
InitDstTensor(dst, src); InitDstTensor(dst, src);
const platform::Place& src_place = GetNativePlace(src.target()); const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place(); const platform::Place& dst_place = dst->place();
const size_t bytes = int64_t src_numel = GetLiteTensorNumel(src);
static_cast<size_t>(src.numel()) * framework::SizeOfType(dst->type()); const size_t bytes = src_numel * framework::SizeOfType(dst->type());
const void* src_data = src.raw_data(); const void* src_data = src.data<void>();
// When Lite is ready, the source type needs to be modified here. // When Lite is ready, the source type needs to be modified here.
void* dst_data = dst->mutable_data(dst_place, dst->type()); void* dst_data = dst->mutable_data(dst_place, dst->type());
VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << dst->type(); << ", dst = " << dst << ", src_type = " << dst->type();
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << src.memory_size(); VLOG(3) << "[Lite memory size] Bytes = " << bytes;
} }
template <> template <>
void TensorDataShare(paddle::lite::Tensor* dst, framework::LoDTensor* src) { void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) {
const size_t bytes =
static_cast<size_t>(src->numel()) * framework::SizeOfType(src->type());
auto buf = std::make_shared<paddle::lite::Buffer>(paddle::lite::Buffer(
src->data<void>(), GetLiteTargetType(src->place()), src->memory_size()));
dst->Resize(framework::vectorize(src->dims())); dst->Resize(framework::vectorize(src->dims()));
dst->set_precision(GetLitePrecisionType(src->type())); dst->ShareExternalMemory(src->data<void>(), src->memory_size(),
SetLoD(dst->mutable_lod(), src->lod()); GetLiteTargetType(src->place()));
dst->ResetBuffer(buf, bytes); dst->SetPrecision(GetLitePrecisionType(src->type()));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src->lod());
dst->SetLoD(lite_lod);
} }
template <> template <>
void TensorDataShare(framework::LoDTensor* dst, paddle::lite::Tensor* src) { void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
constexpr framework::proto::VarType::Type dtype = constexpr framework::proto::VarType::Type dtype =
framework::proto::VarType_Type_FP32; framework::proto::VarType_Type_FP32;
void* src_raw_data = src->raw_data(); void* src_raw_data =
GetLiteTensorDataPtr(src, GetLitePrecisionType(dtype), src->target());
size_t memory_size = GetLiteTensorNumel(*src) * sizeof(float);
std::shared_ptr<memory::allocation::Allocation> holder( std::shared_ptr<memory::allocation::Allocation> holder(
new memory::allocation::Allocation(src_raw_data, src->memory_size(), new memory::allocation::Allocation(src_raw_data, memory_size,
GetNativePlace(src->target()))); GetNativePlace(src->target())));
dst->Resize(paddle::framework::make_ddim(src->dims().Vectorize())); dst->Resize(paddle::framework::make_ddim(src->shape()));
SetLoD(dst->mutable_lod(), src->lod()); SetLoD(dst->mutable_lod(), src->lod());
dst->ResetHolderWithType(holder, dtype); dst->ResetHolderWithType(holder, dtype);
} }
......
...@@ -102,10 +102,10 @@ TEST(EngineManager, engine) { ...@@ -102,10 +102,10 @@ TEST(EngineManager, engine) {
config.model_from_memory = true; config.model_from_memory = true;
config.valid_places = { config.valid_places = {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif #endif
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}), paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}),
}; };
LOG(INFO) << "Create EngineManager"; LOG(INFO) << "Create EngineManager";
...@@ -118,7 +118,7 @@ TEST(EngineManager, engine) { ...@@ -118,7 +118,7 @@ TEST(EngineManager, engine) {
ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has( ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
unique_key), unique_key),
true); true);
paddle::lite::Predictor* engine_0 = paddle::lite_api::PaddlePredictor* engine_0 =
inference::Singleton<inference::lite::EngineManager>::Global().Get( inference::Singleton<inference::lite::EngineManager>::Global().Get(
unique_key); unique_key);
CHECK_NOTNULL(engine_0); CHECK_NOTNULL(engine_0);
......
...@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) { ...@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) {
EXPECT_ANY_THROW(GetNativeLayoutType(DataLayoutType::kNHWC)); EXPECT_ANY_THROW(GetNativeLayoutType(DataLayoutType::kNHWC));
} }
// Fills a lite tensor with the values 0..count-1 stored as T, wraps it in a
// lite_api tensor, and checks that GetLiteTensorDataPtr returns a pointer
// through which the same values can be read back for `precision_type`.
template <typename T>
void test_lite_tensor_data_ptr(PrecisionType precision_type) {
  // Local forward declaration of the function under test.
  void* GetLiteTensorDataPtr(paddle::lite_api::Tensor* src,
                             PrecisionType precision_type,
                             TargetType target_type);
  const int count = 4;
  paddle::lite::Tensor lite_tensor;
  lite_tensor.Resize({count});
  auto* lite_tensor_data = lite_tensor.mutable_data<T>();
  // Use a signed index to match `count` and convert explicitly to T, avoiding
  // signed/unsigned comparison and implicit narrowing.
  for (int i = 0; i < count; ++i) {
    lite_tensor_data[i] = static_cast<T>(i);
  }
  paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
  T* data = static_cast<T*>(GetLiteTensorDataPtr(
      &lite_api_tensor, precision_type, TargetType::kHost));
  for (int i = 0; i < count; ++i) {
    CHECK_EQ(data[i], static_cast<T>(i)) << "the i-th num is not correct.";
  }
}
// Exercises GetLiteTensorDataPtr for every precision it dispatches on
// (FP32, INT64, INT32, INT8) and checks that an unsupported precision throws.
TEST(LiteEngineOp, GetLiteTensorDataPtr) {
  test_lite_tensor_data_ptr<float>(PrecisionType::kFloat);
  test_lite_tensor_data_ptr<int64_t>(PrecisionType::kInt64);
  test_lite_tensor_data_ptr<int32_t>(PrecisionType::kInt32);
  test_lite_tensor_data_ptr<int8_t>(PrecisionType::kInt8);
  EXPECT_ANY_THROW(test_lite_tensor_data_ptr<double>(PrecisionType::kUnk));
}
void test_tensor_copy(const platform::DeviceContext& ctx) { void test_tensor_copy(const platform::DeviceContext& ctx) {
// Create LoDTensor. // Create LoDTensor.
std::vector<float> vector({1, 2, 3, 4}); std::vector<float> vector({1, 2, 3, 4});
...@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) { ...@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
lod_tensor.set_lod(lod); lod_tensor.set_lod(lod);
// Create lite::Tensor and copy. // Create lite::Tensor and copy.
paddle::lite::Tensor lite_tensor; paddle::lite::Tensor lite_tensor;
TensorCopyAsync(&lite_tensor, lod_tensor, ctx); paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
TensorCopyAsync(&lite_api_tensor, lod_tensor, ctx);
// Copy to LoDTensor. // Copy to LoDTensor.
framework::LoDTensor lod_tensor_n; framework::LoDTensor lod_tensor_n;
TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx); TensorCopyAsync(&lod_tensor_n, lite_api_tensor, ctx);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(ctx.GetPlace())) { if (platform::is_gpu_place(ctx.GetPlace())) {
platform::GpuStreamSync( platform::GpuStreamSync(
...@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) { ...@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) {
lod_tensor.set_lod(lod); lod_tensor.set_lod(lod);
// Create lite::Tensor and share. // Create lite::Tensor and share.
paddle::lite::Tensor lite_tensor; paddle::lite::Tensor lite_tensor;
TensorDataShare(&lite_tensor, &lod_tensor); paddle::lite_api::Tensor lite_api_tensor(&lite_tensor);
TensorDataShare(&lite_api_tensor, &lod_tensor);
// Copy to LoDTensor. // Copy to LoDTensor.
framework::LoDTensor lod_tensor_n; framework::LoDTensor lod_tensor_n;
TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx); TensorCopyAsync(&lod_tensor_n, lite_api_tensor, ctx);
std::vector<float> result; std::vector<float> result;
TensorToVector(lod_tensor_n, ctx, &result); TensorToVector(lod_tensor_n, ctx, &result);
ASSERT_EQ(result, vector); ASSERT_EQ(result, vector);
......
...@@ -27,7 +27,7 @@ TEST(AnalysisPredictor, use_gpu) { ...@@ -27,7 +27,7 @@ TEST(AnalysisPredictor, use_gpu) {
AnalysisConfig config; AnalysisConfig config;
config.EnableUseGpu(100, 0); config.EnableUseGpu(100, 0);
config.SetModel(model_dir + "/model", model_dir + "/params"); config.SetModel(model_dir + "/model", model_dir + "/params");
config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32); config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32, true);
std::vector<PaddleTensor> inputs; std::vector<PaddleTensor> inputs;
auto predictor = CreatePaddlePredictor(config); auto predictor = CreatePaddlePredictor(config);
......
...@@ -39,7 +39,7 @@ class LiteEngineOp : public framework::OperatorBase { ...@@ -39,7 +39,7 @@ class LiteEngineOp : public framework::OperatorBase {
private: private:
std::vector<std::string> in_names_; std::vector<std::string> in_names_;
std::vector<std::string> out_names_; std::vector<std::string> out_names_;
paddle::lite::Predictor *engine_; paddle::lite_api::PaddlePredictor *engine_;
framework::proto::VarType::Type precision_; framework::proto::VarType::Type precision_;
bool use_gpu_; bool use_gpu_;
bool zero_copy_; bool zero_copy_;
...@@ -78,10 +78,10 @@ class LiteEngineOp : public framework::OperatorBase { ...@@ -78,10 +78,10 @@ class LiteEngineOp : public framework::OperatorBase {
framework::LoDTensor src_t = framework::LoDTensor src_t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope, inference::analysis::GetFromScope<framework::LoDTensor>(scope,
in_names_[i]); in_names_[i]);
paddle::lite::Tensor *dst_t = engine_->GetInput(i); paddle::lite_api::Tensor dst_t = *(engine_->GetInput(i));
VLOG(3) << "== fluid -> lite (" << in_names_[i] << " -> " VLOG(3) << "== fluid -> lite (" << in_names_[i] << " -> "
<< engine_->GetInputNames()[i] << ")"; << engine_->GetInputNames()[i] << ")";
inference::lite::utils::TensorCopy(dst_t, &src_t, *ctx, zero_copy_); inference::lite::utils::TensorCopy(&dst_t, &src_t, *ctx, zero_copy_);
} }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(dev_place)) { if (platform::is_gpu_place(dev_place)) {
...@@ -93,7 +93,7 @@ class LiteEngineOp : public framework::OperatorBase { ...@@ -93,7 +93,7 @@ class LiteEngineOp : public framework::OperatorBase {
engine_->Run(); engine_->Run();
VLOG(3) << "lite engine run done"; VLOG(3) << "lite engine run done";
for (size_t i = 0; i < out_names_.size(); i++) { for (size_t i = 0; i < out_names_.size(); i++) {
paddle::lite::Tensor src_t = *(engine_->GetOutput(i)); paddle::lite_api::Tensor src_t = *(engine_->GetOutput(i));
framework::LoDTensor *dst_t = framework::LoDTensor *dst_t =
&inference::analysis::GetFromScope<framework::LoDTensor>( &inference::analysis::GetFromScope<framework::LoDTensor>(
scope, out_names_[i]); scope, out_names_[i]);
......
...@@ -84,10 +84,10 @@ TEST(LiteEngineOp, engine_op) { ...@@ -84,10 +84,10 @@ TEST(LiteEngineOp, engine_op) {
inference::lite::EngineConfig config; inference::lite::EngineConfig config;
config.valid_places = { config.valid_places = {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif #endif
paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}), paddle::lite_api::Place({TARGET(kX86), PRECISION(kFloat)}),
paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}), paddle::lite_api::Place({TARGET(kHost), PRECISION(kAny)}),
}; };
serialize_params(&(config.param), &scope, repetitive_params); serialize_params(&(config.param), &scope, repetitive_params);
config.model = program.Proto()->SerializeAsString(); config.model = program.Proto()->SerializeAsString();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册