未验证 提交 cb33835c 编写于 作者: W Wilber 提交者: GitHub

cherry-pick (#36653)

cherry-pick prs

#36568
fix fc fuse compat problem

#36610
support choosing the device id for lite xpu

#36010
update lite branch

#36628
add file existence check
上级 7612bf1c
...@@ -50,7 +50,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) ...@@ -50,7 +50,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite) set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)
if(NOT LITE_GIT_TAG) if(NOT LITE_GIT_TAG)
set(LITE_GIT_TAG 4ab64daecc11fbf74fffdc6a4733f388472e7d5d) set(LITE_GIT_TAG 62fc737d4a553bca738f96b0402b28f26a8d2d4f)
endif() endif()
if(NOT CUDA_ARCH_NAME) if(NOT CUDA_ARCH_NAME)
...@@ -197,9 +197,9 @@ set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_ ...@@ -197,9 +197,9 @@ set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_
if (LITE_WITH_NNADAPTER) if (LITE_WITH_NNADAPTER)
set(LITE_NNADAPTER_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so) set(LITE_NNADAPTER_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so)
if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU) if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so) external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so)
set(LITE_DEPS lite_full_static lite_nnadapter) set(LITE_DEPS lite_full_static lite_nnadapter)
set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so) set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so)
endif() endif()
else() else()
set(LITE_DEPS lite_full_static) set(LITE_DEPS lite_full_static)
......
...@@ -51,7 +51,12 @@ FCFusePass::FCFusePass() { ...@@ -51,7 +51,12 @@ FCFusePass::FCFusePass() {
.IsTensor() .IsTensor()
.End() .End()
.AddAttr("axis") .AddAttr("axis")
.IsNumGE(1) .IsNumMatch<int>([](int axis) -> bool {
if (axis == -1 || axis >= 1) {
return true;
}
return false;
})
.End(); .End();
AddOpCompat(OpCompat("relu")) AddOpCompat(OpCompat("relu"))
......
...@@ -238,6 +238,7 @@ struct Argument { ...@@ -238,6 +238,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(xpu_autotune_file, XpuAutotuneFile, std::string); DECL_ARGUMENT_FIELD(xpu_autotune_file, XpuAutotuneFile, std::string);
DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string); DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool); DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);
DECL_ARGUMENT_FIELD(xpu_device_id, XpuDeviceId, int);
DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool); DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir, NNAdapterModelCacheDir, DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir, NNAdapterModelCacheDir,
......
...@@ -202,6 +202,7 @@ void IRPassManager::CreatePasses(Argument *argument, ...@@ -202,6 +202,7 @@ void IRPassManager::CreatePasses(Argument *argument,
new std::string(argument->xpu_autotune_file())); new std::string(argument->xpu_autotune_file()));
pass->Set("precision", new std::string(argument->xpu_precision())); pass->Set("precision", new std::string(argument->xpu_precision()));
pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen())); pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
pass->Set("xpu_device_id", new int(argument->xpu_device_id()));
// NNAdapter Related // NNAdapter Related
pass->Set("use_nnadapter", new bool(argument->use_nnadapter())); pass->Set("use_nnadapter", new bool(argument->use_nnadapter()));
pass->Set("nnadapter_model_cache_dir", pass->Set("nnadapter_model_cache_dir",
......
...@@ -243,6 +243,7 @@ void LiteSubgraphPass::SetUpEngine( ...@@ -243,6 +243,7 @@ void LiteSubgraphPass::SetUpEngine(
bool use_gpu = Get<bool>("use_gpu"); bool use_gpu = Get<bool>("use_gpu");
bool enable_int8 = Get<bool>("enable_int8"); bool enable_int8 = Get<bool>("enable_int8");
bool use_xpu = Get<bool>("use_xpu"); bool use_xpu = Get<bool>("use_xpu");
int xpu_device_id = Get<int>("xpu_device_id");
int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size"); int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads"); int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
bool locked = Get<bool>("locked"); bool locked = Get<bool>("locked");
...@@ -305,6 +306,7 @@ void LiteSubgraphPass::SetUpEngine( ...@@ -305,6 +306,7 @@ void LiteSubgraphPass::SetUpEngine(
}; };
config.cpu_math_library_num_threads = cpu_math_library_num_threads; config.cpu_math_library_num_threads = cpu_math_library_num_threads;
config.xpu_l3_workspace_size = xpu_l3_workspace_size; config.xpu_l3_workspace_size = xpu_l3_workspace_size;
config.device_id = xpu_device_id;
config.locked = locked; config.locked = locked;
config.autotune = autotune; config.autotune = autotune;
config.autotune_file = autotune_file; config.autotune_file = autotune_file;
......
...@@ -619,6 +619,7 @@ void AnalysisPredictor::PrepareArgument() { ...@@ -619,6 +619,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_); argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
argument_.SetXpuPrecision(config_.xpu_precision_); argument_.SetXpuPrecision(config_.xpu_precision_);
argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_); argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
argument_.SetXpuDeviceId(config_.xpu_device_id_);
// NNAdapter related // NNAdapter related
argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter); argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter);
argument_.SetNNAdapterDeviceNames( argument_.SetNNAdapterDeviceNames(
......
...@@ -67,6 +67,7 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create( ...@@ -67,6 +67,7 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
lite_cxx_config.set_xpu_conv_autotune(cfg.autotune, cfg.autotune_file); lite_cxx_config.set_xpu_conv_autotune(cfg.autotune, cfg.autotune_file);
lite_cxx_config.set_xpu_multi_encoder_method(cfg.precision, lite_cxx_config.set_xpu_multi_encoder_method(cfg.precision,
cfg.adaptive_seqlen); cfg.adaptive_seqlen);
lite_cxx_config.set_xpu_dev_per_thread(cfg.device_id);
#endif #endif
#ifdef LITE_SUBGRAPH_WITH_NPU #ifdef LITE_SUBGRAPH_WITH_NPU
......
...@@ -39,6 +39,9 @@ struct EngineConfig { ...@@ -39,6 +39,9 @@ struct EngineConfig {
std::vector<std::string> neglected_passes; std::vector<std::string> neglected_passes;
lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf}; lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
bool model_from_memory{true}; bool model_from_memory{true};
// TODO(wilber): currently only works for xpu; can lite gpu support device_id
// as well?
int device_id = 0;
// for xpu // for xpu
size_t xpu_l3_workspace_size; size_t xpu_l3_workspace_size;
......
...@@ -197,6 +197,9 @@ void SerializeShapeRangeInfo( ...@@ -197,6 +197,9 @@ void SerializeShapeRangeInfo(
void DeserializeShapeRangeInfo( void DeserializeShapeRangeInfo(
const std::string &path, paddle::inference::proto::ShapeRangeInfos *info) { const std::string &path, paddle::inference::proto::ShapeRangeInfos *info) {
int fd = open(path.c_str(), O_RDONLY); int fd = open(path.c_str(), O_RDONLY);
if (fd == -1) {
PADDLE_THROW(platform::errors::NotFound("File [%s] is not found.", path));
}
google::protobuf::io::FileInputStream *is = google::protobuf::io::FileInputStream *is =
new google::protobuf::io::FileInputStream(fd); new google::protobuf::io::FileInputStream(fd);
google::protobuf::TextFormat::Parse(is, info); google::protobuf::TextFormat::Parse(is, info);
......
...@@ -118,4 +118,7 @@ TEST(shape_info_io, read_and_write) { ...@@ -118,4 +118,7 @@ TEST(shape_info_io, read_and_write) {
std::vector<std::string> names{"test1"}; std::vector<std::string> names{"test1"};
paddle::inference::UpdateShapeRangeInfo(path, min_shape, max_shape, opt_shape, paddle::inference::UpdateShapeRangeInfo(path, min_shape, max_shape, opt_shape,
names); names);
ASSERT_THROW(paddle::inference::DeserializeShapeRangeInfo(
"no_exists_file", &min_shape, &max_shape, &opt_shape);
, paddle::platform::EnforceNotMet);
} }
...@@ -223,7 +223,7 @@ function cmake_base() { ...@@ -223,7 +223,7 @@ function cmake_base() {
-DWITH_GLOO=${gloo_flag} -DWITH_GLOO=${gloo_flag}
-DWITH_LITE=${WITH_LITE:-OFF} -DWITH_LITE=${WITH_LITE:-OFF}
-DWITH_XPU=${WITH_XPU:-OFF} -DWITH_XPU=${WITH_XPU:-OFF}
-DLITE_GIT_TAG=_release/v2.10 -DLITE_GIT_TAG=release/v2.10
-DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF} -DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF}
-DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} -DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF}
-DWITH_ARM=${WITH_ARM:-OFF} -DWITH_ARM=${WITH_ARM:-OFF}
...@@ -266,7 +266,7 @@ EOF ...@@ -266,7 +266,7 @@ EOF
-DWITH_PSCORE=${distibuted_flag} \ -DWITH_PSCORE=${distibuted_flag} \
-DWITH_PSLIB=${WITH_PSLIB:-OFF} \ -DWITH_PSLIB=${WITH_PSLIB:-OFF} \
-DWITH_GLOO=${gloo_flag} \ -DWITH_GLOO=${gloo_flag} \
-DLITE_GIT_TAG=_release/v2.10 \ -DLITE_GIT_TAG=release/v2.10 \
-DWITH_XPU=${WITH_XPU:-OFF} \ -DWITH_XPU=${WITH_XPU:-OFF} \
-DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} \ -DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} \
-DWITH_LITE=${WITH_LITE:-OFF} \ -DWITH_LITE=${WITH_LITE:-OFF} \
......
...@@ -41,13 +41,13 @@ function add_failed(){ ...@@ -41,13 +41,13 @@ function add_failed(){
api_params_diff=`python ${PADDLE_ROOT}/tools/check_api_compatible.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec ${PADDLE_ROOT}/paddle/fluid/API_PR.spec` api_params_diff=`python ${PADDLE_ROOT}/tools/check_api_compatible.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec ${PADDLE_ROOT}/paddle/fluid/API_PR.spec`
api_spec_diff=`python ${PADDLE_ROOT}/tools/diff_api.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec.api ${PADDLE_ROOT}/paddle/fluid/API_PR.spec.api` api_spec_diff=`python ${PADDLE_ROOT}/tools/diff_api.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec.api ${PADDLE_ROOT}/paddle/fluid/API_PR.spec.api`
if [ "$api_spec_diff" != "" -o "${api_params_diff}" != "" ]; then if [ "$api_spec_diff" != "" -o "${api_params_diff}" != "" ]; then
echo_line="You must have one RD (XiaoguangHu01 or lanxianghit) approval for API change.\n" echo_line="You must have one RD (XiaoguangHu01, lanxianghit or Superjomn) approval for API change.\n"
echo_line="${echo_line} and one TPM approval for API change: \n" echo_line="${echo_line} and one TPM approval for API change: \n"
echo_line="${echo_line} jzhang533/ZhangJun, dingjiaweiww/DingJiaWei, Heeenrrry/LiKunLun, TCChenlong/ChenLong for general APIs\n" echo_line="${echo_line} jzhang533/ZhangJun, dingjiaweiww/DingJiaWei, Heeenrrry/LiKunLun, TCChenlong/ChenLong for general APIs\n"
echo_line="${echo_line} PangHua/XiangHui for distributed related APIs\n" echo_line="${echo_line} PangHua/XiangHui for distributed related APIs\n"
echo_line="${echo_line} twismon/WangYunKai, CheQiXiao/CheQiXiao for inference related APIs.\n" echo_line="${echo_line} twismon/WangYunKai, CheQiXiao/CheQiXiao for inference related APIs.\n"
check_approval 1 46782768 47554610 check_approval 1 46782768 47554610 328693
check_approval 1 29231 23093488 28379894 11935832 2682285 12050047 50894398 check_approval 1 29231 23093488 28379894 11935832 2682285 12050047 50894398
fi fi
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册