Unverified commit 7a3bb061 authored by Zhaolong Xing, committed by GitHub

fix: (#17279)

1. multi-card inference occupies memory on every visible GPU
2. facebox model inference occupies too much memory
test=develop
Parent 50ad9046
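For context, a minimal, hypothetical usage sketch of the scenario these fixes target: pinning a predictor to a single GPU through AnalysisConfig so inference memory is only occupied on the selected card. The model directory and memory-pool size below are placeholders and are not part of this commit.

#include <memory>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./facebox_model");  // placeholder model directory
  // Use only GPU 1 with a 100 MB initial memory pool; after this commit the
  // predictor should not reserve memory on the other visible cards, and the
  // cuDNN workspace of conv ops is capped at 64 MB by the new analysis pass.
  config.EnableUseGpu(100 /* memory_pool_init_size_mb */, 1 /* device_id */);

  auto predictor = paddle::CreatePaddlePredictor(config);
  // ... prepare inputs and call predictor->Run(...) as usual.
  return predictor != nullptr ? 0 : 1;
}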
...@@ -3,11 +3,13 @@ cc_library(ir_analysis_pass SRCS ir_analysis_pass.cc DEPS analysis_pass argument
cc_library(memory_optim_pass SRCS memory_optimize_pass.cc DEPS analysis_pass zero_copy_tensor)
cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager)
cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass)
cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass)
cc_library(analysis_passes SRCS passes.cc DEPS
  ir_graph_build_pass
  ir_analysis_pass
  ir_params_sync_among_devices_pass
  adjust_cudnn_workspace_size_pass
  memory_optim_pass
  ir_graph_to_program_pass
)
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
namespace paddle {
namespace inference {
namespace analysis {
void AdjustCudnnWorkSpacePass::RunImpl(Argument* argument) {
if (!argument->use_gpu()) return;
auto& graph = argument->main_graph();
auto nodes = graph.Nodes();
const int cudnn_workspace_size_MB = 64;
const std::string attr_name = "workspace_size_MB";
for (auto& node : nodes) {
if (!node->IsOp()) continue;
auto* op_desc = node->Op();
if (!op_desc->HasAttr(attr_name)) continue;
op_desc->SetAttr(attr_name, cudnn_workspace_size_MB);
op_desc->Flush();
}
}
std::string AdjustCudnnWorkSpacePass::repr() const {
return "adjust-cudnn-work-space-pass";
}
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
 * The default cuDNN workspace size is 4 GB; this pass caps it at 64 MB,
 * which is sufficient for most inference tasks.
 */
class AdjustCudnnWorkSpacePass : public AnalysisPass {
 public:
  void RunImpl(Argument *argument) override;
  std::string repr() const override;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/passes.h"
#include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
...@@ -35,6 +36,8 @@ PassRegistry::PassRegistry() { ...@@ -35,6 +36,8 @@ PassRegistry::PassRegistry() {
passes_.emplace( passes_.emplace(
"ir_params_sync_among_devices_pass", "ir_params_sync_among_devices_pass",
std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass)); std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass));
passes_.emplace("adjust_cudnn_workspace_size_pass",
std::unique_ptr<AnalysisPass>(new AdjustCudnnWorkSpacePass));
passes_.emplace( passes_.emplace(
"ir_graph_to_program_pass", "ir_graph_to_program_pass",
std::unique_ptr<IrGraphToProgramPass>(new IrGraphToProgramPass)); std::unique_ptr<IrGraphToProgramPass>(new IrGraphToProgramPass));
......
...@@ -14,7 +14,9 @@
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
namespace paddle {
......
...@@ -120,7 +120,11 @@ bool AnalysisPredictor::PrepareScope(
    scope_ = parent_scope;
    status_is_cloned_ = true;
  } else {
    if (config_.use_gpu_) {
      paddle::framework::InitDevices(false, {config_.device_id_});
    } else {
      paddle::framework::InitDevices(false, {});
    }
    scope_.reset(new paddle::framework::Scope());
    status_is_cloned_ = false;
  }
...@@ -459,6 +463,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    std::string flag = "--fraction_of_gpu_memory_to_use=" +
                       std::to_string(fraction_of_gpu_memory);
    flags.push_back(flag);
    flags.push_back("--selected_gpus=" +
                    std::to_string(config.gpu_device_id()));
    VLOG(3) << "set flag: " << flag;
    framework::InitGflags(flags);
  }
......
...@@ -73,7 +73,8 @@ class PaddlePassBuilder {
 protected:
  std::vector<std::string> analysis_passes_{
      {"ir_graph_build_pass", "ir_analysis_pass",
       "ir_params_sync_among_devices_pass",
       "adjust_cudnn_workspace_size_pass"}};
  std::vector<std::string> passes_;
};
......