Unverified commit 1b0c5ef2, authored by Pei Yang, committed by GitHub

fix emb_eltwise_ln gpu_id bug (#33701)

Parent 773aabc7
......@@ -421,7 +421,6 @@ void AnalysisConfig::Update() {
       pass_builder()->AppendPass(pass);
     }
   }
-  LOG(INFO) << "use_dlnne_:" << use_dlnne_ << std::endl;
   if (use_dlnne_) {
     pass_builder()->ClearPasses();
     for (const auto &pass : kDlnneSubgraphPasses) {
......
......@@ -152,8 +152,8 @@ bool AnalysisPredictor::Init(
                           : platform::ProfilerState::kCPU;
     platform::EnableProfiler(tracking_device);
   } else {
-    LOG(INFO) << "Profiler is deactivated, and no profiling report will be "
-                 "generated.";
+    VLOG(2) << "Profiler is deactivated, and no profiling report will be "
+               "generated.";
   }
   // no matter with or without MKLDNN
......
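This hunk downgrades a routine message from LOG(INFO) to VLOG(2). With glog, which Paddle uses for logging, VLOG(n) is emitted only when the verbosity level is at least n, so the message no longer appears in default output but can still be recovered by raising verbosity (e.g. GLOG_v=2). A minimal standalone sketch of that behavior, assuming stock glog and nothing Paddle-specific:

```cpp
#include <glog/logging.h>

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;  // print to stderr instead of log files
  LOG(INFO) << "always emitted at INFO severity";
  VLOG(2) << "emitted only when verbosity >= 2 (e.g. GLOG_v=2)";
  return 0;
}
```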
......@@ -315,7 +315,7 @@ struct PD_INFER_DECL AnalysisConfig {
   /// workspace.
   /// \param max_batch_size The maximum batch size of this prediction task,
   /// better set as small as possible for less performance loss.
-  /// \param min_subgrpah_size The minimum TensorRT subgraph size needed, if a
+  /// \param min_subgraph_size The minimum TensorRT subgraph size needed, if a
   /// subgraph is smaller than this, it will not be transferred to TensorRT
   /// engine.
   /// \param precision The precision used in TensorRT.
......
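The typo fix above ("min_subgrpah_size" to "min_subgraph_size") is in the doc comment of the TensorRT configuration API. For context, a hedged usage sketch of how these documented parameters are typically passed; the header path, argument values, and defaults are assumptions and may differ between Paddle releases:

```cpp
#include "paddle_inference_api.h"  // header name/path may vary by install

void ConfigureTrt(paddle::AnalysisConfig* config) {
  config->EnableUseGpu(100 /*initial GPU memory pool, MB*/, 0 /*device id*/);
  config->EnableTensorRtEngine(
      1 << 30,  // workspace_size: TensorRT workspace in bytes
      1,        // max_batch_size: keep as small as possible
      3,        // min_subgraph_size: smaller subgraphs stay in Paddle
      paddle::AnalysisConfig::Precision::kFloat32,  // precision
      false,    // use_static
      false);   // use_calib_mode
}
```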
......@@ -134,7 +134,7 @@ int EmbEltwiseLayernormPluginDynamicImpl<T>::enqueue(
   int batch = id_dims.d[0];
   int seq_len = id_dims.d[1];
   int input_num = embs_.size();
+  cudaGetDevice(&device_id_);
   auto in_ptr_gpu_d =
       in_ptr_tensor_.mutable_data<int64_t>(platform::CUDAPlace(device_id_));
   auto emb_ptr_gpu_d =
......
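This hunk is the fix named in the commit title: device_id_ is cached when the plugin is created, but at enqueue time the plugin may be running on a different GPU, so mutable_data(platform::CUDAPlace(device_id_)) could allocate on the wrong card. Refreshing device_id_ with cudaGetDevice() right before the allocation keeps it in sync with the device the TensorRT engine is actually executing on. A standalone sketch of the pattern using the plain CUDA runtime (not the plugin code itself):

```cpp
#include <cuda_runtime.h>

#include <cstdio>

int main() {
  int device_id = 0;          // a cached id can go stale in multi-GPU runs
  cudaGetDevice(&device_id);  // ask the runtime which device is current
  void* buf = nullptr;
  // cudaMalloc allocates on the current device, which now matches device_id.
  if (cudaMalloc(&buf, 1024) != cudaSuccess) {
    std::fprintf(stderr, "cudaMalloc failed\n");
    return 1;
  }
  std::printf("workspace allocated on device %d\n", device_id);
  cudaFree(buf);
  return 0;
}
```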
......@@ -29,11 +29,6 @@ void run(const AnalysisConfig& config, std::vector<float>* out_data) {
   int run_batch = 1;
   const int run_seq_len = 128;
   std::vector<int64_t> tmp_input;
-  std::vector<float> tmp_four_input;
   tmp_input.reserve(run_batch * run_seq_len);
-  tmp_four_input.reserve(run_batch * run_seq_len);
   int64_t i0[run_seq_len] = {
       1, 3558, 4, 75, 491, 89, 340, 313, 93, 4, 255, 10, 75, 321,
       4095, 1902, 4, 134, 49, 75, 311, 14, 44, 178, 543, 15, 12043, 2,
......
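The last hunk removes an unused float input (tmp_four_input) from the test helper. For context, a hedged sketch of how a helper like run() typically feeds int64 token ids through the zero-copy API; the input tensor name below is a placeholder, not the name used by this test, and the exact test wiring may differ:

```cpp
#include "paddle_inference_api.h"  // header name/path may vary by install

#include <vector>

std::vector<float> RunOnce(const paddle::AnalysisConfig& config,
                           const std::vector<int64_t>& ids,
                           int batch, int seq_len) {
  // Zero-copy I/O assumes config.SwitchUseFeedFetchOps(false) was set earlier.
  auto predictor = paddle::CreatePaddlePredictor(config);

  auto input = predictor->GetInputTensor("placeholder_ids");  // hypothetical name
  input->Reshape({batch, seq_len});
  input->copy_from_cpu(ids.data());

  predictor->ZeroCopyRun();

  auto output = predictor->GetOutputTensor(predictor->GetOutputNames()[0]);
  int numel = 1;
  for (int d : output->shape()) numel *= d;
  std::vector<float> out(numel);
  output->copy_to_cpu(out.data());
  return out;
}
```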