PaddlePaddle / Paddle — commit 5f3aaafc

Authored on Sep 23, 2020 by sandyhouse
Parent commit: a2241734

Commit message: update, test=develop

Showing 5 changed files with 75 additions and 352 deletions (+75 / -352).
Changed files:

- paddle/fluid/framework/device_worker.h  (+0, -8)
- paddle/fluid/framework/pipeline_trainer.cc  (+5, -210)
- paddle/fluid/framework/section_worker.cc  (+67, -119)
- paddle/fluid/framework/trainer.h  (+0, -13)
- paddle/fluid/operators/collective/c_recv_op.cu.cc  (+3, -2)
paddle/fluid/framework/device_worker.h  (+0, -8)

@@ -414,7 +414,6 @@ class HeterCpuWorker : public HogwildWorker {
 #if defined(PADDLE_WITH_NCCL)
 class SectionWorker : public DeviceWorker {
  public:
-  // SectionWorker() { local_batch_id_ = 0; }
   SectionWorker() {}
   ~SectionWorker() override {}
@@ -430,7 +429,6 @@ class SectionWorker : public DeviceWorker {
   const platform::Place& place() const { return place_; }
-  // void SetSectionIndex(int section_id) { section_id_ = section_id; }
   void SetDeviceIndex(int tid) override {}
   void SetThreadIndex(int thread_id) { thread_id_ = thread_id; }
   void SetMicrobatchNum(int num) { num_microbatches_ = num; }
@@ -442,7 +440,6 @@ class SectionWorker : public DeviceWorker {
     skip_vars_ = skip_vars;
   }
   void SetStartCpuCoreId(int id) { cpu_id_ = id; }
-  // static void ResetBatchId() { batch_id_ = 0; }
  protected:
   void AutoSetCPUAffinity(bool reuse);
@@ -455,13 +452,8 @@ class SectionWorker : public DeviceWorker {
   const Scope* minibatch_scope_;
   std::vector<std::unique_ptr<OperatorBase>> ops_;
-  // static std::mutex thread_mutex;
-  // static std::mutex cout_mutex;
-  // static std::condition_variable thread_condition;
-  // static bool threads_completed;
   std::shared_ptr<framework::ProgramDesc> program_;
   static uint64_t batch_id_;
-  // uint64_t local_batch_id_;
   platform::DeviceContext* dev_ctx_ = nullptr;
 };
paddle/fluid/framework/pipeline_trainer.cc  (+5, -210)

@@ -28,71 +28,9 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc,
   num_microbatches_ = section_params.num_microbatches();
   VLOG(3) << "Number of microbatches per minibatch: " << num_microbatches_;
   trainer_desc_ = trainer_desc;
-  start_cpu_core_id_ = section_params.start_cpu_core_id();
+  auto cpu_core_id = section_params.start_cpu_core_id();
-  // SetDataset(dataset);
   ParseDumpConfig(trainer_desc);
-  // get filelist from trainer_desc here
-  // const std::vector<paddle::framework::DataFeed*> readers =
-  // dataset->GetReaders();
-  // VLOG(3) << "Number of program sections: " << section_num_;
-  // VLOG(3) << "readers num: " << readers.size();
-  // int num_readers = readers.size();
-  // PADDLE_ENFORCE_EQ(num_readers, 1,
-  //                   platform::errors::InvalidArgument(
-  //                       "Number of dataset readers for pipeline "
-  //                       "must be 1 now, but the value you give is %d.",
-  //                       num_readers));
-  // auto* reader = readers[0];
-  // workers_.resize(section_num_);
-  // for (int i = 0; i < section_num_; ++i) {
-  //   const auto& section_config = section_params.section_config(i);
-  //   platform::Place place;
-  //   int place_id = section_config.place_id();
-  //   switch (section_config.place()) {
-  //     case SectionConfig::CPUPlace:
-  //       place = platform::CPUPlace();
-  //       break;
-  //     case SectionConfig::CUDAPlace:
-  //       // Note that one section has at most one GPU place in one pipeline
-  //       PADDLE_ENFORCE_GE(
-  //           place_id, 0,
-  //           platform::errors::InvalidArgument(
-  //               "The place_id value for CUDAPlace shoud be greater "
-  //               "than or equal to 0, but the value you give is %d.",
-  //               place_id));
-  //       place = platform::CUDAPlace(place_id);
-  //       break;
-  //     case SectionConfig::CUDAPinnedPlace:
-  //       place = platform::CUDAPinnedPlace();
-  //       break;
-  //     default:
-  //       PADDLE_ENFORCE_NOT_NULL(nullptr,
-  //                               platform::errors::InvalidArgument(
-  //                                   "Unkown place type in SectionConfig: %d",
-  //                                   section_config.place()));
-  //   }
-  //   places_.emplace_back(place);
-  //   VLOG(3) << "Device worker place: " << place << ", device id: " << place_id
-  //           << ", section: " << i;
-  //   workers_[i] = DeviceWorkerFactory::CreateDeviceWorker(
-  //       trainer_desc.device_worker_name());
-  //   auto this_worker =
-  //       std::dynamic_pointer_cast<paddle::framework::SectionWorker>(
-  //           workers_[i]);
-  //   if (i == 0) {
-  //     // we only set reader for the first section
-  //     this_worker->SetDataFeed(reader);
-  //     this_worker->SetReaderPlace(place);
-  //   }
-  //   this_worker->SetThreadIndex(i);
-  //   this_worker->SetSectionIndex(i);
-  //   this_worker->SetPlace(place);
-  //   this_worker->Initialize(trainer_desc);
-  //   this_worker->SetMicrobatchNum(num_microbatches_);
-  // }
   const auto& section_config = section_params.section_config();
   int place_id = section_config.place_id();
   PADDLE_ENFORCE_GE(place_id, 0,
@@ -108,7 +46,7 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc,
   this_worker->SetPlace(place_);
   this_worker->Initialize(trainer_desc);
   this_worker->SetMicrobatchNum(num_microbatches_);
-  this_worker->SetStartCpuCoreId(start_cpu_core_id_);
+  this_worker->SetStartCpuCoreId(cpu_core_id);
   // set debug here
   SetDebug(trainer_desc.debug());
@@ -118,7 +56,6 @@ void PipelineTrainer::InitOtherEnv(const ProgramDesc& main_program) {
   if (need_dump_field_) {
     InitDumpEnv();
   }
-  VLOG(3) << "init other env done.";
 }

 std::string PipelineTrainer::GetDumpPath(int tid) {
@@ -135,51 +72,6 @@ void PipelineTrainer::InitDumpEnv() {
   }
 }

-// void PipelineTrainer::CopyParameters(int section_id, int microbatch_id,
-//                                      const ProgramDesc& program,
-//                                      const platform::Place& place) {
-//   auto& global_block = program.Block(0);
-//   std::map<std::string, int> param_map;
-//   for (auto& var : global_block.AllVars()) {
-//     if (var->Persistable()) {
-//       param_map[var->Name()] = 1;
-//     }
-//   }
-//   for (auto& var : global_block.AllVars()) {
-//     bool is_param_grad = false;
-//     size_t pos = 0;
-//     if ((pos = var->Name().find(kGradVarSuffix)) != std::string::npos) {
-//       auto prefix_name = var->Name().substr(0, pos);
-//       if (param_map.find(prefix_name) != param_map.end()) {
-//         is_param_grad = true;
-//       }
-//     }
-//     VLOG(3) << "Var name: " << var->Name();
-//     if ((var->Persistable() || is_param_grad) && microbatch_id == 0) {
-//       auto* ptr = root_scope_->FindVar(var->Name());
-//       auto* new_ptr = minibatch_scopes_[section_id]->Var(var->Name());
-//       VLOG(3) << "Create persistable var " << var->Name() << " for minibatch "
-//               << section_id << ", which pointer is " << new_ptr;
-//       InitializeVariable(new_ptr, var->GetType());
-//       if (is_param_grad) {
-//         continue;
-//       }
-//       const LoDTensor& root_tensor = ptr->Get<LoDTensor>();
-//       LoDTensor* minibatch_tensor = new_ptr->GetMutable<LoDTensor>();
-//       TensorCopy(*static_cast<const Tensor*>(&root_tensor), place,
-//                  static_cast<Tensor*>(minibatch_tensor));
-//     } else if (!var->Persistable() && !is_param_grad) {
-//       auto* ptr =
-//           microbatch_scopes_[section_id][microbatch_id]->Var(var->Name());
-//       VLOG(3) << "Create variable " << var->Name() << " for section "
-//               << section_id << " microbatch " << microbatch_id
-//               << ", which pointer is " << ptr;
-//       InitializeVariable(ptr, var->GetType());
-//     }
-//   }
-// }
 void PipelineTrainer::CopyParameters(int microbatch_id,
                                      const ProgramDesc& program,
                                      const platform::Place& place) {
@@ -190,6 +82,7 @@ void PipelineTrainer::CopyParameters(int microbatch_id,
       param_map[var->Name()] = 1;
     }
   }
   for (auto& var : global_block.AllVars()) {
     bool is_param_grad = false;
     size_t pos = 0;
@@ -199,7 +92,6 @@ void PipelineTrainer::CopyParameters(int microbatch_id,
         is_param_grad = true;
       }
     }
-    VLOG(3) << "Var name: " << var->Name();
     if (is_param_grad && microbatch_id == 0) {
       auto* ptr = minibatch_scope_->Var(var->Name());
       InitializeVariable(ptr, var->GetType());
@@ -207,149 +99,52 @@ void PipelineTrainer::CopyParameters(int microbatch_id,
               << ", which pointer is " << ptr;
     } else if (!var->Persistable() && !is_param_grad) {
       auto* ptr = microbatch_scopes_[microbatch_id]->Var(var->Name());
-      VLOG(3) << "Create variable " << var->Name() << " microbatch "
+      VLOG(3) << "Create variable " << var->Name() << " for microbatch "
              << microbatch_id << ", which pointer is " << ptr;
       InitializeVariable(ptr, var->GetType());
     }
   }
 }

-// void PipelineTrainer::GetSkipVars(int section_id, const ProgramDesc& program)
-// {
-//   auto& global_block = program.Block(0);
-//   for (auto& op : global_block.AllOps()) {
-//     if (op->Type() != "enqueue") {
-//       continue;
-//     }
-//     auto input_arg_names = op->InputArgumentNames();
-//     PADDLE_ENFORCE_EQ(input_arg_names.size(), 1,
-//                       platform::errors::InvalidArgument(
-//                           "Number of input arguments for enqueue op must be 1, "
-//                           "but the value is %d.",
-//                           input_arg_names.size()));
-//     std::string input_arg_name = input_arg_names[0];
-//     if (input_arg_name.rfind("@GRAD") != input_arg_name.size() - 5) {
-//       skip_vars_[section_id].emplace_back(input_arg_name);
-//       VLOG(3) << "add skip var name: " << input_arg_name;
-//     }
-//   }
-// }
-// void PipelineTrainer::GetSkipVars(const ProgramDesc& program) {
-//   auto& global_block = program.Block(0);
-//   for (auto& op : global_block.AllOps()) {
-//     if (op->Type() != "c_send") {
-//       continue;
-//     }
-//     auto input_arg_names = op->InputArgumentNames();
-//     PADDLE_ENFORCE_EQ(input_arg_names.size(), 1,
-//                       platform::errors::InvalidArgument(
-//                           "Number of input arguments for c_send op must be 1, "
-//                           "but the value given is %d.",
-//                           input_arg_names.size()));
-//     std::string input_arg_name = input_arg_names[0];
-//     if (input_arg_name.rfind("@GRAD") != input_arg_name.size() - 5) {
-//       skip_vars_.emplace_back(input_arg_name);
-//       VLOG(3) << "add skip var name: " << input_arg_name;
-//     }
-//   }
-// }
 void PipelineTrainer::InitTrainerEnv(const ProgramDesc& main_program,
                                      const platform::Place& place) {
   PADDLE_ENFORCE_NOT_NULL(root_scope_, platform::errors::InvalidArgument(
                                            "root_scope_ can not be nullptr"));
-  // auto start_cpu_id = trainer_desc_.section_param().start_cpu_core_id();
-  // SectionWorker::cpu_id_.store(start_cpu_id);
-  // minibatch_scopes_.resize(section_num_);
-  // microbatch_scopes_.resize(section_num_);
-  // minibatch_scopes_.resize(1);
   microbatch_scopes_.resize(num_microbatches_);
-  // skip_vars_.resize(section_num_);
   VLOG(3) << "Create minibatch and microbatch scopes...";
-  // for (int i = 0; i < section_num_; ++i) {
   minibatch_scope_ = &root_scope_->NewScope();
   std::shared_ptr<framework::ProgramDesc> program;
   program.reset(new ProgramDesc(
       trainer_desc_.section_param().section_config().program_desc()));
-  //     trainer_desc_.section_param().section_config(i).program_desc()));
-  // microbatch_scopes_[i].resize(num_microbatches_);
   for (int j = 0; j < num_microbatches_; ++j) {
-    // microbatch_scopes_[j] = &minibatch_scopes_[i]->NewScope();
     microbatch_scopes_[j] = &minibatch_scope_->NewScope();
-    // CopyParameters(i, j, *program, places_[i]);
     CopyParameters(j, *program, place_);
   }
-  // GetSkipVars(i, *program);
-  // GetSkipVars(*program);
-  // }
-  // for (int i = 0; i < section_num_; ++i) {
   auto this_worker =
       std::dynamic_pointer_cast<paddle::framework::SectionWorker>(worker_);
-  //         workers_[i]);
   this_worker->SetRootScope(root_scope_);
   this_worker->SetMinibatchScope(minibatch_scope_);
-  // this_worker->SetMicrobatchScopes(microbatch_scopes_[i]);
   this_worker->SetMicrobatchScopes(microbatch_scopes_);
-  // this_worker->SetSkipVars(skip_vars_[i]);
-  // }
 }

 void PipelineTrainer::Run() {
-  VLOG(3) << "Going to run";
+  VLOG(5) << "Going to run PipelineTrainer::Run()";
-  // for (int i = 0; i < section_num_; ++i) {
   if (!debug_) {
     section_thread_ = std::thread(&DeviceWorker::TrainFiles, worker_.get());
-    // section_threads_.push_back(
-    //     std::thread(&DeviceWorker::TrainFiles, workers_.get()));
-    //     std::thread(&DeviceWorker::TrainFiles, workers_[i].get()));
   } else {
     section_thread_ =
         std::thread(&DeviceWorker::TrainFilesWithProfiler, worker_.get());
-    // section_threads_.push_back(std::thread(
-    //     &DeviceWorker::TrainFilesWithProfiler, workers_.get()));
-    //     &DeviceWorker::TrainFilesWithProfiler, workers_[i].get()));
   }
-  // }
 }

 void PipelineTrainer::Finalize() {
-  // for (auto& th : section_threads_) {
-  //   th.join();
-  // }
   section_thread_.join();
   if (need_dump_field_) {
     FinalizeDumpEnv();
   }
-  // VLOG(3) << "copying back parameters. ";
-  // for (int i = 0; i < section_num_; ++i) {
-  //   std::shared_ptr<framework::ProgramDesc> program;
-  //   program.reset(new ProgramDesc(
-  //       trainer_desc_.section_param().section_config(i).program_desc()));
-  //   for (int j = 0; j < num_microbatches_; ++j) {
-  //     auto& global_block = program->Block(0);
-  //     for (auto& var : global_block.AllVars()) {
-  //       if (var->Persistable()) {
-  //         auto* ptr = root_scope_->FindVar(var->Name());
-  //         LoDTensor* root_tensor = ptr->GetMutable<LoDTensor>();
-  //         auto* minibatch_ptr = minibatch_scopes_[i]->Var(var->Name());
-  //         const LoDTensor& minibatch_tensor =
-  //             minibatch_ptr->Get<LoDTensor>();
-  //         TensorCopy(*static_cast<const Tensor*>(&minibatch_tensor),
-  //                    places_[0],
-  //                    static_cast<Tensor*>(root_tensor));
-  //         VLOG(3) << "Copy persitable var " << var->Name() << " to root
-  //         scope";
-  //       }
-  //     }
-  //   }
-  // }
   root_scope_->DropKids();
-  // SectionWorker::ResetBatchId();
 }

 Scope* PipelineTrainer::GetWorkerScope(int thread_id) {
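Taken together, the pipeline_trainer.cc changes collapse the old per-section containers (workers_, minibatch_scopes_, microbatch_scopes_[section][microbatch], section_threads_) into single-instance members, so one trainer now drives exactly one SectionWorker. The standalone sketch below only illustrates the scope ownership that the new InitTrainerEnv() builds and Finalize() tears down; the Scope type here is a toy stand-in for framework::Scope, and num_microbatches is an arbitrary example value, not something taken from this commit.

// Toy model of the new single-section scope hierarchy (not Paddle's real API):
// root scope -> one minibatch scope -> one child scope per microbatch.
#include <iostream>
#include <memory>
#include <vector>

struct Scope {
  std::vector<std::unique_ptr<Scope>> kids;
  Scope* NewScope() {
    kids.push_back(std::make_unique<Scope>());
    return kids.back().get();
  }
  void DropKids() { kids.clear(); }
};

int main() {
  const int num_microbatches = 4;  // illustrative value only
  Scope root_scope;
  Scope* minibatch_scope = root_scope.NewScope();
  std::vector<Scope*> microbatch_scopes(num_microbatches);
  for (int j = 0; j < num_microbatches; ++j) {
    // every microbatch gets its own child scope, so intermediate variables
    // from different microbatches never collide during the pipeline schedule
    microbatch_scopes[j] = minibatch_scope->NewScope();
  }
  std::cout << "created " << microbatch_scopes.size() << " microbatch scopes\n";
  root_scope.DropKids();  // mirrors root_scope_->DropKids() in Finalize()
  return 0;
}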
paddle/fluid/framework/section_worker.cc  (+67, -119)

@@ -30,28 +30,19 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

-// std::atomic<int> SectionWorker::cpu_id_(0);
-// std::mutex SectionWorker::thread_mutex;
-// std::mutex SectionWorker::cout_mutex;
-// std::condition_variable SectionWorker::thread_condition;
-// bool SectionWorker::threads_completed = false;
 uint64_t SectionWorker::batch_id_(0);

 void SectionWorker::Initialize(const TrainerDesc& desc) {
   dev_ctx_ = platform::DeviceContextPool::Instance().Get(place_);
   program_.reset(
       new ProgramDesc(desc.section_param().section_config().program_desc()));
-  //     desc.section_param().section_config(section_id_).program_desc()));
   for (auto& op_desc : program_->Block(0).AllOps()) {
     ops_.push_back(OpRegistry::CreateOp(*op_desc));
   }
 }

 void SectionWorker::AutoSetCPUAffinity(bool reuse) {
-  // int thread_cpu_id = cpu_id_.fetch_add(1);
   unsigned concurrency_cap = std::thread::hardware_concurrency();
-  // unsigned proc = thread_cpu_id;
   unsigned proc = cpu_id_;
   if (proc >= concurrency_cap) {
@@ -61,7 +52,6 @@ void SectionWorker::AutoSetCPUAffinity(bool reuse) {
     LOG(INFO) << "All " << concurrency_cap
               << " CPUs have been set affinities. Fail to set " << cpu_id_
               << "th thread.";
-    // << thread_cpu_id << "th thread";
     return;
   }
 }
@@ -80,13 +70,12 @@ void SectionWorker::AutoSetCPUAffinity(bool reuse) {
       (0 == CPU_ISSET(proc, &mask))) {
     LOG(WARNING) << "Fail to set thread affinity to CPU " << proc;
   }
-  // VLOG(3) << "Set " << thread_cpu_id << "th thread affinity to CPU " << proc;
   VLOG(3) << "Set " << cpu_id_ << "th thread affinity to CPU " << proc;
 }

 void SectionWorker::TrainFiles() {
-  VLOG(3) << "begin section_worker TrainFiles";
+  VLOG(5) << "begin section_worker TrainFiles";
-  //  AutoSetCPUAffinity(true);
+  AutoSetCPUAffinity(true);
   int64_t max_memory_size = 0;
   std::unique_ptr<GarbageCollector> gc;
@@ -109,12 +98,6 @@ void SectionWorker::TrainFiles() {
 #endif

   platform::Timer batch_timer;
-  // if (thread_id_ == 0) {
-  // while (true) {
-  // Start a minibatch.
-  // real number of microbatches run
-  // int real_microbatch_num = 0;
   batch_timer.Start();
   for (int i = 0; i < num_microbatches_; ++i) {
     try {
@@ -130,7 +113,8 @@ void SectionWorker::TrainFiles() {
             op_role == (static_cast<int>(OpRole::kForward) |
                         static_cast<int>(OpRole::kLoss));
         if ((i == 0 && run_first_mbatch) || (i != 0 && run_others)) {
-          VLOG(3) << "running an op " << op->Type() << " for scope " << i;
+          VLOG(3) << "Forward: running op " << op->Type() << " for micro-batch "
+                  << i;
           op->Run(*microbatch_scopes_[i], place_);
           if (gc) {
             DeleteUnusedTensors(*microbatch_scopes_[i], op.get(), unused_vars_,
@@ -139,19 +123,10 @@ void SectionWorker::TrainFiles() {
        }
      }
    } catch (platform::EOFException& e) {
-      // std::unique_lock<std::mutex> lk(thread_mutex);
-      // threads_completed = true;
-      VLOG(3) << "thread completed.";
-      // VLOG(3) << "called notify all";
-      // thread_condition.notify_all();
-      VLOG(3) << "EOF encountered";
-      // throw platform::EOFException();
-      // throw e;
-      PADDLE_THROW_EOF();
-      break;
+      VLOG(3) << "EOF encountered and completed.";
+      throw;
    }
  }
+  dev_ctx_->Wait();
  // backward pass
  for (int i = 0; i < num_microbatches_; ++i) {
@@ -160,7 +135,8 @@ void SectionWorker::TrainFiles() {
      if (op_role == static_cast<int>(OpRole::kBackward) ||
          op_role == (static_cast<int>(OpRole::kBackward) |
                      static_cast<int>(OpRole::kLoss))) {
-        VLOG(3) << "running an op " << op->Type() << " for scope " << i;
+        VLOG(3) << "Backward: running op " << op->Type() << " for micro-batch "
+                << i;
        op->Run(*microbatch_scopes_[i], place_);
        if (gc) {
          DeleteUnusedTensors(*microbatch_scopes_[i], op.get(), unused_vars_,
@@ -169,30 +145,28 @@ void SectionWorker::TrainFiles() {
      }
    }
  }
+  dev_ctx_->Wait();
  // update pass
  for (auto& op : ops_) {
    int op_role = op->Attr<int>(std::string("op_role"));
    if (op_role == static_cast<int>(OpRole::kOptimize)) {
-      VLOG(3) << "running an op " << op->Type() << " for minibatch scope";
+      VLOG(3) << "Update: running op " << op->Type();
      op->Run(*microbatch_scopes_[0], place_);
      if (gc) {
+        for (int i = 0; i < num_microbatches_; ++i) {
-        DeleteUnusedTensors(*microbatch_scopes_[0], op.get(), unused_vars_,
+          DeleteUnusedTensors(*microbatch_scopes_[i], op.get(), unused_vars_,
                            gc.get());
+        }
      }
    }
  }
  dev_ctx_->Wait();
  batch_timer.Pause();
-  VLOG(0) << "batch time: " << batch_timer.ElapsedUS();
+  VLOG(0) << "batch: " << batch_id_ << ", time: " << batch_timer.ElapsedUS();
  ++batch_id_;
 }

 void SectionWorker::TrainFilesWithProfiler() {
-  VLOG(3) << "begin section_worker TrainFiles with profiler";
+  VLOG(5) << "begin section_worker TrainFiles with profiler";
-  //  AutoSetCPUAffinity(true);
+  AutoSetCPUAffinity(true);
   platform::Timer batch_timer;
   platform::Timer timeline;
@@ -216,7 +190,6 @@ void SectionWorker::TrainFilesWithProfiler() {
   int64_t max_memory_size = 0;
   std::unique_ptr<GarbageCollector> gc;
-  // const std::vector<std::string> keep_vars;
   auto unused_vars_ = GetUnusedVars(program_->Block(0), ops_, skip_vars_);
 #ifdef PADDLE_WITH_CUDA
   if (platform::is_gpu_place(place_)) {
@@ -235,14 +208,13 @@ void SectionWorker::TrainFilesWithProfiler() {
   }
 #endif
-  // if (thread_id_ == 0) {
   struct timeval start;
   struct timeval end;
   struct timeval micro_start;
   struct timeval micro_end;
   // Start a minibatch.
   batch_timer.Start();
-  // int real_microbatch_num = 0;
   for (int i = 0; i < num_microbatches_; ++i) {
     try {
       int op_idx = 0;
@@ -260,9 +232,8 @@ void SectionWorker::TrainFilesWithProfiler() {
             op_role == (static_cast<int>(OpRole::kForward) |
                         static_cast<int>(OpRole::kLoss));
         if ((i == 0 && run_first_mbatch) || (i != 0 && run_others)) {
-          // VLOG(3) << "running an op " << op->Type() << " for " << thread_id_
-          // << " for scope " << i;
-          VLOG(3) << "running an op " << op->Type() << " for scope " << i;
+          VLOG(3) << "Forward: running op " << op->Type() << " for micro-batch "
+                  << i;
           timeline.Start();
           op->Run(*microbatch_scopes_[i], place_);
           if (gc) {
@@ -282,32 +253,26 @@ void SectionWorker::TrainFilesWithProfiler() {
          }
          op_count[op_idx] += 1;
          op_total_time[op_idx] += time;
          {
-            // std::unique_lock<std::mutex> lk(cout_mutex);
            std::cout << std::fixed;
            std::cout.precision(0);
-            std::cout << "::FWD:B[" << batch_id_ << "]:SEC[" << thread_id_
+            std::cout << "::FWD:B[" << batch_id_
                      << "]:SCOPE[" << i << "]:OP["
                      << op->Type() << "]:START["
                      << start.tv_sec * 1e6 + start.tv_usec << "]:END["
                      << end.tv_sec * 1e6 + end.tv_usec << "]" << std::endl;
          }
        }
        op_idx++;
      }
      gettimeofday(&micro_end, NULL);
      {
-        // std::unique_lock<std::mutex> lk(cout_mutex);
        std::cout << std::fixed;
        std::cout.precision(0);
-        std::cout << "!!FWD:B[" << batch_id_ << "]:SEC[" << thread_id_
+        std::cout << "!!FWD:B[" << batch_id_
                  << "]:START["
                  << micro_start.tv_sec * 1e6 + micro_start.tv_usec << "]:END["
                  << micro_end.tv_sec * 1e6 + micro_end.tv_usec << "]"
                  << std::endl;
      }
    } catch (platform::EOFException& e) {
-      VLOG(3) << "thread completed.";
-      VLOG(0) << "EOF encountered";
+      VLOG(0) << "EOF encountered, and completed";
      VLOG(0) << "============timeline============";
      for (size_t i = 0; i < ops_.size(); ++i) {
        VLOG(0) << "op: " << op_name[i] << ", max_time: " << op_max_time[i]
@@ -315,11 +280,10 @@ void SectionWorker::TrainFilesWithProfiler() {
                << ", mean_time: " << op_total_time[i] / op_count[i];
      }
      VLOG(0) << "================================";
-      throw e;
-      break;
+      throw;
    }
  }
+  dev_ctx_->Wait();
  // backward pass
  for (int i = 0; i < num_microbatches_; ++i) {
    int op_idx = 0;
@@ -330,7 +294,8 @@ void SectionWorker::TrainFilesWithProfiler() {
      if (op_role == static_cast<int>(OpRole::kBackward) ||
          op_role == (static_cast<int>(OpRole::kBackward) |
                      static_cast<int>(OpRole::kLoss))) {
-        VLOG(3) << "running an op " << op->Type() << " for scope " << i;
+        VLOG(3) << "Backward: running an op " << op->Type() << " for micro-batch "
+                << i;
        timeline.Start();
        op->Run(*microbatch_scopes_[i], place_);
        if (gc) {
@@ -350,35 +315,25 @@ void SectionWorker::TrainFilesWithProfiler() {
        }
        op_count[op_idx] += 1;
        op_total_time[op_idx] += time;
        {
-          // std::unique_lock<std::mutex> lk(cout_mutex);
          std::cout << std::fixed;
          std::cout.precision(0);
-          std::cout << "::BWD:B[" << batch_id_ << "]:SEC[" << thread_id_
+          std::cout << "::BWD:B[" << batch_id_
                    << "]:SCOPE[" << i << "]:OP["
                    << op->Type() << "]:START["
                    << start.tv_sec * 1e6 + start.tv_usec << "]:END["
                    << end.tv_sec * 1e6 + end.tv_usec << "]" << std::endl;
        }
      }
      op_idx++;
    }
    gettimeofday(&micro_end, NULL);
    {
-      // std::unique_lock<std::mutex> lk(cout_mutex);
      std::cout << std::fixed;
      std::cout.precision(0);
-      std::cout << "!!BWD:B[" << batch_id_ << "]:SEC[" << thread_id_
+      std::cout << "!!BWD:B[" << batch_id_
                << "]:START["
                << micro_start.tv_sec * 1e6 + micro_start.tv_usec << "]:END["
                << micro_end.tv_sec * 1e6 + micro_end.tv_usec << "]"
                << std::endl;
    }
  }
+  dev_ctx_->Wait();
-  // if (real_microbatch_num == 0) {
-  //   batch_timer.Pause();
-  //   VLOG(0) << "batch time: " << batch_timer.ElapsedUS();
-  //   return;
-  // }
  // update pass
  int op_idx = 0;
  gettimeofday(&micro_start, NULL);
@@ -386,16 +341,13 @@ void SectionWorker::TrainFilesWithProfiler() {
    gettimeofday(&start, NULL);
    int op_role = op->Attr<int>(std::string("op_role"));
    if (op_role == static_cast<int>(OpRole::kOptimize)) {
-      VLOG(3) << "running an op " << op->Type() << " for " << thread_id_
-              << " for minibatch scope";
+      VLOG(3) << "Update: running op " << op->Type();
      timeline.Start();
      op->Run(*microbatch_scopes_[0], place_);
      if (gc) {
+        for (int i = 0; i < num_microbatches_; ++i) {
-        DeleteUnusedTensors(*microbatch_scopes_[0], op.get(), unused_vars_,
+          DeleteUnusedTensors(*microbatch_scopes_[i], op.get(), unused_vars_,
                            gc.get());
+        }
      }
      cudaDeviceSynchronize();
      gettimeofday(&end, NULL);
      timeline.Pause();
@@ -409,31 +361,27 @@ void SectionWorker::TrainFilesWithProfiler() {
      }
      op_count[op_idx] += 1;
      op_total_time[op_idx] += time;
      {
        std::cout << std::fixed;
        std::cout.precision(0);
-        std::cout << "::UPD:B[" << batch_id_ << "]:SEC[" << thread_id_
+        std::cout << "::UPD:B[" << batch_id_ << "]:SCOPE[" << num_microbatches_
                  << "]:OP[" << op->Type() << "]:START["
                  << start.tv_sec * 1e6 + start.tv_usec << "]:END["
                  << end.tv_sec * 1e6 + end.tv_usec << "]" << std::endl;
      }
    }
    op_idx++;
  }
  gettimeofday(&micro_end, NULL);
  {
    std::cout << std::fixed;
    std::cout.precision(0);
-    std::cout << "!!UPD:B[" << batch_id_ << "]:SEC[" << thread_id_ << "]:START["
+    std::cout << "!!UPD:B[" << batch_id_ << "]:START["
              << micro_start.tv_sec * 1e6 + micro_start.tv_usec << "]:END["
              << micro_end.tv_sec * 1e6 + micro_end.tv_usec << "]" << std::endl;
  }
  dev_ctx_->Wait();
  batch_timer.Pause();
-  VLOG(0) << "batch time: " << batch_timer.ElapsedUS();
+  VLOG(0) << "batch: " << batch_id_ << ", time: " << batch_timer.ElapsedUS();
  ++batch_id_;
 }

 }  // namespace framework
 }  // namespace paddle
 #endif
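For readers skimming the section_worker.cc changes: the rewritten TrainFiles() runs three phases per minibatch — forward (and loss) ops over every micro-batch, then backward ops over every micro-batch, then the optimizer ops once against the first micro-batch scope — with a dev_ctx_->Wait() barrier between phases. The sketch below is a simplified, self-contained illustration of that schedule only; the Op struct and DeviceWait() are stand-ins for Paddle's OperatorBase and DeviceContext, and it deliberately omits the run_first_mbatch/run_others special-casing of micro-batch 0 that the real code keeps.

// Simplified sketch of the forward / backward / update schedule in the new
// SectionWorker::TrainFiles() (stand-in types, not Paddle's real classes).
#include <iostream>
#include <string>
#include <vector>

enum class OpRole { kForward, kLoss, kBackward, kOptimize };

struct Op {
  std::string type;
  OpRole role;
  void Run(int microbatch) const {
    std::cout << type << " on micro-batch " << microbatch << "\n";
  }
};

void DeviceWait() { /* placeholder for dev_ctx_->Wait() */ }

void TrainOneBatch(const std::vector<Op>& ops, int num_microbatches) {
  // forward pass: forward and loss ops, micro-batch by micro-batch
  for (int i = 0; i < num_microbatches; ++i)
    for (const auto& op : ops)
      if (op.role == OpRole::kForward || op.role == OpRole::kLoss) op.Run(i);
  DeviceWait();
  // backward pass: backward ops, micro-batch by micro-batch
  for (int i = 0; i < num_microbatches; ++i)
    for (const auto& op : ops)
      if (op.role == OpRole::kBackward) op.Run(i);
  DeviceWait();
  // update pass: optimizer ops run once, against the first micro-batch scope
  for (const auto& op : ops)
    if (op.role == OpRole::kOptimize) op.Run(0);
  DeviceWait();
}

int main() {
  std::vector<Op> ops = {{"matmul", OpRole::kForward},
                         {"softmax_with_cross_entropy", OpRole::kLoss},
                         {"matmul_grad", OpRole::kBackward},
                         {"sgd", OpRole::kOptimize}};
  TrainOneBatch(ops, /*num_microbatches=*/2);
  return 0;
}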
paddle/fluid/framework/trainer.h  (+0, -13)

@@ -221,32 +221,19 @@ class PipelineTrainer : public TrainerBase {
   void GetSkipVars(const ProgramDesc& main_program);

  protected:
-  // int section_num_;
   int num_microbatches_;
-  int start_cpu_core_id_;
-  // std::vector<platform::Place> places_;
   platform::Place place_;
-  // std::vector<std::vector<std::string>> skip_vars_;
   std::vector<std::string> skip_vars_;
   TrainerDesc trainer_desc_;

-  // std::vector<std::thread> section_threads_;
   std::thread section_thread_;
-  // worker: [section_id]
-  // std::vector<std::shared_ptr<paddle::framework::DeviceWorker>> workers_;
   std::shared_ptr<paddle::framework::DeviceWorker> worker_;
-  // minibatch_scopes_: [section_id]
-  // std::vector<Scope*> minibatch_scopes_;
   Scope* minibatch_scope_;
-  // microbatch_scopes_: [section_id][microbatch_id]
-  // std::vector<std::vector<Scope*>> microbatch_scopes_;
   // microbatch_scopes_: [microbatch_id]
   std::vector<Scope*> microbatch_scopes_;

   void CopyParameters(int microbatch_id, const ProgramDesc& program,
                       const platform::Place& place);
-  // bool isPersistableVarGrad(std::string name);
-  // bool isPersistable(VarDesc* var);
 };
 #endif
paddle/fluid/operators/collective/c_recv_op.cu.cc  (+3, -2)

@@ -73,8 +73,9 @@ class CRecvOpCUDAKernel : public framework::OpKernel<T> {
     } else {
       stream = comm->stream();
     }
-    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclRecv(
-        numel_ptr, 1, ncclInt, peer, comm->comm(), stream));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        platform::dynload::ncclRecv(static_cast<void*>(numel_ptr), 1, ncclInt,
+                                    peer, comm->comm(), stream));
     PADDLE_ENFORCE_CUDA_SUCCESS(
         cudaMemcpy(&numel, numel_ptr, sizeof(int), cudaMemcpyDeviceToHost));
     VLOG(0) << "numel:" << numel;