Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleRec
提交
f7d671a6
P
PaddleRec
项目概览
PaddlePaddle
/
PaddleRec
通知
68
Star
12
Fork
5
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
27
列表
看板
标记
里程碑
合并请求
10
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
27
Issue
27
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f7d671a6
编写于
8月 26, 2019
作者:
Y
yaopenghui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add auc monitor
上级
46be5864
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
271 addition
and
8 deletion
+271
-8
paddle/fluid/train/custom_trainer/feed/common/runtime_environment.cc
...d/train/custom_trainer/feed/common/runtime_environment.cc
+67
-0
paddle/fluid/train/custom_trainer/feed/common/runtime_environment.h
...id/train/custom_trainer/feed/common/runtime_environment.h
+4
-0
paddle/fluid/train/custom_trainer/feed/dataset/data_reader.h
paddle/fluid/train/custom_trainer/feed/dataset/data_reader.h
+1
-0
paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.cc
...le/fluid/train/custom_trainer/feed/monitor/auc_monitor.cc
+162
-0
paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.h
paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.h
+31
-6
paddle/fluid/train/custom_trainer/feed/monitor/monitor.h
paddle/fluid/train/custom_trainer/feed/monitor/monitor.h
+6
-2
未找到文件。
paddle/fluid/train/custom_trainer/feed/common/runtime_environment.cc
浏览文件 @
f7d671a6
...
...
@@ -5,6 +5,58 @@ namespace paddle {
namespace
custom_trainer
{
namespace
feed
{
template
<
class
T
>
struct
mpi_type_trait
{
};
template
<
>
struct
mpi_type_trait
<
double
>
{
static
MPI_Datatype
type
()
{
return
MPI_DOUBLE
;
}
};
template
<
>
struct
mpi_type_trait
<
float
>
{
static
MPI_Datatype
type
()
{
return
MPI_FLOAT
;
}
};
template
<
>
struct
mpi_type_trait
<
int32_t
>
{
static
MPI_Datatype
type
()
{
return
MPI_INT
;
}
};
template
<
>
struct
mpi_type_trait
<
uint32_t
>
{
static
MPI_Datatype
type
()
{
return
MPI_UNSIGNED
;
}
};
template
<
>
struct
mpi_type_trait
<
int64_t
>
{
static
MPI_Datatype
type
()
{
return
MPI_LONG_LONG
;
}
};
template
<
>
struct
mpi_type_trait
<
uint64_t
>
{
static
MPI_Datatype
type
()
{
return
MPI_UNSIGNED_LONG_LONG
;
}
};
template
<
>
struct
mpi_type_trait
<
long
long
>
{
static
MPI_Datatype
type
()
{
return
MPI_LONG_LONG
;
}
};
template
<
>
struct
mpi_type_trait
<
unsigned
long
long
>
{
static
MPI_Datatype
type
()
{
return
MPI_UNSIGNED_LONG_LONG
;
}
};
RuntimeEnvironment
::
RuntimeEnvironment
()
{}
RuntimeEnvironment
::~
RuntimeEnvironment
()
{}
bool
RuntimeEnvironment
::
is_master_node
(
EnvironmentRole
role
)
{
...
...
@@ -79,6 +131,15 @@ public:
MPI_Bcast
(
ar
.
Buffer
(),
len
,
MPI_BYTE
,
root_id
,
node_info
.
mpi_comm
);
}
virtual
double
all_reduce_ele
(
double
x
)
{
double
tot
;
MPI_Allreduce
(
&
x
,
&
tot
,
1
,
mpi_type_trait
<
double
>::
type
(),
MPI_SUM
,
MPI_COMM_WORLD
);
return
tot
;
}
virtual
void
all_reduce_arr
(
double
*
x
,
int
n
)
{
MPI_Allreduce
(
MPI_IN_PLACE
,
x
,
n
,
mpi_type_trait
<
double
>::
type
(),
MPI_SUM
,
MPI_COMM_WORLD
);
}
protected:
virtual
void
print_log
(
EnvironmentRole
role
,
EnvironmentLogType
type
,
EnvironmentLogLevel
level
,
const
std
::
string
&
log_str
)
{
...
...
@@ -123,6 +184,12 @@ public:
virtual
void
bcast
(
paddle
::
framework
::
BinaryArchive
&
ar
,
int
root_id
,
EnvironmentRole
role
)
{
return
;
}
virtual
double
all_reduce_ele
(
double
x
)
{
return
x
;
}
virtual
void
all_reduce_arr
(
double
*
x
,
int
n
)
{
return
;
}
protected:
virtual
void
print_log
(
EnvironmentRole
role
,
EnvironmentLogType
type
,
EnvironmentLogLevel
level
,
const
std
::
string
&
log_str
)
{
...
...
paddle/fluid/train/custom_trainer/feed/common/runtime_environment.h
浏览文件 @
f7d671a6
...
...
@@ -67,6 +67,10 @@ public:
virtual
void
barrier
(
EnvironmentRole
role
)
=
0
;
//bcast 广播
virtual
void
bcast
(
paddle
::
framework
::
BinaryArchive
&
ar
,
int
root_id
,
EnvironmentRole
role
)
=
0
;
//all_reduce sum element 规约元素
virtual
double
all_reduce_ele
(
double
x
)
=
0
;
//all_reduce sum array 规约数组
virtual
void
all_reduce_arr
(
double
*
x
,
int
n
)
=
0
;
//接口只允许在主线程调用 End
protected:
virtual
void
print_log
(
EnvironmentRole
role
,
EnvironmentLogType
type
,
...
...
paddle/fluid/train/custom_trainer/feed/dataset/data_reader.h
浏览文件 @
f7d671a6
...
...
@@ -51,6 +51,7 @@ private:
struct
SampleInstance
{
std
::
string
id
;
std
::
vector
<
float
>
predicts
;
std
::
vector
<
float
>
labels
;
std
::
vector
<
FeatureItem
>
features
;
std
::
vector
<
float
>
embedx
;
...
...
paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.cc
0 → 100644
浏览文件 @
f7d671a6
#include "paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.h"
namespace
paddle
{
namespace
custom_trainer
{
namespace
feed
{
int
AucMonitor
::
initialize
(
const
YAML
::
Node
&
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
Monitor
::
initialize
(
config
,
context_ptr
);
_target_name
=
config
[
"target"
].
as
<
std
::
string
>
();
_label_name
=
config
[
"label"
].
as
<
std
::
string
>
();
_table_size
=
1000000
;
if
(
config
[
"table_size"
])
{
_table_size
=
config
[
"table_size"
].
as
<
int
>
();
}
set_table_size
(
_table_size
);
_compute_interval
=
3600
;
if
(
config
[
"compute_interval"
])
{
uint32_t
interval
=
config
[
"compute_interval"
].
as
<
uint32_t
>
();
if
(
interval
!=
3600
||
interval
!=
86400
)
{
LOG
(
FATAL
)
<<
" AucMonitor config compute_interval just support hour: 3600 or day: 86400. "
;
return
-
1
;
}
_compute_interval
=
interval
;
}
}
void
AucMonitor
::
add_data
(
int
epoch_id
,
const
Executor
*
executor
,
SampleInstance
*
instance
,
size_t
num
)
{
if
(
executor
==
nullptr
||
instance
==
nullptr
||
instance
->
predicts
.
empty
()
||
instance
->
labels
.
empty
()
||
num
<=
0
||
instance
->
predicts
.
size
()
<
num
||
instance
->
labels
.
size
()
<
num
)
{
LOG
(
FATAL
)
<<
"AucMonitor add predict data is invalid, predicts or labels is empty, num["
<<
num
<<
"]"
;
return
;
}
std
::
lock_guard
<
std
::
mutex
>
lock
(
_mutex
);
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
add_unlocked
(
instance
->
predicts
[
i
],
std
::
lround
(
instance
->
labels
[
i
]));
}
}
bool
AucMonitor
::
need_compute_result
(
int
epoch_id
,
EpochAccessor
*
accessor
)
{
CHECK
(
accessor
!=
nullptr
);
uint64_t
epoch_time
=
accessor
->
epoch_timestamp
(
epoch_id
);
if
(
epoch_time
%
_compute_interval
!=
0
)
{
return
false
;
}
return
true
;
}
void
AucMonitor
::
compute_result
()
{
double
*
table
[
2
]
=
{
&
_table
[
0
][
0
],
&
_table
[
1
][
0
]};
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
Monitor
::
_context_ptr
->
environment
->
all_reduce_arr
(
table
[
i
],
_table_size
);
}
double
area
=
0
;
double
fp
=
0
;
double
tp
=
0
;
for
(
int
i
=
_table_size
-
1
;
i
>=
0
;
i
--
)
{
double
newfp
=
fp
+
table
[
0
][
i
];
double
newtp
=
tp
+
table
[
1
][
i
];
area
+=
(
newfp
-
fp
)
*
(
tp
+
newtp
)
/
2
;
fp
=
newfp
;
tp
=
newtp
;
}
_auc
=
area
/
(
fp
*
tp
);
_mae
=
Monitor
::
_context_ptr
->
environment
->
all_reduce_ele
(
_local_abserr
)
/
(
fp
+
tp
);
_rmse
=
sqrt
(
Monitor
::
_context_ptr
->
environment
->
all_reduce_ele
(
_local_sqrerr
)
/
(
fp
+
tp
));
_rmse
=
sqrt
(
_rmse
/
(
fp
+
tp
));
_actual_ctr
=
tp
/
(
fp
+
tp
);
_predicted_ctr
=
Monitor
::
_context_ptr
->
environment
->
all_reduce_ele
(
_local_pred
)
/
(
fp
+
tp
);
_size
=
fp
+
tp
;
calculate_bucket_error
();
}
std
::
string
AucMonitor
::
format_result
()
{
double
copc
=
0.0
;
if
(
fabs
(
_predicted_ctr
)
>
1e-6
)
{
copc
=
_actual_ctr
/
_predicted_ctr
;
}
char
buf
[
10240
];
snprintf
(
buf
,
10240
*
sizeof
(
char
),
"%s: AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f "
"Actual CTR=%.6f Predicted CTR=%.6f COPC=%.6f INS Count=%.0f"
,
Monitor
::
_name
.
c_str
(),
_auc
,
_bucket_error
,
_mae
,
_rmse
,
_actual_ctr
,
_predicted_ctr
,
copc
,
_size
);
return
std
::
string
(
buf
);
}
void
AucMonitor
::
add_unlocked
(
double
pred
,
int
label
)
{
CHECK
(
pred
>=
0
&&
pred
<=
1
)
<<
"pred["
<<
pred
<<
"] outside of [0,1]"
;
CHECK
(
label
==
0
||
label
==
1
)
<<
"label["
<<
label
<<
"] invalid"
;
_table
[
label
][
std
::
min
(
int
(
pred
*
_table_size
),
_table_size
-
1
)]
++
;
_local_abserr
+=
fabs
(
pred
-
label
);
_local_sqrerr
+=
(
pred
-
label
)
*
(
pred
-
label
);
_local_pred
+=
pred
;
}
void
AucMonitor
::
calculate_bucket_error
()
{
double
last_ctr
=
-
1
;
double
impression_sum
=
0
;
double
ctr_sum
=
0.0
;
double
click_sum
=
0.0
;
double
error_sum
=
0.0
;
double
error_count
=
0
;
double
*
table
[
2
]
=
{
&
_table
[
0
][
0
],
&
_table
[
1
][
0
]};
for
(
int
i
=
0
;
i
<
_table_size
;
i
++
)
{
double
click
=
table
[
1
][
i
];
double
show
=
table
[
0
][
i
]
+
table
[
1
][
i
];
double
ctr
=
(
double
)
i
/
_table_size
;
if
(
fabs
(
ctr
-
last_ctr
)
>
kMaxSpan
)
{
last_ctr
=
ctr
;
impression_sum
=
0.0
;
ctr_sum
=
0.0
;
click_sum
=
0.0
;
}
impression_sum
+=
show
;
ctr_sum
+=
ctr
*
show
;
click_sum
+=
click
;
double
adjust_ctr
=
ctr_sum
/
impression_sum
;
double
relative_error
=
sqrt
((
1
-
adjust_ctr
)
/
(
adjust_ctr
*
impression_sum
));
if
(
relative_error
<
kRelativeErrorBound
)
{
double
actual_ctr
=
click_sum
/
impression_sum
;
double
relative_ctr_error
=
fabs
(
actual_ctr
/
adjust_ctr
-
1
);
error_sum
+=
relative_ctr_error
*
impression_sum
;
error_count
+=
impression_sum
;
last_ctr
=
-
1
;
}
}
_bucket_error
=
error_count
>
0
?
error_sum
/
error_count
:
0.0
;
}
void
AucMonitor
::
set_table_size
(
int
table_size
)
{
CHECK
(
table_size
>=
1
);
_table_size
=
table_size
;
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
_table
[
i
]
=
std
::
vector
<
double
>
();
}
reset
();
}
void
AucMonitor
::
reset
()
{
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
_table
[
i
].
assign
(
_table_size
,
0.0
);
}
_local_abserr
=
0
;
_local_sqrerr
=
0
;
_local_pred
=
0
;
}
}
// namespace feed
}
// namespace custom_trainer
}
// namespace paddle
paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.h
浏览文件 @
f7d671a6
#pragma once
#include <string>
#include <cmath> //std::lround
#include "paddle/fluid/train/custom_trainer/feed/monitor/monitor.h"
namespace
paddle
{
...
...
@@ -14,14 +15,13 @@ public:
virtual
~
AucMonitor
()
{}
virtual
int
initialize
(
const
YAML
::
Node
&
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
Monitor
::
initialize
(
config
,
context_ptr
);
//一些额外配置 对于AUC主要是target && label 信息
return
0
;
}
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
override
;
//添加一项记录,统计内容Monitor自行从Executor按需获取
virtual
void
add_data
(
int
epoch_id
,
const
Executor
*
executor
);
virtual
void
add_data
(
int
epoch_id
,
const
Executor
*
executor
,
SampleInstance
*
instance
,
size_t
num
);
//是否开始结果统计
virtual
bool
need_compute_result
(
int
epoch_id
,
EpochAccessor
*
accessor
);
...
...
@@ -31,6 +31,31 @@ public:
virtual
std
::
string
format_result
();
virtual
void
reset
();
protected:
std
::
string
_label_name
;
std
::
string
_target_name
;
std
::
string
_name
;
std
::
string
_output_var
;
std
::
mutex
_mutex
;
double
_local_abserr
,
_local_sqrerr
,
_local_pred
;
double
_auc
;
double
_mae
;
double
_rmse
;
double
_actual_ctr
,
_predicted_ctr
;
double
_size
;
double
_bucket_error
;
int
_table_size
;
void
add_unlocked
(
double
pred
,
int
label
);
private:
void
calculate_bucket_error
();
void
set_table_size
(
int
table_size
);
uint32_t
_compute_interval
;
std
::
vector
<
double
>
_table
[
2
];
static
constexpr
double
kRelativeErrorBound
=
0.05
;
static
constexpr
double
kMaxSpan
=
0.01
;
};
}
// namespace feed
...
...
paddle/fluid/train/custom_trainer/feed/monitor/monitor.h
浏览文件 @
f7d671a6
#pragma once
#include <string>
#include "paddle/fluid/train/custom_trainer/feed/accessor/epoch_accessor.h"
#include "paddle/fluid/train/custom_trainer/feed/common/registerer.h"
#include "paddle/fluid/train/custom_trainer/feed/trainer_context.h"
#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h"
#include "paddle/fluid/train/custom_trainer/feed/dataset/data_reader.h"
namespace
paddle
{
namespace
custom_trainer
{
...
...
@@ -15,12 +17,13 @@ public:
virtual
int
initialize
(
const
YAML
::
Node
&
config
,
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
_name
=
conf
[
"name"
].
as
<
std
::
string
>
();
_name
=
config
[
"name"
].
as
<
std
::
string
>
();
_context_ptr
=
context_ptr
;
return
0
;
}
//添加一项记录,统计内容Monitor自行从Executor按需获取
virtual
void
add_data
(
int
epoch_id
,
const
Executor
*
executor
)
=
0
;
virtual
void
add_data
(
int
epoch_id
,
const
Executor
*
executor
,
SampleInstance
*
instance
,
size_t
num
)
=
0
;
//是否对于当前epoch_id进行结果统计
virtual
bool
need_compute_result
(
int
epoch_id
,
EpochAccessor
*
accessor
)
=
0
;
...
...
@@ -37,6 +40,7 @@ public:
protected:
std
::
string
_name
;
std
::
shared_ptr
<
TrainerContext
>
_context_ptr
;
};
REGISTER_REGISTERER
(
Monitor
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录