Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit 5dcfb699
Authored Mar 23, 2022 by phlrain

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_some_yaml_config

Parents: 31363c3f, 8e67629c
Showing 75 changed files with 3210 additions and 607 deletions (+3210, -607).
Changed files:

cmake/external/ascend.cmake  +3 -3
paddle/fluid/distributed/ps/service/brpc_ps_client.cc  +15 -18
paddle/fluid/distributed/ps/service/communicator/communicator.cc  +25 -49
paddle/fluid/distributed/ps/service/communicator/communicator.h  +2 -2
paddle/fluid/distributed/ps/table/CMakeLists.txt  +2 -1
paddle/fluid/distributed/ps/table/common_dense_table.cc  +23 -13
paddle/fluid/distributed/ps/table/ctr_accessor.cc  +5 -1
paddle/fluid/distributed/ps/table/ctr_accessor.h  +14 -3
paddle/fluid/distributed/ps/table/depends/dense.h  +36 -14
paddle/fluid/distributed/ps/table/sparse_accessor.cc  +339 -0
paddle/fluid/distributed/ps/table/sparse_accessor.h  +208 -0
paddle/fluid/distributed/ps/table/table.cc  +2 -0
paddle/fluid/distributed/ps/wrapper/fleet.cc  +163 -135
paddle/fluid/distributed/ps/wrapper/fleet.h  +21 -7
paddle/fluid/distributed/test/memory_sparse_table_test.cc  +3 -3
paddle/fluid/eager/tests/task_tests/eager_utils_test.cc  +3 -0
paddle/fluid/framework/CMakeLists.txt  +5 -4
paddle/fluid/framework/device_worker.h  +82 -0
paddle/fluid/framework/device_worker_factory.cc  +1 -0
paddle/fluid/framework/dist_multi_trainer.cc  +15 -3
paddle/fluid/framework/downpour_lite_worker.cc  +566 -0
paddle/fluid/framework/fleet/metrics.cc  +1 -1
paddle/fluid/framework/fleet/metrics.h  +1 -1
paddle/fluid/framework/pull_dense_worker.cc  +9 -0
paddle/fluid/framework/the_one_ps.proto  +0 -0
paddle/fluid/operators/abs_op.cc  +26 -2
paddle/fluid/operators/conv_op_npu.cc  +204 -0
paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc  +16 -27
paddle/fluid/operators/pscore/distributed_lookup_table_op.h  +9 -12
paddle/fluid/operators/pscore/distributed_push_sparse_op.cc  +3 -0
paddle/fluid/operators/pscore/distributed_push_sparse_op.h  +7 -10
paddle/fluid/operators/pscore/send_op.cc  +1 -1
paddle/fluid/platform/device/npu/npu_op_runner.cc  +2 -0
paddle/fluid/pybind/fleet_py.cc  +2 -0
paddle/phi/common/layout.h  +12 -0
paddle/phi/kernels/cpu/elementwise_grad_kernel.cc  +2 -2
paddle/phi/kernels/cpu/elementwise_kernel.cc  +4 -16
paddle/phi/kernels/elementwise_kernel.h  +10 -10
paddle/phi/kernels/gpu/elementwise_grad_kernel.cu  +2 -2
paddle/phi/kernels/gpu/elementwise_kernel.cu  +4 -16
paddle/phi/kernels/impl/elementwise_kernel_impl.h  +10 -10
paddle/phi/ops/compat/elementwise_sig.cc  +37 -35
python/paddle/distributed/fleet/base/fleet_base.py  +104 -3
python/paddle/distributed/fleet/base/runtime_factory.py  +1 -1
python/paddle/distributed/fleet/meta_optimizers/__init__.py  +1 -1
python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py  +13 -7
python/paddle/distributed/fleet/runtime/the_one_ps.py  +8 -9
python/paddle/distributed/fleet/utils/ps_util.py  +1 -1
python/paddle/distributed/passes/ps_server_pass.py  +0 -2
python/paddle/distributed/passes/ps_trainer_pass.py  +10 -5
python/paddle/distributed/ps/the_one_ps.py  +141 -46
python/paddle/distributed/ps/utils/ps_program_builder.py  +50 -4
python/paddle/distributed/ps/utils/public.py  +35 -29
python/paddle/fluid/communicator.py  +21 -7
python/paddle/fluid/dataset.py  +6 -7
python/paddle/fluid/device_worker.py  +183 -38
python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py  +15 -15
python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py  +106 -1
python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py  +35 -5
python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py  +543 -0
python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py  +1 -1
python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py  +2 -1
python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py  +2 -2
python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py  +2 -2
python/paddle/fluid/tests/unittests/test_dist_fleet_base.py  +3 -2
python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py  +9 -6
python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py  +6 -4
python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py  +1 -0
python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py  +4 -3
python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py  +2 -0
python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py  +2 -0
python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py  +3 -1
python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py  +2 -1
python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py  +2 -1
python/paddle/fluid/trainer_factory.py  +1 -1
cmake/external/ascend.cmake

@@ -90,9 +90,9 @@ endif()
 if(WITH_ASCEND_CL)
   macro(find_ascend_toolkit_version ascend_toolkit_version_info)
     file(READ ${ascend_toolkit_version_info} ASCEND_TOOLKIT_VERSION_CONTENTS)
-    string(REGEX MATCH "version=([0-9]+\.[0-9]+\.[0-9]+\.[a-z]*[0-9]*)" ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION_CONTENTS}")
-    string(REGEX REPLACE "version=([0-9]+\.[0-9]+\.[0-9]+\.[a-z]*[0-9]*)" "\\1" ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION}")
-    string(REGEX REPLACE "[a-z|\.]" "" CANN_VERSION ${ASCEND_TOOLKIT_VERSION})
+    string(REGEX MATCH "version=([0-9]+\.[0-9]+\.(RC)?[0-9]+\.[a-z]*[0-9]*)" ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION_CONTENTS}")
+    string(REGEX REPLACE "version=([0-9]+\.[0-9]+\.(RC)?[0-9]+\.[a-z]*[0-9]*)" "\\1" ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION}")
+    string(REGEX REPLACE "[A-Z]|[a-z|\.]" "" CANN_VERSION ${ASCEND_TOOLKIT_VERSION})
     add_definitions("-DCANN_VERSION_CODE=${CANN_VERSION}")
     if(NOT ASCEND_TOOLKIT_VERSION)
       set(ASCEND_TOOLKIT_VERSION "???")
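Aside (not part of the commit): the only functional change above is that the toolkit version pattern now tolerates an "RC" tag before the third numeric component, and upper-case letters are stripped when deriving CANN_VERSION. A minimal C++ sketch using a rough ECMAScript analogue of the CMake pattern, with made-up version strings, illustrates what the widened regex accepts:

#include <iostream>
#include <regex>
#include <string>

int main() {
  // Rough analogue of the new CMake pattern; the sample strings are hypothetical.
  const std::regex pat(R"(version=([0-9]+\.[0-9]+\.(RC)?[0-9]+\.[a-z]*[0-9]*))");
  for (const std::string s : {"version=5.0.2.alpha005", "version=5.1.RC1.alpha001"}) {
    std::smatch m;
    if (std::regex_search(s, m, pat)) {
      std::cout << s << " -> ASCEND_TOOLKIT_VERSION " << m[1] << "\n";
    }
  }
  return 0;
}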
paddle/fluid/distributed/ps/service/brpc_ps_client.cc

@@ -238,7 +238,7 @@ int32_t BrpcPsClient::initialize() {
       std::thread(std::bind(&BrpcPsClient::push_dense_task_consume, this));
   // for debug
   // _print_thread =
-  //     std::thread(std::bind(&BrpcPsClient::print_queue_size_thread, this));
+  // std::thread(std::bind(&BrpcPsClient::print_queue_size_thread, this));
   return 0;
 }

@@ -1315,11 +1315,11 @@ std::future<int32_t> BrpcPsClient::push_sparse(size_t table_id,
   CostTimer parse_timer("pserver_client_push_sparse_parse");
   int push_sparse_async_num = _push_sparse_task_queue_map[table_id]->Size();
   while (push_sparse_async_num > FLAGS_pserver_max_async_call_num) {
-    // LOG(INFO) << "push_sparse Waiting for async_call_num comsume, task_num:"
-    //           << push_sparse_async_num << ", max_task_limit:" <<
-    //           FLAGS_pserver_max_async_call_num;
+    // LOG(INFO) << "push_sparse Waiting for async_call_num comsume,
+    // task_num:"
+    //           << push_sparse_async_num
+    //           << ", max_task_limit:" << FLAGS_pserver_max_async_call_num;
     usleep(5000);  // 5ms
     // push_sparse_async_num = _push_sparse_task_queue_map[table_id]->size();
     push_sparse_async_num = _push_sparse_task_queue_map[table_id]->Size();
   }
   auto put_timer = std::make_shared<CostTimer>("client_push_sparse_put");

@@ -1381,8 +1381,7 @@ void BrpcPsClient::push_sparse_task_consume() {
   ::ThreadPool async_push_sparse_shard_threads(FLAGS_pserver_sparse_merge_thread);
   while (_running) {
-    platform::Timer timeline;
-    timeline.Start();
+    auto async_start_time_ms = butil::gettimeofday_ms();
     // process the pushTask of every sparse table
     for (auto &push_sparse_task_itr : _push_sparse_task_queue_map) {
       auto table_id = push_sparse_task_itr.first;

@@ -1497,9 +1496,8 @@ void BrpcPsClient::push_sparse_task_consume() {
         std::vector<std::future<int>>().swap(merge_status);
       }
     }
-    timeline.Pause();
-    auto wait_ms = FLAGS_pserver_async_push_sparse_interval_ms - (timeline.ElapsedMS());
+    auto wait_ms = FLAGS_pserver_async_push_sparse_interval_ms -
+                   (butil::gettimeofday_ms() - async_start_time_ms);
     if (wait_ms > 0) {
       usleep(wait_ms * 1000);
     }

@@ -1661,9 +1659,10 @@ std::future<int32_t> BrpcPsClient::push_dense(const Region *regions,
       std::make_shared<CostTimer>("pserver_client_push_dense_parse");
   int push_dense_async_num = _push_dense_task_queue_map[table_id]->Size();
   while (push_dense_async_num > FLAGS_pserver_max_async_call_num) {
-    LOG(INFO) << "push_dense Waiting for async_call_num comsume, task_num:"
-              << push_dense_async_num
-              << ", max_task_limit:" << FLAGS_pserver_max_async_call_num;
+    // LOG(INFO) << "push_dense Waiting for async_call_num comsume,
+    // task_num:"
+    //           << push_dense_async_num
+    //           << ", max_task_limit:" << FLAGS_pserver_max_async_call_num;
     usleep(5000);  // 5ms
     push_dense_async_num = _push_dense_task_queue_map[table_id]->Size();
   }

@@ -1701,8 +1700,7 @@ void BrpcPsClient::push_dense_task_consume() {
   static bool scale_gradient = FLAGS_pserver_scale_gradient_by_merge;
   ::ThreadPool async_merge_dense_threads(10);
   while (_running) {
-    platform::Timer timeline;
-    timeline.Start();
+    auto async_start_time_ms = butil::gettimeofday_ms();
     for (auto &task_queue_itr : _push_dense_task_queue_map) {
       auto &task_queue = task_queue_itr.second;
       auto queue_size = task_queue->Size();

@@ -1791,9 +1789,8 @@ void BrpcPsClient::push_dense_task_consume() {
       push_dense_raw_gradient(task_ptr, total_send_data, total_send_data_size, closure);
     }
-    timeline.Pause();
-    auto wait_ms = FLAGS_pserver_async_push_dense_interval_ms - (timeline.ElapsedMS());
+    auto wait_ms = FLAGS_pserver_async_push_dense_interval_ms -
+                   (butil::gettimeofday_ms() - async_start_time_ms);
     if (wait_ms > 0) {
       usleep(wait_ms * 1000);
     }
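Aside (not part of the commit): the consume loops above drop platform::Timer in favor of raw millisecond timestamps to pace each pass. A self-contained sketch of that pacing pattern, using std::chrono in place of butil::gettimeofday_ms and a placeholder for the queue-draining work:

#include <chrono>
#include <cstdint>
#include <thread>

static int64_t NowMs() {
  using namespace std::chrono;
  return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}

// Mirrors push_*_task_consume: do one pass of work, then sleep out the rest of the interval.
void ConsumeLoop(const bool& running, int64_t interval_ms) {
  while (running) {
    const int64_t start_ms = NowMs();  // plays the role of async_start_time_ms
    // ... drain the per-table task queues here ...
    const int64_t wait_ms = interval_ms - (NowMs() - start_ms);
    if (wait_ms > 0) {
      std::this_thread::sleep_for(std::chrono::milliseconds(wait_ms));
    }
  }
}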
paddle/fluid/distributed/ps/service/communicator/communicator.cc

@@ -13,11 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/distributed/ps/service/communicator/communicator.h"
 #include <google/protobuf/text_format.h>
 #include "gflags/gflags.h"
 #include "paddle/fluid/distributed/ps/service/brpc_ps_client.h"
+#include "paddle/fluid/distributed/ps/wrapper/fleet.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/string/string_helper.h"

@@ -66,34 +65,9 @@ std::shared_ptr<Communicator> Communicator::communicator_(nullptr);
 void Communicator::InitBrpcClient(
     const std::string &dist_desc,
     const std::vector<std::string> &host_sign_list) {
-  // not used, just for psclient's init
-  std::map<uint64_t, std::vector<paddle::distributed::Region>>
-      _dense_pull_regions;
-  for (auto &iter : recv_varname_to_ctx_) {
-    auto tid = iter.first;
-    auto var_names = iter.second;
-    auto &regions = _dense_pull_regions[tid];
-    regions.reserve(var_names.size());
-    for (auto &t : var_names) {
-      Variable *var = recv_scope_->FindVar(t);
-      LoDTensor *tensor = var->GetMutable<LoDTensor>();
-      float *w = tensor->data<float>();
-      paddle::distributed::Region reg(w, tensor->numel());
-      regions.emplace_back(std::move(reg));
-    }
-  }
+  auto fleet = paddle::distributed::FleetWrapper::GetInstance();
   if (_worker_ptr.get() == nullptr) {
-    google::protobuf::TextFormat::ParseFromString(dist_desc, &_ps_param);
-    init_gflag(_ps_param.init_gflags());
-    servers_ = host_sign_list.size();
-    _ps_env = paddle::distributed::PaddlePSEnvironment();
-    _ps_env.set_ps_servers(&host_sign_list, servers_);
-    _worker_ptr = std::unique_ptr<paddle::distributed::PSClient>(
-        paddle::distributed::PSClientFactory::create(_ps_param));
-    _worker_ptr->configure(_ps_param, _dense_pull_regions, _ps_env, trainer_id_);
+    _worker_ptr = fleet->worker_ptr_;
   }
   return;
 }

@@ -146,11 +120,11 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames,
   for (auto &t : varnames) {
     Variable *var = scope->FindVar(t);
     LoDTensor *tensor = var->GetMutable<LoDTensor>();
-    VLOG(1) << "AsyncCommunicator::RecvNoBarrier Var " << t << " On gpu? "
+    VLOG(3) << "AsyncCommunicator::RecvNoBarrier Var " << t << " On gpu? "
            << platform::is_gpu_place(tensor->place());
     float *temp_recv_data = tensor->mutable_data<float>(platform::CPUPlace());
-    VLOG(1) << "AsyncCommunicator::RpcRecvDense Var " << t << " table_id "
+    VLOG(3) << "AsyncCommunicator::RpcRecvDense Var " << t << " table_id "
            << table_id << " Temp_data[0] " << temp_recv_data[0]
            << " Temp_data[-1] " << temp_recv_data[tensor->numel() - 1];
     if (platform::is_gpu_place(tensor->place())) {

@@ -481,7 +455,7 @@ void AsyncCommunicator::RecvNoBarrier() {
     for (auto &t : var_names) {
       Variable *var = recv_scope_->FindVar(t);
       LoDTensor *tensor = var->GetMutable<LoDTensor>();
-      VLOG(1) << "AsyncCommunicator::RecvNoBarrier Var " << t << " On gpu? "
+      VLOG(3) << "AsyncCommunicator::RecvNoBarrier Var " << t << " On gpu? "
              << platform::is_gpu_place(tensor->place());
       if (platform::is_gpu_place(tensor->place())) {
 #ifdef PADDLE_WITH_CUDA

@@ -653,7 +627,7 @@ void AsyncCommunicator::PushSparseFromTensorAsync(
         input->lod().size() ? input->lod()[0].size() - 1 : input->dims()[0];
     if (batch_size == -1) {
       batch_size = cur_batch_size;
-    } else {
+    } else if (batch_size != cur_batch_size) {
       // CHECK(batch_size == cur_batch_size);  // NOLINT
       batch_size_consist = false;
       break;

@@ -676,7 +650,8 @@ void AsyncCommunicator::PushSparseFromTensorAsync(
   size_t output_len = 0;
   size_t input_idx = 0;
-  VLOG(2) << "fleet.cc::emb_dim: " << fea_dim;
+  VLOG(2) << "fleet.cc::emb_dim: " << fea_dim << " batch_size: " << batch_size
+          << " batch_size_consist: " << batch_size_consist;
   // TODO(zhaocaibei123): check type of show/clk is int? float? uint64?
   // const long int* show_tensor = shows->data<int64_t>();

@@ -687,13 +662,14 @@ void AsyncCommunicator::PushSparseFromTensorAsync(
   for (size_t index = 0; index < inputs->size(); ++index) {
     framework::LoDTensor *g_tensor = outputs->at(index);
     float *g = g_tensor->data<float>();
     // no cvm
     if (batch_size_consist) {  // TODO(zhaocaibei123): add config
                                // scale_sparse_gradient_with_batch_size_
       Eigen::Map<
           Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
           g_mat(g, g_tensor->numel() / fea_dim, fea_dim);
-      g_mat.rightCols(fea_dim) *= batch_size;
+      g_mat.rightCols(fea_dim - 2) *=
+          batch_size;  // hard code here, because of cvm_grad op
     }
     const framework::LoDTensor *tensor = inputs->at(index);

@@ -710,16 +686,16 @@ void AsyncCommunicator::PushSparseFromTensorAsync(
         continue;
       }
       push_keys.emplace_back(real_id);
-      push_values.emplace_back(fea_dim + 3);
+      push_values.emplace_back(fea_dim + 1);
       // slot show clk grad... consistent with CtrCommonPushValue defined in
       // ctr_accessor.h
       push_values.back()[0] = 2;  // TODO(zhaocaibei123): slot
-      push_values.back()[1] =
-          (i >= show_size ? 1 : static_cast<float>(show_tensor[i]));
-      push_values.back()[2] =
-          (i >= clk_size ? 0 : static_cast<float>(clk_tensor[i]));
+      // push_values.back()[1] =
+      //     (i >= show_size ? 1 : static_cast<float>(show_tensor[i]));
+      // push_values.back()[2] =
+      //     (i >= clk_size ? 0 : static_cast<float>(clk_tensor[i]));
-      float *data = push_values.back().data() + 3;
+      float *data = push_values.back().data() + 1;  // hard code here
       memcpy(data, g + output_len, sizeof(float) * fea_dim);

@@ -733,16 +709,16 @@ void AsyncCommunicator::PushSparseFromTensorAsync(
       continue;
     }
     push_keys.emplace_back(real_id);
-    push_values.emplace_back(fea_dim + 3);
+    push_values.emplace_back(fea_dim + 1);
     // slot show clk grad... consistent with CtrCommonPushValue defined in
     // ctr_accessor.h
     push_values.back()[0] = 2;  // TODO(zhaocaibei123): slot
-    push_values.back()[1] =
-        (i >= show_size ? 1 : static_cast<float>(show_tensor[i]));
-    push_values.back()[2] =
-        (i >= clk_size ? 0 : static_cast<float>(clk_tensor[i]));
+    // push_values.back()[1] =
+    //     (i >= show_size ? 1 : static_cast<float>(show_tensor[i]));
+    // push_values.back()[2] =
+    //     (i >= clk_size ? 0 : static_cast<float>(clk_tensor[i]));
-    float *data = push_values.back().data() + 3;
+    float *data = push_values.back().data() + 1;
    memcpy(data, g + output_len, sizeof(float) * fea_dim);

@@ -837,7 +813,7 @@ void AsyncCommunicator::Stop() {
   if (!communicator_) {
     VLOG(0) << "Communicator is not inited, do nothing";
   } else {
-    _worker_ptr->finalize_worker();
+    // _worker_ptr->finalize_worker();
     VLOG(1) << "client finalize_worker done";
     if (recv_thread_) {
       VLOG(1) << "stop recv thread";
paddle/fluid/distributed/ps/service/communicator/communicator.h

@@ -360,13 +360,13 @@ class Communicator {
   PSClient *GetPsClient() { return _worker_ptr.get(); }

-  std::unique_ptr<paddle::distributed::PSClient> GetPsClientPtr() {
+  std::shared_ptr<paddle::distributed::PSClient> GetPsClientPtr() {
     return std::move(_worker_ptr);
   }

   RecvCtxMap &GetRecvCtxMap() { return recv_varname_to_ctx_; }

-  std::unique_ptr<PSClient> _worker_ptr;  // pointer to worker
+  std::shared_ptr<PSClient> _worker_ptr;  // pointer to worker

  protected:
   bool running_ = false;
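Aside (not part of the commit): with _worker_ptr now a std::shared_ptr, the Communicator can simply alias the PSClient that FleetWrapper already owns (see the InitBrpcClient change above) instead of constructing its own. A stripped-down sketch of that shared-ownership pattern, using stand-in types rather than the real classes:

#include <memory>

struct PSClientStub {};  // stand-in for paddle::distributed::PSClient

struct FleetWrapperStub {  // stand-in for FleetWrapper
  std::shared_ptr<PSClientStub> worker_ptr_ = std::make_shared<PSClientStub>();
};

struct CommunicatorStub {  // stand-in for Communicator
  std::shared_ptr<PSClientStub> _worker_ptr;
};

int main() {
  FleetWrapperStub fleet;
  CommunicatorStub comm;
  comm._worker_ptr = fleet.worker_ptr_;  // both objects now share one client
  return comm._worker_ptr == fleet.worker_ptr_ ? 0 : 1;
}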
paddle/fluid/distributed/ps/table/CMakeLists.txt

@@ -43,11 +43,12 @@ set_source_files_properties(table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPI
 set_source_files_properties(sparse_sgd_rule.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 set_source_files_properties(ctr_double_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 set_source_files_properties(ctr_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+set_source_files_properties(sparse_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 set_source_files_properties(downpour_ctr_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 set_source_files_properties(memory_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})

 cc_library(sparse_sgd_rule SRCS sparse_sgd_rule.cc DEPS ${TABLE_DEPS} ps_framework_proto)
 cc_library(ctr_double_accessor SRCS ctr_double_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule)
-cc_library(ctr_accessor SRCS ctr_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule)
+cc_library(ctr_accessor SRCS ctr_accessor.cc sparse_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule)
 cc_library(downpour_ctr_accessor SRCS downpour_ctr_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule)
 cc_library(memory_sparse_table SRCS memory_sparse_table.cc DEPS ps_framework_proto ${TABLE_DEPS} fs afs_wrapper ctr_accessor common_table)
paddle/fluid/distributed/ps/table/common_dense_table.cc

@@ -115,6 +115,8 @@ int32_t CommonDenseTable::initialize_optimizer() {
     // optimizer_->set_global_lr(_global_lr);  //no use
   } else if (name == "sum") {
     optimizer_ = std::make_shared<DSUM>(common, &values_);
+  } else if (name == "summary") {
+    optimizer_ = std::make_shared<DSummary>(common, &values_);
   } else {
     VLOG(0) << "init optimizer failed";
   }

@@ -339,19 +341,27 @@ int32_t CommonDenseTable::save(const std::string& path,
   auto common = _config.common();
   int size = static_cast<int>(common.params().size());
-  std::ostringstream os;
-  for (int x = 0; x < size; ++x) {
-    auto& varname = common.params()[x];
-    auto& dim = common.dims()[x];
-    VLOG(0) << "CommonDenseTable::save dim " << x << " size: " << dim;
-    for (int y = 0; y < dim; ++y) {
-      os.clear();
-      os.str("");
-      os << values_[x][y];
-      if (dim == param_dim_) {
-        result_buffer_param[y].emplace_back(std::move(os.str()));
-      } else {
-        result_buffer_fixed_len.emplace_back(std::move(os.str()));
+  if (_config.common().name() == "summary") {
+    for (int x = 0; x < param_dim_; ++x) {
+      result_buffer_param[x].emplace_back(
+          std::to_string(values_[param_idx_][x]));
+    }
+  } else {
+    std::ostringstream os;
+    for (int x = 0; x < size; ++x) {
+      auto& varname = common.params()[x];
+      auto& dim = common.dims()[x];
+      VLOG(3) << "CommonDenseTable::save dim " << x << " size: " << dim;
+      for (int y = 0; y < dim; ++y) {
+        os.clear();
+        os.str("");
+        os << values_[x][y];
+        if (dim == param_dim_) {
+          result_buffer_param[y].emplace_back(std::move(os.str()));
+        } else {
+          result_buffer_fixed_len.emplace_back(std::move(os.str()));
+        }
       }
     }
   }
paddle/fluid/distributed/ps/table/ctr_accessor.cc

@@ -65,7 +65,7 @@ size_t CtrCommonAccessor::mf_size() {
 // pull value
 size_t CtrCommonAccessor::select_dim() {
   auto embedx_dim = _config.embedx_dim();
-  return 1 + embedx_dim;
+  return 3 + embedx_dim;
 }

 size_t CtrCommonAccessor::select_dim_size(size_t dim) { return sizeof(float); }

@@ -213,6 +213,10 @@ int32_t CtrCommonAccessor::select(float** select_values, const float** values,
   for (size_t value_item = 0; value_item < num; ++value_item) {
     float* select_value = select_values[value_item];
     const float* value = values[value_item];
+    select_value[CtrCommonPullValue::show_index()] =
+        value[common_feature_value.show_index()];
+    select_value[CtrCommonPullValue::click_index()] =
+        value[common_feature_value.click_index()];
     select_value[CtrCommonPullValue::embed_w_index()] =
         value[common_feature_value.embed_w_index()];
     memcpy(select_value + CtrCommonPullValue::embedx_w_index(),
paddle/fluid/distributed/ps/table/ctr_accessor.h

@@ -24,6 +24,7 @@
 namespace paddle {
 namespace distributed {

+// DownpourUnitAccessor
 class CtrCommonAccessor : public ValueAccessor {
  public:
   struct CtrCommonFeatureValue {

@@ -106,15 +107,25 @@ class CtrCommonAccessor : public ValueAccessor {
   struct CtrCommonPullValue {
     /*
+      float show;
+      float click;
       float embed_w;
       std::vector<float> embedx_w;
     */
-    static int dim(int embedx_dim) { return 1 + embedx_dim; }
+    static int dim(int embedx_dim) { return 3 + embedx_dim; }
     static int dim_size(size_t dim) { return sizeof(float); }
     static int size(int embedx_dim) { return dim(embedx_dim) * sizeof(float); }
-    static int embed_w_index() { return 0; }
-    static int embedx_w_index() { return 1; }
+    static int show_index() { return 0; }
+    static int click_index() { return 1; }
+    static int embed_w_index() { return 2; }
+    static int embedx_w_index() { return 3; }
+    static float& show(float* val) {
+      return val[CtrCommonPullValue::show_index()];
+    }
+    static float& click(float* val) {
+      return val[CtrCommonPullValue::click_index()];
+    }
     static float& embed_w(float* val) {
       return val[CtrCommonPullValue::embed_w_index()];
     }
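Aside (not part of the commit): after this change a CtrCommon pull value carries show and click ahead of the embedding weights, i.e. the layout is [show, click, embed_w, embedx_w...]. A tiny sketch of indexing such a buffer, with made-up numbers:

#include <cstdio>
#include <vector>

// Mirrors the new CtrCommonPullValue index order: show, click, embed_w, embedx_w...
enum PullIndex { kShow = 0, kClick = 1, kEmbedW = 2, kEmbedxW = 3 };

int main() {
  const int embedx_dim = 2;                                     // illustrative only
  std::vector<float> pull = {10.f, 3.f, 0.25f, 0.10f, -0.20f};  // 3 + embedx_dim floats
  std::printf("show=%.0f click=%.0f embed_w=%.2f embedx_w[0]=%.2f\n",
              pull[kShow], pull[kClick], pull[kEmbedW], pull[kEmbedxW]);
  return 0;
}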
paddle/fluid/distributed/ps/table/depends/dense.h

@@ -196,26 +196,19 @@ class DAdamD2Sum : public DenseOptimizer {
     for (int x = 0; x < static_cast<int>(names.size()); ++x) {
       if (names[x] == "LearningRate") {
         learning_rate = (*values)[x].data();
-      }
-      if (names[x] == "Param") {
+      } else if (names[x] == "Param") {
         param = (*values)[x].data();
-      }
-      if (names[x] == "Moment") {
+      } else if (names[x] == "Moment") {
         mom_velocity = (*values)[x].data();
-      }
-      if (names[x] == "G2Sum") {
+      } else if (names[x] == "G2Sum") {
         ada_g2sum = (*values)[x].data();
-      }
-      if (names[x] == "D2Sum") {
+      } else if (names[x] == "D2Sum") {
         ada_d2sum = (*values)[x].data();
-      }
-      if (names[x] == "MomentDecayRate") {
+      } else if (names[x] == "MomentDecayRate") {
        mom_decay_rate = (*values)[x].data();
-      }
-      if (names[x] == "AdaDecayRate") {
+      } else if (names[x] == "AdaDecayRate") {
         ada_decay_rate = (*values)[x].data();
-      }
-      if (names[x] == "AdaEpsilon") {
+      } else if (names[x] == "AdaEpsilon") {
         ada_epsilon = (*values)[x].data();
       }
     }

@@ -268,5 +261,34 @@ class DAdamD2Sum : public DenseOptimizer {
   float* ada_epsilon;
 };

+// for data_norm
+class DSummary : public DenseOptimizer {
+ public:
+  explicit DSummary(const CommonAccessorParameter& accessor,
+                    std::vector<std::vector<float>>* values) {
+    auto& names = accessor.params();
+    for (int x = 0; x < static_cast<int>(names.size()); ++x) {
+      if (names[x] == "Param") {
+        param = (*values)[x].data();
+      } else if (names[x] == "SummaryDecayRate") {
+        summary_decay_rate = (*values)[x].data();
+      }
+    }
+  }
+
+  void update(const float* update_values, size_t num, int begin,
+              int end) override {
+    auto update_numel = end - begin;
+    Eigen::Map<Eigen::MatrixXf> mat_w(param + begin, 1, update_numel);
+    Eigen::Map<const Eigen::MatrixXf> mat_grad(update_values + begin, 1, update_numel);
+    mat_w = mat_w * summary_decay_rate_d + mat_grad;
+  }
+
+  float* summary_decay_rate;
+  double summary_decay_rate_d = 0.999999;
+  float* param;
+};
+
 }  // namespace distributed
 }  // namespace paddle
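Aside (not part of the commit): the new DSummary optimizer keeps a decayed running sum per element, w = w * decay + g, which is the update data_norm statistics need. A standalone one-step sketch with illustrative numbers:

#include <cstdio>
#include <vector>

int main() {
  const double decay = 0.999999;             // summary_decay_rate_d in DSummary
  std::vector<float> param = {100.f, 50.f};  // running statistics
  std::vector<float> grad = {2.f, 1.f};      // this batch's contribution
  for (size_t i = 0; i < param.size(); ++i) {
    param[i] = param[i] * decay + grad[i];   // same rule as mat_w = mat_w * rate + mat_grad
  }
  std::printf("%f %f\n", param[0], param[1]);  // roughly 102 and 51
  return 0;
}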
paddle/fluid/distributed/ps/table/sparse_accessor.cc (new file, mode 100644)

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/distributed/ps/table/sparse_accessor.h"
#include <gflags/gflags.h>
#include "glog/logging.h"
#include "paddle/fluid/string/string_helper.h"

namespace paddle {
namespace distributed {

int SparseAccessor::initialize() {
  auto name = _config.embed_sgd_param().name();
  _embed_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, name);
  _embed_sgd_rule->load_config(_config.embed_sgd_param(), 1);

  name = _config.embedx_sgd_param().name();
  _embedx_sgd_rule = CREATE_PSCORE_CLASS(SparseValueSGDRule, name);
  _embedx_sgd_rule->load_config(_config.embedx_sgd_param(), _config.embedx_dim());

  sparse_feature_value.embed_sgd_dim = _embed_sgd_rule->dim();
  sparse_feature_value.embedx_dim = _config.embedx_dim();
  sparse_feature_value.embedx_sgd_dim = _embedx_sgd_rule->dim();
  _show_click_decay_rate = _config.ctr_accessor_param().show_click_decay_rate();

  return 0;
}

void SparseAccessor::GetTableInfo(AccessorInfo& info) {
  info.dim = dim();
  info.size = size();
  info.select_dim = select_dim();
  info.select_size = select_size();
  info.update_dim = update_dim();
  info.update_size = update_size();
  info.fea_dim = fea_dim();
}

size_t SparseAccessor::dim() { return sparse_feature_value.dim(); }

size_t SparseAccessor::dim_size(size_t dim) {
  auto embedx_dim = _config.embedx_dim();
  return sparse_feature_value.dim_size(dim, embedx_dim);
}

size_t SparseAccessor::size() { return sparse_feature_value.size(); }

size_t SparseAccessor::mf_size() {
  return (_config.embedx_dim() + sparse_feature_value.embedx_sgd_dim) *
         sizeof(float);  // embedx embedx_g2sum
}

// pull value
size_t SparseAccessor::select_dim() {
  auto embedx_dim = _config.embedx_dim();
  return 1 + embedx_dim;
}

size_t SparseAccessor::select_dim_size(size_t dim) { return sizeof(float); }

size_t SparseAccessor::select_size() { return select_dim() * sizeof(float); }

// push value
size_t SparseAccessor::update_dim() {
  auto embedx_dim = _config.embedx_dim();
  return 4 + embedx_dim;
}

size_t SparseAccessor::update_dim_size(size_t dim) { return sizeof(float); }

size_t SparseAccessor::update_size() { return update_dim() * sizeof(float); }

bool SparseAccessor::shrink(float* value) {
  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delete_after_unseen_days =
      _config.ctr_accessor_param().delete_after_unseen_days();
  auto delete_threshold = _config.ctr_accessor_param().delete_threshold();

  // time_decay first
  sparse_feature_value.show(value) *= _show_click_decay_rate;
  sparse_feature_value.click(value) *= _show_click_decay_rate;

  // shrink after
  auto score = show_click_score(sparse_feature_value.show(value),
                                sparse_feature_value.click(value));
  auto unseen_days = sparse_feature_value.unseen_days(value);
  if (score < delete_threshold || unseen_days > delete_after_unseen_days) {
    return true;
  }
  return false;
}

bool SparseAccessor::save(float* value, int param) {
  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
  if (param == 2) {
    delta_threshold = 0;
  }
  switch (param) {
    // save all
    case 0: {
      return true;
    }
    // save xbox delta
    case 1:
    // save xbox base
    case 2: {
      if (show_click_score(sparse_feature_value.show(value),
                           sparse_feature_value.click(value)) >= base_threshold &&
          sparse_feature_value.delta_score(value) >= delta_threshold &&
          sparse_feature_value.unseen_days(value) <= delta_keep_days) {
        // do this after save, because it must not be modified when retry
        if (param == 2) {
          sparse_feature_value.delta_score(value) = 0;
        }
        return true;
      } else {
        return false;
      }
    }
    // already decayed in shrink
    case 3: {
      // do this after save, because it must not be modified when retry
      // sparse_feature_value.unseen_days(value)++;
      return true;
    }
    // save revert batch_model
    case 5: {
      return true;
    }
    default:
      return true;
  }
}

void SparseAccessor::update_stat_after_save(float* value, int param) {
  auto base_threshold = _config.ctr_accessor_param().base_threshold();
  auto delta_threshold = _config.ctr_accessor_param().delta_threshold();
  auto delta_keep_days = _config.ctr_accessor_param().delta_keep_days();
  if (param == 2) {
    delta_threshold = 0;
  }
  switch (param) {
    case 1: {
      if (show_click_score(sparse_feature_value.show(value),
                           sparse_feature_value.click(value)) >= base_threshold &&
          sparse_feature_value.delta_score(value) >= delta_threshold &&
          sparse_feature_value.unseen_days(value) <= delta_keep_days) {
        sparse_feature_value.delta_score(value) = 0;
      }
    }
      return;
    case 3: {
      sparse_feature_value.unseen_days(value)++;
    }
      return;
    default:
      return;
  }
}

int32_t SparseAccessor::create(float** values, size_t num) {
  auto embedx_dim = _config.embedx_dim();
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* value = values[value_item];
    value[sparse_feature_value.unseen_days_index()] = 0;
    value[sparse_feature_value.delta_score_index()] = 0;
    value[sparse_feature_value.show_index()] = 0;
    value[sparse_feature_value.click_index()] = 0;
    value[sparse_feature_value.slot_index()] = -1;
    _embed_sgd_rule->init_value(value + sparse_feature_value.embed_w_index(),
                                value + sparse_feature_value.embed_g2sum_index());
    _embedx_sgd_rule->init_value(value + sparse_feature_value.embedx_w_index(),
                                 value + sparse_feature_value.embedx_g2sum_index(),
                                 false);
  }
  return 0;
}

bool SparseAccessor::need_extend_mf(float* value) {
  float show = value[sparse_feature_value.show_index()];
  float click = value[sparse_feature_value.click_index()];
  float score = (show - click) * _config.ctr_accessor_param().nonclk_coeff() +
                click * _config.ctr_accessor_param().click_coeff();
  return score >= _config.embedx_threshold();
}

bool SparseAccessor::has_mf(size_t size) {
  return size > sparse_feature_value.embedx_g2sum_index();
}

// from SparseFeatureValue to SparsePullValue
int32_t SparseAccessor::select(float** select_values, const float** values,
                               size_t num) {
  auto embedx_dim = _config.embedx_dim();
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* select_value = select_values[value_item];
    const float* value = values[value_item];
    select_value[SparsePullValue::embed_w_index()] =
        value[sparse_feature_value.embed_w_index()];
    memcpy(select_value + SparsePullValue::embedx_w_index(),
           value + sparse_feature_value.embedx_w_index(),
           embedx_dim * sizeof(float));
  }
  return 0;
}

// from SparsePushValue to SparsePushValue
// first dim: item
// second dim: field num
int32_t SparseAccessor::merge(float** update_values,
                              const float** other_update_values, size_t num) {
  auto embedx_dim = _config.embedx_dim();
  size_t total_dim = SparsePushValue::dim(embedx_dim);
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* update_value = update_values[value_item];
    const float* other_update_value = other_update_values[value_item];
    for (auto i = 0u; i < total_dim; ++i) {
      if (i != SparsePushValue::slot_index()) {
        update_value[i] += other_update_value[i];
      }
    }
  }
  return 0;
}

// from SparsePushValue to SparseFeatureValue
// first dim: item
// second dim: field num
int32_t SparseAccessor::update(float** update_values, const float** push_values,
                               size_t num) {
  auto embedx_dim = _config.embedx_dim();
  for (size_t value_item = 0; value_item < num; ++value_item) {
    float* update_value = update_values[value_item];
    const float* push_value = push_values[value_item];
    float push_show = push_value[SparsePushValue::show_index()];
    float push_click = push_value[SparsePushValue::click_index()];
    float slot = push_value[SparsePushValue::slot_index()];
    update_value[sparse_feature_value.show_index()] += push_show;
    update_value[sparse_feature_value.click_index()] += push_click;
    update_value[sparse_feature_value.slot_index()] = slot;
    update_value[sparse_feature_value.delta_score_index()] +=
        (push_show - push_click) * _config.ctr_accessor_param().nonclk_coeff() +
        push_click * _config.ctr_accessor_param().click_coeff();
    update_value[sparse_feature_value.unseen_days_index()] = 0;
    _embed_sgd_rule->update_value(
        update_value + sparse_feature_value.embed_w_index(),
        update_value + sparse_feature_value.embed_g2sum_index(),
        push_value + SparsePushValue::embed_g_index());
    _embedx_sgd_rule->update_value(
        update_value + sparse_feature_value.embedx_w_index(),
        update_value + sparse_feature_value.embedx_g2sum_index(),
        push_value + SparsePushValue::embedx_g_index());
  }
  return 0;
}

bool SparseAccessor::create_value(int stage, const float* value) {
  // stage == 0, pull
  // stage == 1, push
  if (stage == 0) {
    return true;
  } else if (stage == 1) {
    // operation
    auto show = SparsePushValue::show(const_cast<float*>(value));
    auto click = SparsePushValue::click(const_cast<float*>(value));
    auto score = show_click_score(show, click);
    if (score <= 0) {
      return false;
    }
    if (score >= 1) {
      return true;
    }
    return local_uniform_real_distribution<float>()(local_random_engine()) < score;
  } else {
    return true;
  }
}

float SparseAccessor::show_click_score(float show, float click) {
  auto nonclk_coeff = _config.ctr_accessor_param().nonclk_coeff();
  auto click_coeff = _config.ctr_accessor_param().click_coeff();
  return (show - click) * nonclk_coeff + click * click_coeff;
}

std::string SparseAccessor::parse_to_string(const float* v, int param) {
  thread_local std::ostringstream os;
  os.clear();
  os.str("");
  os << v[0] << " " << v[1] << " " << v[2] << " " << v[3] << " " << v[4] << " "
     << v[5];
  for (int i = sparse_feature_value.embed_g2sum_index();
       i < sparse_feature_value.embedx_w_index(); i++) {
    os << " " << v[i];
  }
  auto show = sparse_feature_value.show(const_cast<float*>(v));
  auto click = sparse_feature_value.click(const_cast<float*>(v));
  auto score = show_click_score(show, click);
  if (score >= _config.embedx_threshold() &&
      param > sparse_feature_value.embedx_w_index()) {
    for (auto i = sparse_feature_value.embedx_w_index();
         i < sparse_feature_value.dim(); ++i) {
      os << " " << v[i];
    }
  }
  return os.str();
}

int SparseAccessor::parse_from_string(const std::string& str, float* value) {
  int embedx_dim = _config.embedx_dim();
  _embedx_sgd_rule->init_value(value + sparse_feature_value.embedx_w_index(),
                               value + sparse_feature_value.embedx_g2sum_index());
  auto ret = paddle::string::str_to_float(str.data(), value);
  CHECK(ret >= 6) << "expect more than 6 real:" << ret;
  return ret;
}

}  // namespace distributed
}  // namespace paddle
paddle/fluid/distributed/ps/table/sparse_accessor.h (new file, mode 100644)

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <stdint.h>
#include <stdio.h>
#include <vector>
#include "paddle/fluid/distributed/common/registerer.h"
#include "paddle/fluid/distributed/ps.pb.h"
#include "paddle/fluid/distributed/ps/table/accessor.h"
#include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h"

namespace paddle {
namespace distributed {

// no show click, for word2vec(DownpourSparseValueAccessor)
class SparseAccessor : public ValueAccessor {
 public:
  struct SparseFeatureValue {
    /*
      float slot;
      float unseen_days;
      float delta_score;
      float show;
      float click;
      float embed_w;
      std::vector<float> embed_g2sum;
      std::vector<float> embedx_w;
      std::vector<float> embedx_g2sum;
    */
    int dim() { return 6 + embed_sgd_dim + embedx_sgd_dim + embedx_dim; }
    int dim_size(size_t dim, int embedx_dim) { return sizeof(float); }
    int size() { return dim() * sizeof(float); }
    int slot_index() { return 0; }
    int unseen_days_index() { return slot_index() + 1; }
    int delta_score_index() { return unseen_days_index() + 1; }
    int show_index() { return delta_score_index() + 1; }
    int click_index() { return show_index() + 1; }
    int embed_w_index() { return click_index() + 1; }
    int embed_g2sum_index() { return embed_w_index() + 1; }
    int embedx_w_index() { return embed_g2sum_index() + embed_sgd_dim; }
    int embedx_g2sum_index() { return embedx_w_index() + embedx_dim; }

    float& unseen_days(float* val) { return val[unseen_days_index()]; }
    float& delta_score(float* val) { return val[delta_score_index()]; }
    float& show(float* val) { return val[show_index()]; }
    float& click(float* val) { return val[click_index()]; }
    float& slot(float* val) { return val[slot_index()]; }
    float& embed_w(float* val) { return val[embed_w_index()]; }
    float& embed_g2sum(float* val) { return val[embed_g2sum_index()]; }
    float& embedx_w(float* val) { return val[embedx_w_index()]; }
    float& embedx_g2sum(float* val) { return val[embedx_g2sum_index()]; }

    int embed_sgd_dim;
    int embedx_dim;
    int embedx_sgd_dim;
  };

  struct SparsePushValue {
    /*
      float slot;
      float show;
      float click;
      float embed_g;
      std::vector<float> embedx_g;
    */
    static int dim(int embedx_dim) { return 4 + embedx_dim; }
    static int dim_size(int dim, int embedx_dim) { return sizeof(float); }
    static int size(int embedx_dim) { return dim(embedx_dim) * sizeof(float); }
    static int slot_index() { return 0; }
    static int show_index() { return SparsePushValue::slot_index() + 1; }
    static int click_index() { return SparsePushValue::show_index() + 1; }
    static int embed_g_index() { return SparsePushValue::click_index() + 1; }
    static int embedx_g_index() { return SparsePushValue::embed_g_index() + 1; }
    static float& slot(float* val) { return val[SparsePushValue::slot_index()]; }
    static float& show(float* val) { return val[SparsePushValue::show_index()]; }
    static float& click(float* val) { return val[SparsePushValue::click_index()]; }
    static float& embed_g(float* val) { return val[SparsePushValue::embed_g_index()]; }
    static float* embedx_g(float* val) { return val + SparsePushValue::embedx_g_index(); }
  };

  struct SparsePullValue {
    /*
      float embed_w;
      std::vector<float> embedx_w;
    */
    static int dim(int embedx_dim) { return 1 + embedx_dim; }
    static int dim_size(size_t dim) { return sizeof(float); }
    static int size(int embedx_dim) { return dim(embedx_dim) * sizeof(float); }
    static int embed_w_index() { return 0; }
    static int embedx_w_index() { return 1; }
    static float& embed_w(float* val) { return val[SparsePullValue::embed_w_index()]; }
    static float* embedx_w(float* val) { return val + SparsePullValue::embedx_w_index(); }
  };

  SparseAccessor() {}
  virtual int initialize();
  virtual void GetTableInfo(AccessorInfo& info);
  virtual ~SparseAccessor() {}

  // value dimension
  virtual size_t dim();
  // size of each value dimension
  virtual size_t dim_size(size_t dim);
  // total size across all value dimensions
  virtual size_t size();
  // total size of the dynamic mf part of the value; effective for sparse
  virtual size_t mf_size();
  // pull value dimension
  virtual size_t select_dim();
  // size of each pull value dimension
  virtual size_t select_dim_size(size_t dim);
  // total pull value size
  virtual size_t select_size();
  // push value dimension
  virtual size_t update_dim();
  // size of each push value dimension
  virtual size_t update_dim_size(size_t dim);
  // total push value size
  virtual size_t update_size();
  // whether this value should be shrunk
  virtual bool shrink(float* value);
  // whether this value should be saved to ssd
  // virtual bool save_ssd(float* value);
  virtual bool need_extend_mf(float* value);
  virtual bool has_mf(size_t size);
  // whether this value is dumped during the save stage;
  // param identifies the save stage, e.g. downpour's xbox vs batch_model
  // param = 0, save all feature
  // param = 1, save delta feature
  // param = 2, save xbox base feature
  bool save(float* value, int param) override;
  // update delta_score and unseen_days after save
  void update_stat_after_save(float* value, int param) override;
  // generate random values for keys that do not exist;
  // the value memory must already be allocated by the caller
  virtual int32_t create(float** value, size_t num);
  // select from values into select_values
  virtual int32_t select(float** select_values, const float** values, size_t num);
  // merge update_values together
  virtual int32_t merge(float** update_values, const float** other_update_values,
                        size_t num);
  // merge update_values together, using it.next to decide whether to move to the next key
  // virtual int32_t merge(float** update_values, iterator it);
  // apply update_values to values
  virtual int32_t update(float** values, const float** update_values, size_t num);

  std::string parse_to_string(const float* value, int param) override;
  int32_t parse_from_string(const std::string& str, float* v) override;
  virtual bool create_value(int type, const float* value);

  // this interface is currently only used to fetch show
  float get_field(float* value, const std::string& name) override {
    // CHECK(name == "show");
    if (name == "show") {
      return sparse_feature_value.show(value);
    }
    return 0.0;
  }

 private:
  // float show_click_score(float show, float click);
  // SparseValueSGDRule* _embed_sgd_rule;
  // SparseValueSGDRule* _embedx_sgd_rule;
  // SparseFeatureValue sparse_feature_value;
  float _show_click_decay_rate;
  int32_t _ssd_unseenday_threshold;

 public:  // TODO(zhaocaibei123): it should be private, but we make it public
          // for unit test
  SparseFeatureValue sparse_feature_value;
  float show_click_score(float show, float click);
  SparseValueSGDRule* _embed_sgd_rule;
  SparseValueSGDRule* _embedx_sgd_rule;
};

}  // namespace distributed
}  // namespace paddle
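Aside (not part of the commit): in SparseFeatureValue the fixed fields slot, unseen_days, delta_score, show, click and embed_w are followed by variable-width blocks whose sizes come from the SGD rules. A quick sketch of how the offsets line up for assumed rule dimensions (the widths below are illustrative, not taken from a real config):

#include <cstdio>

int main() {
  // Assumed widths: embed_sgd_dim = 1, embedx_dim = 8, embedx_sgd_dim = 8.
  const int embed_sgd_dim = 1, embedx_dim = 8, embedx_sgd_dim = 8;
  const int embed_w = 5;                        // slot..click occupy indices 0..4
  const int embed_g2sum = embed_w + 1;
  const int embedx_w = embed_g2sum + embed_sgd_dim;
  const int embedx_g2sum = embedx_w + embedx_dim;
  const int dim = 6 + embed_sgd_dim + embedx_sgd_dim + embedx_dim;
  std::printf("embedx_w at %d, embedx_g2sum at %d, total dim %d\n",
              embedx_w, embedx_g2sum, dim);     // prints 7, 15, 23
  return 0;
}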
paddle/fluid/distributed/ps/table/table.cc

@@ -27,6 +27,7 @@
 #endif
 #include "paddle/fluid/distributed/ps/table/ctr_accessor.h"
 #include "paddle/fluid/distributed/ps/table/memory_sparse_table.h"
+#include "paddle/fluid/distributed/ps/table/sparse_accessor.h"
 #include "paddle/fluid/distributed/ps/table/tensor_accessor.h"
 #include "paddle/fluid/distributed/ps/table/tensor_table.h"

@@ -49,6 +50,7 @@ REGISTER_PSCORE_CLASS(Table, MemorySparseTable);
 REGISTER_PSCORE_CLASS(Table, MemorySparseGeoTable);
 REGISTER_PSCORE_CLASS(ValueAccessor, CommMergeAccessor);
 REGISTER_PSCORE_CLASS(ValueAccessor, CtrCommonAccessor);
+REGISTER_PSCORE_CLASS(ValueAccessor, SparseAccessor);
 REGISTER_PSCORE_CLASS(SparseValueSGDRule, StdAdaGradSGDRule);
 REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdamSGDRule);
 REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseNaiveSGDRule);
paddle/fluid/distributed/ps/wrapper/fleet.cc

(Diff collapsed in the source view and not shown here; +163, -135.)
paddle/fluid/distributed/ps/wrapper/fleet.h

@@ -71,11 +71,22 @@ class FleetWrapper : public PSWrapper {
   }
   virtual int32_t Initialize(InitContext& context) { return 0; }

+  // TODO(zhaocaibei123: later)
+  int32_t CopyTable(const uint64_t src_table_id, const uint64_t dest_table_id);
+
+  int32_t CopyTableByFeasign(const uint64_t src_table_id,
+                             const uint64_t dest_table_id,
+                             const std::vector<uint64_t>& feasign_list);
+
+  typedef std::function<void(int, int)> HeterCallBackFunc;
+  int RegisterHeterCallback(HeterCallBackFunc handler);
+
   virtual void Stop() override;
   virtual void Load(WrapperContext& context) override;
   virtual void Save(WrapperContext& context) override;

   // set client to client communication config
   void SetClient2ClientConfig(int request_timeout_ms, int connect_timeout_ms,
                               int max_retry);

@@ -168,7 +179,8 @@ class FleetWrapper : public PSWrapper {
       std::vector<const LoDTensor*>* inputs, const LoDTensor* shows,
       const LoDTensor* clicks,
-      std::vector<LoDTensor*>* outputs);
+      std::vector<LoDTensor*>* outputs, bool use_cvm_op = false);
   // Push sparse variables to server in Async mode
   // Param<In>: scope, table_id, fea_keys, sparse_grad_names
   // Param<Out>: push_values, push_sparse_status

@@ -185,12 +197,7 @@ class FleetWrapper : public PSWrapper {
       const std::vector<framework::ProgramDesc>& server_sub_program = {});
   // init trainer
   void InitWorker(const std::string& dist_desc,
-                  const std::vector<std::string>& host_sign_list, Scope* scope,
-                  const RpcCtxMap& send_ctx,
-                  const std::unordered_map<uint64_t, std::vector<std::string>>&
-                      dense_varnames,
-                  const std::map<std::string, std::string>& envs, int node_num,
-                  int index);
+                  const std::vector<std::string>& host_sign_list, int index);
   // stop server
   void StopServer();

@@ -200,6 +207,8 @@ class FleetWrapper : public PSWrapper {
   uint64_t RunServer(const std::string& ip, uint32_t port);
   // get client info
   std::vector<uint64_t> GetClientsInfo();
+  // set client info
+  int SetClients(std::vector<uint64_t>& host_sign_list);  // NOLINT
   // create client to client connection
   void CreateClient2ClientConnection();
   // flush all push requests

@@ -255,10 +264,15 @@ class FleetWrapper : public PSWrapper {
   // this performs better than rand_r, especially large data
   std::default_random_engine& LocalRandomEngine();
+  // for init worker
+  void InitGFlag(const std::string& gflags);

   static std::shared_ptr<paddle::distributed::PSCore> pserver_ptr_;
+  static std::shared_ptr<paddle::distributed::PSClient> worker_ptr_;

  private:
   static std::shared_ptr<FleetWrapper> s_instance_;
   paddle::distributed::PaddlePSEnvironment ps_env_;
   size_t GetAbsoluteSum(size_t start, size_t end, size_t level,
                         const framework::LoD& lod);
paddle/fluid/distributed/test/memory_sparse_table_test.cc

@@ -74,7 +74,7 @@ TEST(MemorySparseTable, SGD) {
   std::vector<uint32_t> init_fres = {1, 1, 1, 1, 1};

   std::vector<float> init_values;
-  init_values.resize(init_keys.size() * (emb_dim + 1));
+  init_values.resize(init_keys.size() * (emb_dim + 3));

   auto value = PullSparseValue(init_keys, init_fres, emb_dim);
   table->pull_sparse(init_values.data(), value);

@@ -119,11 +119,11 @@ TEST(MemorySparseTable, SGD) {
   }

   std::vector<float> pull_values;
-  pull_values.resize(init_keys.size() * (emb_dim + 1));
+  pull_values.resize(init_keys.size() * (emb_dim + 3));
   table->pull_sparse(pull_values.data(), value);

   for (size_t i = 0; i < init_keys.size(); ++i) {
-    for (size_t j = 0; j < emb_dim + 1; ++j) {
+    for (size_t j = 2; j < emb_dim + 3; ++j) {
       auto update_val = init_values[i * (emb_dim + 1) + j] -
                         0.1 * total_gradients[3 + i * (emb_dim + 4) + j];
       VLOG(3) << total_gradients[i * (emb_dim + 4) + j + 3] << ":"
paddle/fluid/eager/tests/task_tests/eager_utils_test.cc

@@ -24,6 +24,9 @@
 #include "paddle/fluid/eager/utils.h"
 #include "paddle/phi/api/lib/utils/allocator.h"
 #include "paddle/phi/core/kernel_registry.h"

+PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
+
 namespace egr {
paddle/fluid/framework/CMakeLists.txt

@@ -235,7 +235,7 @@ if(WITH_PYTHON)
   py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto)
   py_proto_compile(distributed_strategy_py_proto SRCS distributed_strategy.proto)
   py_proto_compile(pass_desc_py_proto SRCS pass_desc.proto)
-  py_proto_compile(ps_py_proto SRCS ps.proto)
+  py_proto_compile(ps_py_proto SRCS the_one_ps.proto)
   #Generate an empty \
   #__init__.py to make framework_py_proto as a valid python module.
   add_custom_target(fleet_proto_init ALL

@@ -249,7 +249,7 @@ if(WITH_PYTHON)
     COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
     COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
     COMMAND cp distributed_strategy_*.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto
-    COMMAND cp ps_pb2.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto
+    COMMAND cp the_one_ps_pb2.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto
     COMMENT "Copy generated python proto into directory paddle/fluid/proto."
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
   add_custom_target(fleet_executor_proto_init ALL DEPENDS fleet_proto_init fleet_executor_desc_py_proto

@@ -261,7 +261,7 @@ if(WITH_PYTHON)
     add_custom_command(TARGET framework_py_proto POST_BUILD
       COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
       COMMAND copy /Y *.py ${proto_dstpath}
-      COMMAND copy /Y ps_pb2.py ${fleet_proto_dstpath}
+      COMMAND copy /Y the_one_ps_pb2.py ${fleet_proto_dstpath}
      COMMAND copy /Y distributed_strategy_*.py ${fleet_proto_dstpath}
      COMMENT "Copy generated python proto into directory paddle/fluid/proto."
      COMMENT "Copy generated python proto into directory paddle/distributed/fleet/proto."

@@ -314,7 +314,7 @@ if(WITH_DISTRIBUTE)
     dist_multi_trainer.cc trainer_factory.cc trainer.cc data_feed_factory.cc
     heterxpu_trainer.cc heter_pipeline_trainer.cc
     data_feed.cc device_worker.cc hogwild_worker.cc hetercpu_worker.cc
-    downpour_worker.cc downpour_worker_opt.cc
+    downpour_worker.cc downpour_lite_worker.cc downpour_worker_opt.cc
     pull_dense_worker.cc section_worker.cc heter_section_worker.cc device_worker_factory.cc data_set.cc DEPS op_registry
     device_context scope framework_proto data_feed_proto heter_service_proto trainer_desc_proto glog
     index_sampler index_wrapper sampler index_dataset_proto

@@ -329,6 +329,7 @@ if(WITH_DISTRIBUTE)
   set_source_files_properties(device_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
   set_source_files_properties(multi_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
   set_source_files_properties(hogwild_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+  set_source_files_properties(downpour_lite_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
   set_source_files_properties(heter_section_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
   set_source_files_properties(heter_pipeline_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 else()
...
paddle/fluid/framework/device_worker.h
...
...
@@ -27,6 +27,10 @@ limitations under the License. */
#include <utility> // NOLINT
#include <vector>
#if defined(PADDLE_WITH_PSCORE)
#include "paddle/fluid/distributed/ps/wrapper/fleet.h"
#endif
#include "paddle/fluid/framework/data_feed.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
#include "paddle/fluid/framework/heter_util.h"
...
...
@@ -107,7 +111,12 @@ class PullDenseWorker {
  bool CheckUpdateParam(uint64_t table_id);

 private:
#if defined(PADDLE_WITH_PSCORE)
  std::shared_ptr<paddle::distributed::FleetWrapper> fleet_ptr_;
#else
  std::shared_ptr<paddle::framework::FleetWrapper> fleet_ptr_;
#endif
  PullDenseWorkerParameter param_;
  DownpourWorkerParameter dwp_param_;
  Scope* root_scope_;
...
...
@@ -341,6 +350,79 @@ class DownpourWorker : public HogwildWorker {
// std::vector<std::pair<uint64_t, uint64_t>> copy_dense_tables_;
};
// Based on DownpourWorker,remove push pull code into operator
#if defined(PADDLE_WITH_PSCORE)
class DownpourLiteWorker : public HogwildWorker {
 public:
  DownpourLiteWorker() {}
  virtual ~DownpourLiteWorker() {}
  virtual void Initialize(const TrainerDesc& desc);
  virtual void TrainFiles();
  virtual void TrainFilesWithProfiler();

 protected:
  std::shared_ptr<paddle::distributed::FleetWrapper> fleet_ptr_;
  std::shared_ptr<paddle::framework::PullDenseWorker> pull_dense_worker_;
  void PushGradients();
  void CopySparseTable();
  void CopyDenseTable();
  void CopyDenseVars();

  DownpourWorkerParameter param_;
  // copy table
  CopyTableConfig copy_table_config_;
  std::vector<std::pair<uint64_t, uint64_t>> copy_sparse_tables_;
  std::unordered_map<uint64_t, std::unordered_set<uint64_t>> feasign_set_;
  // actually pushed feasign of each table
  std::map<uint64_t, std::vector<uint64_t>> sparse_push_keys_;
  std::map<uint64_t, std::vector<std::string>> sparse_key_names_;
  // feasign
  std::map<uint64_t, std::vector<uint64_t>> features_;
  // feasign embedding
  std::map<uint64_t, std::vector<std::vector<float>>> feature_values_;
  std::map<uint64_t, std::vector<std::string>> sparse_value_names_;
  // adjust ins weight
  AdjustInsWeightConfig adjust_ins_weight_config_;
  // check nan and inf during training
  std::vector<std::string> check_nan_var_names_;
  bool need_to_push_sparse_;
  // feasign stats
  std::map<uint64_t, std::vector<float>> feature_labels_;
  std::map<uint64_t, std::vector<std::string>> sparse_grad_names_;
  // feasign embedding gradient
  std::map<uint64_t, std::vector<std::vector<float>>> feature_grads_;
  std::vector<::std::future<int32_t>> push_sparse_status_;
  bool dump_slot_;
  bool need_to_push_dense_;
  std::map<uint64_t, std::vector<std::string>> dense_grad_names_;
  float scale_datanorm_;
  std::vector<::std::future<int32_t>> push_dense_status_;
  // skipped ops
  std::vector<std::string> skip_ops_;
  // just save the value in param_ for easy access
  std::map<uint64_t, std::string> label_var_name_;
  std::map<uint64_t, std::vector<std::string>> dense_value_names_;
  std::map<uint64_t, uint64_t> table_dependency_;
  std::vector<std::pair<uint64_t, uint64_t>> copy_dense_tables_;
  // multitask
  std::map<int32_t, uint64_t> cond2table_map_;
  std::set<uint64_t> condvalue_set_;
  bool flag_partial_push_;

 private:
  // std::vector<std::string> dump_param_;
  // just save the value in param_ for easy access
  // std::map<uint64_t, std::string> label_var_name_;
  // std::map<uint64_t, std::vector<std::string>> dense_value_names_;
  std::shared_ptr<PullDenseWorker> _pull_dense_worker;
  std::vector<float> nid_show_;
  // std::map<uint64_t, uint64_t> table_dependency_;
  // std::vector<std::pair<uint64_t, uint64_t>> copy_dense_tables_;
};
#endif

class DownpourWorkerOpt : public DownpourWorker {
 public:
  DownpourWorkerOpt() {}
...
...
paddle/fluid/framework/device_worker_factory.cc
...
...
@@ -67,6 +67,7 @@ REGISTER_DEVICE_WORKER_CLASS(DownpourWorker);
REGISTER_DEVICE_WORKER_CLASS(DownpourWorkerOpt);
#if defined(PADDLE_WITH_PSCORE)
REGISTER_DEVICE_WORKER_CLASS(DownpourLiteWorker);
REGISTER_DEVICE_WORKER_CLASS(HeterSectionWorker);
#endif
...
...
paddle/fluid/framework/dist_multi_trainer.cc
...
...
@@ -12,6 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if defined(PADDLE_WITH_PSCORE)
#include "paddle/fluid/distributed/ps/wrapper/fleet.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/device_worker_factory.h"
#include "paddle/fluid/framework/trainer.h"
...
...
@@ -62,7 +66,11 @@ void DistMultiTrainer::Initialize(const TrainerDesc &trainer_desc,
}

void DistMultiTrainer::RegisterHeterCallback() {
#ifdef PADDLE_WITH_PSCORE
  auto fleet_ptr = paddle::distributed::FleetWrapper::GetInstance();
#else
  auto fleet_ptr = FleetWrapper::GetInstance();
#endif
  fleet_ptr->RegisterHeterCallback(
      [this](int worker, int taskid) { workers_[worker]->Schedule(taskid); });
}
...
...
@@ -93,7 +101,7 @@ void DistMultiTrainer::InitTrainerEnv(const ProgramDesc &main_program,
    workers_[i]->SetRootScope(root_scope_);
    workers_[i]->CreateDeviceResource(main_program);  // Program
    workers_[i]->BindingDataFeedMemory();
-#ifdef PADDLE_WITH_PSLIB
+#if defined(PADDLE_WITH_PSLIB) || defined(PADDLE_WITH_PSCORE)
    workers_[i]->CacheProgram(main_program);
#endif
  }
...
...
@@ -110,7 +118,7 @@ void DistMultiTrainer::InitOtherEnv(const ProgramDesc &main_program) {
  }
  pull_dense_worker_->SetRootScope(root_scope_);
  pull_dense_worker_->Start();
-#ifdef PADDLE_WITH_PSLIB
+#if defined(PADDLE_WITH_PSLIB) || defined(PADDLE_WITH_PSCORE)
  for (int i = 0; i < thread_num_; ++i) {
    workers_[i]->GetXpuOpIndex();
  }
...
...
@@ -176,8 +184,12 @@ void DistMultiTrainer::Finalize() {
  pull_dense_worker_->Stop();
  root_scope_->DropKids();

  // flush local client push queue
#ifdef PADDLE_WITH_PSCORE
  auto fleet_ptr_ = paddle::distributed::FleetWrapper::GetInstance();
#else
  auto fleet_ptr_ = FleetWrapper::GetInstance();
#endif
  fleet_ptr_->ClientFlush();
}
...
...
paddle/fluid/framework/downpour_lite_worker.cc
0 → 100644
This diff is collapsed (new file not shown here).
paddle/fluid/framework/fleet/metrics.cc
...
...
@@ -19,7 +19,7 @@
#include <numeric>
#include "paddle/fluid/framework/lod_tensor.h"
-#if defined(PADDLE_WITH_PSLIB)
+#if defined(PADDLE_WITH_PSLIB) || defined(PADDLE_WITH_PSCORE)
namespace paddle {
namespace framework {
...
...
paddle/fluid/framework/fleet/metrics.h
...
...
@@ -38,7 +38,7 @@ limitations under the License. */
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#endif
-#if defined(PADDLE_WITH_PSLIB)
+#if defined(PADDLE_WITH_PSLIB) || defined(PADDLE_WITH_PSCORE)
namespace paddle {
namespace framework {
...
...
paddle/fluid/framework/pull_dense_worker.cc
...
...
@@ -61,7 +61,13 @@ void PullDenseWorker::Initialize(const TrainerDesc& param) {
    last_versions_[tid] = 0;
    current_version_[tid] = 0;
  }

#if defined(PADDLE_WITH_PSCORE)
  fleet_ptr_ = paddle::distributed::FleetWrapper::GetInstance();
#else
  fleet_ptr_ = FleetWrapper::GetInstance();
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  copy_streams_.clear();
#endif
...
...
@@ -170,6 +176,9 @@ void PullDenseWorker::PullDense(bool force_update) {
    VLOG(3) << "pull dense " << force_update << " " << tid;
    fleet_ptr_->PullDenseVarsAsync(*root_scope_, tid, dense_value_names_[tid],
                                   &pull_dense_status_, false);
#elif defined(PADDLE_WITH_PSCORE)
    fleet_ptr_->PullDenseVarsAsync(*root_scope_, tid, dense_value_names_[tid],
                                   &pull_dense_status_, true);
#else
    fleet_ptr_->PullDenseVarsAsync(*root_scope_, tid, dense_value_names_[tid],
                                   &pull_dense_status_, true);
...
...
paddle/fluid/framework/ps.proto → paddle/fluid/framework/the_one_ps.proto
File moved.
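Because the proto file is renamed, the generated Python module changes name as well. A one-line sketch of the updated import, mirroring the change shown further down in python/paddle/distributed/ps/the_one_ps.py:

# regenerated module name after the rename; the old "ps_pb2" alias keeps callers working
import paddle.distributed.fleet.proto.the_one_ps_pb2 as ps_pb2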
paddle/fluid/operators/abs_op.cc
...
...
@@ -30,6 +30,21 @@ namespace operators {
class AbsOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto input_data_type =
        framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
#ifdef PADDLE_WITH_MKLDNN
    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
                                     framework::DataLayout::kMKLDNN,
                                     framework::LibraryType::kMKLDNN);
    }
#endif
    return framework::OpKernelType(input_data_type, ctx.GetPlace());
  }
};

class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
...
...
@@ -72,8 +87,17 @@ class AbsGradOp : public framework::OperatorWithKernel {
 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
-    auto dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X");
-    return framework::OpKernelType(dtype, ctx.GetPlace());
+    auto input_data_type =
+        framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
+#ifdef PADDLE_WITH_MKLDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
  }
};
...
...
paddle/fluid/operators/conv_op_npu.cc
...
...
@@ -390,6 +390,204 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> {
}
}
};
template <typename T>
class NPUConv3dKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* input = ctx.Input<Tensor>("Input");
    const Tensor* filter = ctx.Input<Tensor>("Filter");
    Tensor* output = ctx.Output<Tensor>("Output");

    const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    const std::string padding_algorithm =
        ctx.Attr<std::string>("padding_algorithm");
    const std::string data_format = ctx.Attr<std::string>("data_format");

    PADDLE_ENFORCE_EQ(data_format, "NCDHW",
                      platform::errors::Unimplemented(
                          "the data_format must be NCDHW in "
                          "the npu kernel of conv3d, but got data_format "
                          "= [%s]",
                          data_format));
    PADDLE_ENFORCE_EQ(groups, 1,
                      platform::errors::Unimplemented(
                          "the groups must be 1 in "
                          "the npu kernel of conv3d, but got groups "
                          "= [%d]",
                          groups));

    output->mutable_data<T>(ctx.GetPlace());

    auto& dev_ctx = ctx.template device_context<NPUDeviceContext>();
    auto input_tensor =
        ctx.AllocateTmpTensor<T, NPUDeviceContext>(input->dims(), dev_ctx);
    auto filter_tensor =
        ctx.AllocateTmpTensor<T, NPUDeviceContext>(filter->dims(), dev_ctx);
    auto output_tensor =
        ctx.AllocateTmpTensor<T, NPUDeviceContext>(output->dims(), dev_ctx);

    input_tensor.ShareDataWith(*input);
    filter_tensor.ShareDataWith(*filter);
    output_tensor.ShareDataWith(*output);

    input_tensor.set_layout(DataLayout::kNCDHW);
    filter_tensor.set_layout(DataLayout::kNCDHW);
    output_tensor.set_layout(DataLayout::kNCDHW);

    // update padding and dilation
    auto in_dims = input->dims();
    auto filter_dims = filter->dims();
    framework::DDim in_data_dims;
    framework::DDim filter_data_dims;

    in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
    filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size());

    std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
    UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                             in_data_dims, strides, ksize);

    std::vector<int> strides_vec(5, 1);
    std::vector<int> dilations_vec(5, 1);

    strides_vec[2] = strides[0];
    strides_vec[3] = strides[1];
    strides_vec[4] = strides[2];
    dilations_vec[2] = dilations[0];
    dilations_vec[3] = dilations[1];
    dilations_vec[4] = dilations[2];

    auto stream = ctx.template device_context<NPUDeviceContext>().stream();
    const auto& runner =
        NpuOpRunner("Conv3D", {input_tensor, filter_tensor}, {output_tensor},
                    {{"strides", strides_vec},
                     {"pads", paddings},
                     {"dilations", dilations_vec},
                     {"groups", groups},
                     {"data_format", data_format}});
    runner.Run(stream);
  }
};

template <typename T>
class NPUConv3dGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* input = ctx.Input<Tensor>("Input");
    const Tensor* filter = ctx.Input<Tensor>("Filter");
    const Tensor* output_grad =
        ctx.Input<Tensor>(framework::GradVarName("Output"));
    Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
    Tensor* filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));

    const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    const std::string padding_algorithm =
        ctx.Attr<std::string>("padding_algorithm");
    const std::string data_format = ctx.Attr<std::string>("data_format");

    PADDLE_ENFORCE_EQ(data_format, "NCDHW",
                      platform::errors::Unimplemented(
                          "the data_format must be NCDHW in "
                          "the npu kernel of conv3d, but got data_format "
                          "= [%s]",
                          data_format));
    PADDLE_ENFORCE_EQ(groups, 1,
                      platform::errors::Unimplemented(
                          "the groups must be 1 in "
                          "the npu kernel of conv3d, but got groups "
                          "= [%d]",
                          groups));

    auto& dev_ctx = ctx.template device_context<NPUDeviceContext>();
    auto input_tensor =
        ctx.AllocateTmpTensor<T, NPUDeviceContext>(input->dims(), dev_ctx);
    auto filter_tensor =
        ctx.AllocateTmpTensor<T, NPUDeviceContext>(filter->dims(), dev_ctx);
    auto output_grad_tensor = ctx.AllocateTmpTensor<T, NPUDeviceContext>(
        output_grad->dims(), dev_ctx);

    input_tensor.ShareDataWith(*input);
    filter_tensor.ShareDataWith(*filter);
    output_grad_tensor.ShareDataWith(*output_grad);

    input_tensor.set_layout(DataLayout::kNCDHW);
    filter_tensor.set_layout(DataLayout::kNCDHW);
    output_grad_tensor.set_layout(DataLayout::kNCDHW);

    // update padding and dilation
    auto in_dims = input->dims();
    auto filter_dims = filter->dims();
    framework::DDim in_data_dims;
    framework::DDim filter_data_dims;

    in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
    filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size());

    std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
    UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                             in_data_dims, strides, ksize);

    std::vector<int> strides_vec(5, 1);
    std::vector<int> dilations_vec(5, 1);

    strides_vec[2] = strides[0];
    strides_vec[3] = strides[1];
    strides_vec[4] = strides[2];
    dilations_vec[2] = dilations[0];
    dilations_vec[3] = dilations[1];
    dilations_vec[4] = dilations[2];

    auto stream = ctx.template device_context<NPUDeviceContext>().stream();

    if (filter_grad) {
      filter_grad->mutable_data<T>(ctx.GetPlace());
      std::vector<int> filter_shape_vec = phi::vectorize<int>(filter->dims());

      Tensor filter_grad_tensor = ctx.AllocateTmpTensor<T, NPUDeviceContext>(
          filter_grad->dims(), dev_ctx);
      filter_grad_tensor.ShareDataWith(*filter_grad);
      filter_grad_tensor.set_layout(DataLayout::kNCDHW);

      const auto& runner = NpuOpRunner(
          "Conv3DBackpropFilterD", {input_tensor, output_grad_tensor},
          {filter_grad_tensor}, {{"filter_size", filter_shape_vec},
                                 {"strides", strides_vec},
                                 {"pads", paddings},
                                 {"dilations", dilations_vec},
                                 {"groups", groups},
                                 {"data_format", data_format}});
      runner.Run(stream);
    }
    if (input_grad) {
      input_grad->mutable_data<T>(ctx.GetPlace());
      std::vector<int> input_shape_vec = phi::vectorize<int>(input->dims());

      Tensor input_grad_tensor = ctx.AllocateTmpTensor<T, NPUDeviceContext>(
          input_grad->dims(), dev_ctx);
      input_grad_tensor.ShareDataWith(*input_grad);
      input_grad_tensor.set_layout(DataLayout::kNCDHW);

      const auto& runner = NpuOpRunner(
          "Conv3DBackpropInputD", {filter_tensor, output_grad_tensor},
          {input_grad_tensor}, {{"input_size", input_shape_vec},
                                {"strides", strides_vec},
                                {"pads", paddings},
                                {"dilations", dilations_vec},
                                {"groups", groups},
                                {"data_format", data_format}});
      runner.Run(stream);
    }
  }
};

}  // namespace operators
}  // namespace paddle
...
...
@@ -408,3 +606,9 @@ REGISTER_OP_NPU_KERNEL(conv2d, ops::NPUConvOpKernel<float>,
REGISTER_OP_NPU_KERNEL(conv2d_grad, ops::NPUConvGradOpKernel<float>,
                       ops::NPUConvGradOpKernel<plat::float16>);

REGISTER_OP_NPU_KERNEL(conv3d, ops::NPUConv3dKernel<float>,
                       ops::NPUConv3dKernel<plat::float16>);
REGISTER_OP_NPU_KERNEL(conv3d_grad, ops::NPUConv3dGradKernel<float>,
                       ops::NPUConv3dGradKernel<plat::float16>);
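For reference, a minimal static-graph sketch that would exercise the new conv3d NPU kernels. This is an illustrative assumption rather than part of the patch: it presumes an NPU-enabled build where paddle.NPUPlace is available, and uses NCDHW input, the only data_format the kernel accepts.

import numpy as np
import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    # NCDHW layout: [batch, channels, depth, height, width]
    x = paddle.static.data(name="x", shape=[2, 3, 8, 32, 32], dtype="float32")
    y = paddle.static.nn.conv3d(x, num_filters=4, filter_size=3, data_format="NCDHW")

place = paddle.NPUPlace(0)  # assumption: NPU build; use paddle.CPUPlace() otherwise
exe = paddle.static.Executor(place)
exe.run(startup_prog)
out, = exe.run(main_prog,
               feed={"x": np.random.rand(2, 3, 8, 32, 32).astype("float32")},
               fetch_list=[y])
print(out.shape)  # (2, 4, 6, 30, 30) with the default stride 1 and padding 0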
paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
...
...
@@ -315,15 +315,7 @@ using ExpMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
namespace ops = paddle::operators;

-#define REGISTER_ACTIVATION_MKLDNN_KERNEL(act_type, functor, grad_functor) \
-  REGISTER_OP_KERNEL(act_type, MKLDNN, ::paddle::platform::CPUPlace,       \
-                     ops::MKLDNNActivationKernel<ops::functor<float>>);    \
-  REGISTER_OP_KERNEL(                                                      \
-      act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace,               \
-      ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>);
-
-#define REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(act_type, functor,          \
-                                               grad_functor)               \
+#define REGISTER_ACTIVATION_MKLDNN_KERNEL(act_type, functor, grad_functor) \
   REGISTER_OP_KERNEL(                                                      \
       act_type, MKLDNN, ::paddle::platform::CPUPlace,                      \
       ops::MKLDNNActivationKernel<ops::functor<float>>,                    \
...
...
@@ -339,30 +331,27 @@ namespace ops = paddle::operators;
      ops::MKLDNNActivationKernel<ops::functor<float>>);

#define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)                             \
-  __macro(relu6, Relu6MKLDNNFunctor, Relu6MKLDNNGradFunctor);              \
-  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);           \
-  __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor);              \
-  __macro(hard_swish, HardSwishMKLDNNFunctor, HardSwishMKLDNNGradFunctor); \
-  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradUseOutFunctor);           \
   __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor);                    \
   __macro(elu, EluMKLDNNFunctor, EluMKLDNNGradUseOutFunctor);              \
-  __macro(exp, ExpMKLDNNFunctor, ExpMKLDNNGradUseOutFunctor);
+  __macro(exp, ExpMKLDNNFunctor, ExpMKLDNNGradUseOutFunctor);              \
+  __macro(gelu, GeluMKLDNNFunctor, GeluMKLDNNGradFunctor);                 \
+  __macro(hard_swish, HardSwishMKLDNNFunctor, HardSwishMKLDNNGradFunctor); \
+  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);           \
+  __macro(mish, MishMKLDNNFunctor, MishMKLDNNGradFunctor);                 \
+  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);                 \
+  __macro(relu6, Relu6MKLDNNFunctor, Relu6MKLDNNGradFunctor);              \
+  __macro(sigmoid, SigmoidMKLDNNFunctor, SigmoidMKLDNNGradUseOutFunctor);  \
+  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradUseOutFunctor);           \
+  __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor);              \
+  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradUseOutFunctor);
FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);
-REGISTER_ACTIVATION_MKLDNN_KERNEL_FWD_ONLY(round, RoundMKLDNNFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(relu, ReluMKLDNNFunctor,
-                                       ReluMKLDNNGradFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(gelu, GeluMKLDNNFunctor,
-                                       GeluMKLDNNGradFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(sigmoid, SigmoidMKLDNNFunctor,
-                                       SigmoidMKLDNNGradUseOutFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(sqrt, SqrtMKLDNNFunctor,
-                                       SqrtMKLDNNGradUseOutFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(mish, MishMKLDNNFunctor,
-                                       MishMKLDNNGradFunctor);
+REGISTER_ACTIVATION_MKLDNN_KERNEL_FWD_ONLY(round, RoundMKLDNNFunctor);
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(
    softplus, MKLDNN, paddle::platform::CPUPlace,
-    ops::MKLDNNActivationKernel<ops::SoftplusMKLDNNFunctor<float>>);
+    ops::MKLDNNActivationKernel<ops::SoftplusMKLDNNFunctor<float>>,
+    ops::MKLDNNActivationKernel<
+        ops::SoftplusMKLDNNFunctor<paddle::platform::bfloat16>>);
paddle/fluid/operators/pscore/distributed_lookup_table_op.h
...
...
@@ -13,7 +13,6 @@
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/distributed/ps/service/communicator/communicator.h"
#include "paddle/fluid/distributed/ps/wrapper/fleet.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -52,15 +51,13 @@ class DistributedLookupTableKernel : public framework::OpKernel<T> {
    auto inputs = context.MultiInput<framework::LoDTensor>("Ids");
    auto outputs = context.MultiOutput<framework::LoDTensor>("Outputs");
    // auto fleet = distributed::FleetWrapper::GetInstance();
-    auto *communicator = (distributed::AsyncCommunicator *)
-        distributed::Communicator::GetInstance();
+    auto fleet = distributed::FleetWrapper::GetInstance();

    if (platform::is_cpu_place(context.GetPlace())) {
-      communicator->PullSparseToTensorSync(
-          static_cast<uint64_t>(table_id), emb_dim,
-          static_cast<uint64_t>(padding_idx), context.GetPlace(), !is_test,
-          &inputs, &outputs);
+      fleet->PullSparseToTensorSync(
+          static_cast<uint64_t>(table_id), emb_dim,
+          static_cast<uint64_t>(padding_idx), context.GetPlace(), !is_test,
+          &inputs, &outputs);
    } else {
      auto inputs_variable = context.MultiInputVar("Ids");
      auto outputs_variable = context.MultiOutputVar("Outputs");
...
...
@@ -96,10 +93,10 @@ class DistributedLookupTableKernel : public framework::OpKernel<T> {
      }

      // use fleet->PullSparse
-      communicator->PullSparseToTensorSync(
-          static_cast<uint64_t>(table_id), emb_dim,
-          static_cast<uint64_t>(padding_idx), cpu_place, !is_test,
-          &tmp_input_vec, &tmp_output_vec);
+      fleet->PullSparseToTensorSync(
+          static_cast<uint64_t>(table_id), emb_dim,
+          static_cast<uint64_t>(padding_idx), cpu_place, !is_test,
+          &tmp_input_vec, &tmp_output_vec);

      // cp temp to origin
      for (size_t idx = 0; idx < output_var_size; ++idx) {
...
...
paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
...
...
@@ -106,6 +106,9 @@ class DistributedPushSparseOpMaker : public framework::OpProtoAndCheckerMaker {
"for training."
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"use_cvm_op"
,
"(boolean, default false) Use cvm op or not."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
Lookup Tablel Prefetch Operator.
This operator is used to perform lookup on parameter W,
...
...
paddle/fluid/operators/pscore/distributed_push_sparse_op.h
...
...
@@ -13,7 +13,6 @@
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/distributed/ps/service/communicator/communicator.h"
#include "paddle/fluid/distributed/ps/wrapper/fleet.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -32,22 +31,20 @@ class DistributedPushSparseKernel : public framework::OpKernel<T> {
    auto padding_idx = context.Attr<int64_t>("padding_idx");
    auto table_id = context.Attr<int>("table_id");
    auto emb_dim = context.Attr<int>("size");
    VLOG(1) << "push_sparse.h::emb_dim: " << emb_dim;
+    auto use_cvm_op = context.Attr<bool>("use_cvm_op");

    auto inputs = context.MultiInput<framework::LoDTensor>("Ids");
    auto shows = context.Input<framework::LoDTensor>("Shows");
    auto clks = context.Input<framework::LoDTensor>("Clicks");
    auto outputs = context.MultiOutput<framework::LoDTensor>("Outputs");

    // auto fleet = distributed::FleetWrapper::GetInstance();
-    auto *communicator = (distributed::AsyncCommunicator *)
-        distributed::Communicator::GetInstance();
+    auto fleet = distributed::FleetWrapper::GetInstance();

    if (platform::is_cpu_place(context.GetPlace())) {
-      communicator->PushSparseFromTensorAsync(
-          static_cast<uint64_t>(table_id), emb_dim,
-          static_cast<uint64_t>(padding_idx), context.GetPlace(), &inputs,
-          shows, clks, &outputs);
+      fleet->PushSparseFromTensorAsync(
+          static_cast<uint64_t>(table_id), emb_dim,
+          static_cast<uint64_t>(padding_idx), context.GetPlace(), &inputs,
+          shows, clks, &outputs, use_cvm_op);
    } else {
      auto inputs_variable = context.MultiInputVar("Ids");
      auto outputs_variable = context.MultiOutputVar("Outputs");
...
...
@@ -94,7 +91,7 @@ class DistributedPushSparseKernel : public framework::OpKernel<T> {
      }

      // use fleet->PullSparse
-      communicator->PushSparseFromTensorAsync(
+      fleet->PushSparseFromTensorAsync(
          static_cast<uint64_t>(table_id), emb_dim,
          static_cast<uint64_t>(padding_idx), context.GetPlace(),
          &tmp_input_vec, tmp_shows_tensor, tmp_clicks_tensor,
          &tmp_output_vec);
...
...
paddle/fluid/operators/pscore/send_op.cc
...
...
@@ -53,7 +53,7 @@ class SendOp : public framework::OperatorBase {
        send_varnames[0] != "@PS_STEP_COUNTER@") {
      auto fleet = paddle::distributed::FleetWrapper::GetInstance();
      std::vector<::std::future<int32_t>> status;
-      fleet->PushDenseVarsAsync(scope, table_id, ins, &status, 0, -1);
+      fleet->PushDenseVarsAsync(scope, table_id, ins, &status, -1, -1);
    } else {
      auto* communicator = paddle::distributed::Communicator::GetInstance();
      if (communicator->Check(send_varnames)) {
...
...
paddle/fluid/platform/device/npu/npu_op_runner.cc
...
...
@@ -47,6 +47,8 @@ static std::map<framework::proto::VarType::Type, aclDataType>
static std::map<DataLayout, aclFormat> DATA_LAYOUT_2_ACL_FORMAT = {
    {DataLayout::kNCHW, ACL_FORMAT_NCHW},
    {DataLayout::kNHWC, ACL_FORMAT_NHWC},
+    {DataLayout::kNCDHW, ACL_FORMAT_NCDHW},
+    {DataLayout::kNDHWC, ACL_FORMAT_NDHWC},
    {DataLayout::kAnyLayout, ACL_FORMAT_ND},
};
...
...
paddle/fluid/pybind/fleet_py.cc
...
...
@@ -77,6 +77,8 @@ void BindDistFleetWrapper(py::module* m) {
      .def("stop_worker", &FleetWrapper::FinalizeWorker)
      .def("barrier", &FleetWrapper::BarrierWithTable)
      .def("shrink_sparse_table", &FleetWrapper::ShrinkSparseTable)
+      .def("set_clients", &FleetWrapper::SetClients)
+      .def("get_client_info", &FleetWrapper::GetClientsInfo)
      .def("create_client2client_connection",
           &FleetWrapper::CreateClient2ClientConnection);
}
...
...
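The three new bindings expose the client-to-client handshake to Python. A rough sketch of how they could be driven from the Python side; the gather step is a placeholder, and the whole flow is an assumption based only on the method names bound above:

import paddle.fluid as fluid

worker = fluid.core.DistFleetWrapper()
# ... after the PS runtime has initialized this worker ...
my_info = worker.get_client_info()       # endpoint info of this client
all_infos = [my_info]                    # placeholder: gather the list across all workers
worker.set_clients(all_infos)
worker.create_client2client_connection()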
paddle/phi/common/layout.h
...
...
@@ -30,6 +30,8 @@ enum class DataLayout {
  SPARSE_COO,
  SPARSE_CSR,
  NUM_DATA_LAYOUTS,
+  NDHWC,
+  NCDHW,
  // See Note [ Why we need ALL in basic kernel key member? ]
  ALL_LAYOUT = UNDEFINED,
  // Note: Unify phi DataLayout and fluid::framework::DataLayout,
...
...
@@ -43,6 +45,8 @@ enum class DataLayout {
  kNHWC = NHWC,
  kNCHW = NCHW,
  kMKLDNN = MKLDNN,  // all layouts supported by MKLDNN internally
+  kNDHWC = NDHWC,
+  kNCDHW = NCDHW,
};

}  // namespace experimental
...
...
@@ -70,6 +74,10 @@ inline DataLayout StringToDataLayout(const std::string& str) {
    return DataLayout::SPARSE_COO;
  } else if (s == "SPARSE_CSR") {
    return DataLayout::SPARSE_CSR;
+  } else if (s == "NDHWC") {
+    return DataLayout::kNDHWC;
+  } else if (s == "NCDHW") {
+    return DataLayout::kNCDHW;
  } else {
    PD_THROW("Unknown data layout type string: ", s, ".");
  }
...
...
@@ -89,6 +97,10 @@ inline std::string DataLayoutToString(const DataLayout& layout) {
      return "SPARSE_COO";
    case DataLayout::SPARSE_CSR:
      return "SPARSE_CSR";
+    case DataLayout::kNDHWC:
+      return "NDHWC";
+    case DataLayout::kNCDHW:
+      return "NCDHW";
    default:
      PD_THROW("Unknown Data Layout type ", static_cast<int>(layout), ".");
  }
...
...
paddle/phi/kernels/cpu/elementwise_grad_kernel.cc
...
...
@@ -259,7 +259,7 @@ PD_REGISTER_KERNEL(multiply_triple_grad,
                   phi::dtype::bfloat16,
                   phi::dtype::complex<float>,
                   phi::dtype::complex<double>) {}

-PD_REGISTER_KERNEL(elementwise_fmax_grad,
+PD_REGISTER_KERNEL(fmax_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::ElementwiseFMaxGradKernel,
...
...
@@ -268,7 +268,7 @@ PD_REGISTER_KERNEL(elementwise_fmax_grad,
                   int,
                   int64_t) {}

-PD_REGISTER_KERNEL(elementwise_fmin_grad,
+PD_REGISTER_KERNEL(fmin_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::ElementwiseFMinGradKernel,
...
...
paddle/phi/kernels/cpu/elementwise_kernel.cc
...
...
@@ -87,23 +87,11 @@ using complex128 = ::phi::dtype::complex<double>;
// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16
// using bfloat16 = ::phi::dtype::bfloat16;
-PD_REGISTER_KERNEL(elementwise_fmax, CPU, ALL_LAYOUT,
-                   phi::ElementwiseFMaxKernel, float, double, int, int64_t) {}
+PD_REGISTER_KERNEL(
+    fmax, CPU, ALL_LAYOUT, phi::FMaxKernel, float, double, int, int64_t) {}

-PD_REGISTER_KERNEL(elementwise_fmin, CPU, ALL_LAYOUT,
-                   phi::ElementwiseFMinKernel, float, double, int, int64_t) {}
+PD_REGISTER_KERNEL(
+    fmin, CPU, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {}

PD_REGISTER_KERNEL(add_raw,
                   CPU,
...
...
paddle/phi/kernels/elementwise_kernel.h
...
...
@@ -20,18 +20,18 @@
namespace phi {

template <typename T, typename Context>
-void ElementwiseFMaxKernel(const Context& dev_ctx,
-                           const DenseTensor& x,
-                           const DenseTensor& y,
-                           int axis,
-                           DenseTensor* out);
+void FMaxKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                const DenseTensor& y,
+                int axis,
+                DenseTensor* out);

template <typename T, typename Context>
-void ElementwiseFMinKernel(const Context& dev_ctx,
-                           const DenseTensor& x,
-                           const DenseTensor& y,
-                           int axis,
-                           DenseTensor* out);
+void FMinKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                const DenseTensor& y,
+                int axis,
+                DenseTensor* out);

template <typename T, typename Context>
void AddRawKernel(const Context& dev_ctx,
...
...
paddle/phi/kernels/gpu/elementwise_grad_kernel.cu
...
...
@@ -282,7 +282,7 @@ PD_REGISTER_KERNEL(multiply_triple_grad,
                   phi::dtype::bfloat16,
                   phi::dtype::complex<float>,
                   phi::dtype::complex<double>) {}

-PD_REGISTER_KERNEL(elementwise_fmax_grad,
+PD_REGISTER_KERNEL(fmax_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::ElementwiseFMaxGradKernel,
...
...
@@ -291,7 +291,7 @@ PD_REGISTER_KERNEL(elementwise_fmax_grad,
                   int,
                   int64_t) {}

-PD_REGISTER_KERNEL(elementwise_fmin_grad,
+PD_REGISTER_KERNEL(fmin_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::ElementwiseFMinGradKernel,
...
...
paddle/phi/kernels/gpu/elementwise_kernel.cu
...
...
@@ -57,23 +57,11 @@ using bfloat16 = phi::dtype::bfloat16;
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;

-PD_REGISTER_KERNEL(elementwise_fmax, GPU, ALL_LAYOUT,
-                   phi::ElementwiseFMaxKernel, float, double, int, int64_t) {}
+PD_REGISTER_KERNEL(
+    fmax, GPU, ALL_LAYOUT, phi::FMaxKernel, float, double, int, int64_t) {}

-PD_REGISTER_KERNEL(elementwise_fmin, GPU, ALL_LAYOUT,
-                   phi::ElementwiseFMinKernel, float, double, int, int64_t) {}
+PD_REGISTER_KERNEL(
+    fmin, GPU, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {}

PD_REGISTER_KERNEL(add_raw,
                   GPU,
...
...
paddle/phi/kernels/impl/elementwise_kernel_impl.h
...
...
@@ -23,22 +23,22 @@
namespace phi {

template <typename T, typename Context>
-void ElementwiseFMaxKernel(const Context& dev_ctx,
-                           const DenseTensor& x,
-                           const DenseTensor& y,
-                           int axis,
-                           DenseTensor* out) {
+void FMaxKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                const DenseTensor& y,
+                int axis,
+                DenseTensor* out) {
  dev_ctx.template Alloc<T>(out);
  funcs::ElementwiseCompute<funcs::FMaxFunctor<T>, T, T>(
      dev_ctx, x, y, axis, funcs::FMaxFunctor<T>(), out);
}

template <typename T, typename Context>
-void ElementwiseFMinKernel(const Context& dev_ctx,
-                           const DenseTensor& x,
-                           const DenseTensor& y,
-                           int axis,
-                           DenseTensor* out) {
+void FMinKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                const DenseTensor& y,
+                int axis,
+                DenseTensor* out) {
  dev_ctx.template Alloc<T>(out);
  funcs::ElementwiseCompute<funcs::FMinFunctor<T>, T, T>(
      dev_ctx, x, y, axis, funcs::FMinFunctor<T>(), out);
...
...
paddle/phi/ops/compat/elementwise_sig.cc
...
...
@@ -19,25 +19,19 @@ namespace phi {
KernelSignature ElementwiseAddOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  int axis = paddle::any_cast<int>(ctx.Attr("axis"));
-  if (ctx.IsDenseTensorInput("X")) {
-    if (axis == -1) {
-      return KernelSignature("add", {"X", "Y"}, {}, {"Out"});
-    }
-    return KernelSignature("add_raw", {"X", "Y"}, {"axis"}, {"Out"});
+  if (axis == -1) {
+    return KernelSignature("add", {"X", "Y"}, {}, {"Out"});
  }
-  return KernelSignature("unregistered", {}, {}, {});
+  return KernelSignature("add_raw", {"X", "Y"}, {"axis"}, {"Out"});
}

KernelSignature ElementwiseSubOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  int axis = paddle::any_cast<int>(ctx.Attr("axis"));
-  if (ctx.IsDenseTensorInput("X")) {
-    if (axis == -1) {
-      return KernelSignature("subtract", {"X", "Y"}, {}, {"Out"});
-    }
-    return KernelSignature("subtract_raw", {"X", "Y"}, {"axis"}, {"Out"});
+  if (axis == -1) {
+    return KernelSignature("subtract", {"X", "Y"}, {}, {"Out"});
  }
-  return KernelSignature("unregistered", {}, {}, {});
+  return KernelSignature("subtract_raw", {"X", "Y"}, {"axis"}, {"Out"});
}

KernelSignature ElementwiseMulOpArgumentMapping(
...
...
@@ -55,24 +49,18 @@ KernelSignature ElementwiseMulOpArgumentMapping(
KernelSignature ElementwiseDivOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  int axis = paddle::any_cast<int>(ctx.Attr("axis"));
-  if (ctx.IsDenseTensorInput("X")) {
-    if (axis == -1) {
-      return KernelSignature("divide", {"X", "Y"}, {}, {"Out"});
-    }
-    return KernelSignature("divide_raw", {"X", "Y"}, {"axis"}, {"Out"});
+  if (axis == -1) {
+    return KernelSignature("divide", {"X", "Y"}, {}, {"Out"});
  }
-  return KernelSignature("unregistered", {}, {}, {});
+  return KernelSignature("divide_raw", {"X", "Y"}, {"axis"}, {"Out"});
}

KernelSignature ElementwiseAddGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
-  if (ctx.IsDenseTensorInput("X")) {
-    return KernelSignature("add_grad",
-                           {"X", "Y", GradVarName("Out")},
-                           {"axis"},
-                           {GradVarName("X"), GradVarName("Y")});
-  }
-  return KernelSignature("unregistered", {}, {}, {});
+  return KernelSignature("add_grad",
+                         {"X", "Y", GradVarName("Out")},
+                         {"axis"},
+                         {GradVarName("X"), GradVarName("Y")});
}

KernelSignature ElementwiseAddDoubleGradOpArgumentMapping(
...
...
@@ -91,13 +79,10 @@ KernelSignature ElementwiseAddTripleGradOpArgumentMapping(
KernelSignature ElementwiseSubGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
-  if (ctx.IsDenseTensorInput("X")) {
-    return KernelSignature("subtract_grad",
-                           {"X", "Y", GradVarName("Out")},
-                           {"axis"},
-                           {GradVarName("X"), GradVarName("Y")});
-  }
-  return KernelSignature("unregistered", {}, {}, {});
+  return KernelSignature("subtract_grad",
+                         {"X", "Y", GradVarName("Out")},
+                         {"axis"},
+                         {GradVarName("X"), GradVarName("Y")});
}

KernelSignature ElementwiseSubDoubleGradOpArgumentMapping(
...
...
@@ -116,7 +101,7 @@ KernelSignature ElementwiseDivGradOpArgumentMapping(
KernelSignature ElementwiseFMinGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
-  return KernelSignature("elementwise_fmin_grad",
+  return KernelSignature("fmin_grad",
                         {"X", "Y", GradVarName("Out")},
                         {"axis"},
                         {GradVarName("X"), GradVarName("Y")});
...
...
@@ -138,9 +123,19 @@ KernelSignature ElementwiseMulGradOpArgumentMapping(
                         {GradVarName("X"), GradVarName("Y")});
}

+KernelSignature ElementwiseFMaxOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature("fmax", {"X", "Y"}, {"axis"}, {"Out"});
+}
+
+KernelSignature ElementwiseFMinOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature("fmin", {"X", "Y"}, {"axis"}, {"Out"});
+}
+
KernelSignature ElementwiseFMaxGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
-  return KernelSignature("elementwise_fmax_grad",
+  return KernelSignature("fmax_grad",
                         {"X", "Y", GradVarName("Out")},
                         {"axis"},
                         {GradVarName("X"), GradVarName("Y")});
...
...
@@ -179,6 +174,10 @@ PD_REGISTER_BASE_KERNEL_NAME(elementwise_div_grad_grad, divide_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_mul_grad, multiply_grad);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_mul_grad_grad, multiply_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(elementwise_mul_triple_grad, multiply_triple_grad);
+PD_REGISTER_BASE_KERNEL_NAME(elementwise_fmax, fmax);
+PD_REGISTER_BASE_KERNEL_NAME(elementwise_fmin, fmin);
+PD_REGISTER_BASE_KERNEL_NAME(elementwise_fmax_grad, fmax_grad);
+PD_REGISTER_BASE_KERNEL_NAME(elementwise_fmin_grad, fmin_grad);

PD_REGISTER_ARG_MAPPING_FN(elementwise_add, phi::ElementwiseAddOpArgumentMapping);
...
...
@@ -208,9 +207,12 @@ PD_REGISTER_ARG_MAPPING_FN(elementwise_mul_grad_grad,
                           phi::ElementwiseMulDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_mul_triple_grad,
                           phi::ElementwiseMulTripleGradOpArgumentMapping);
+PD_REGISTER_ARG_MAPPING_FN(elementwise_fmax, phi::ElementwiseFMaxOpArgumentMapping);
+PD_REGISTER_ARG_MAPPING_FN(elementwise_fmin, phi::ElementwiseFMinOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_fmax_grad,
                           phi::ElementwiseFMaxGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elementwise_fmin_grad,
                           phi::ElementwiseFMinGradOpArgumentMapping);
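At the Python API level these renamed kernels back paddle.fmax and paddle.fmin (the NaN-skipping max/min). A quick sketch, assuming a Paddle build where both APIs are available (they exist from 2.2 onward):

import paddle

x = paddle.to_tensor([1.0, float("nan"), 3.0])
y = paddle.to_tensor([2.0, 2.0, float("nan")])
print(paddle.fmax(x, y))  # [2., 2., 3.]  -> dispatched to the phi "fmax" kernel
print(paddle.fmin(x, y))  # [1., 2., 3.]  -> dispatched to the phi "fmin" kernel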
python/paddle/distributed/fleet/base/fleet_base.py
...
...
@@ -578,7 +578,7 @@ class Fleet(object):
    @is_non_distributed_check
    @inited_runtime_handler
-    def init_worker(self):
+    def init_worker(self, scopes=None):
        """
        initialize `Communicator` for parameter server training.
...
...
@@ -599,7 +599,7 @@ class Fleet(object):
                fleet.init_worker()

        """
-        self._runtime_handle._init_worker()
+        self._runtime_handle._init_worker(scopes)

    @is_non_distributed_check
    @inited_runtime_handler
...
...
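The new scopes parameter defaults to None and is simply forwarded to the runtime handle, so existing callers keep working unchanged. A minimal sketch; the multi-scope form is an assumption based only on the new signature:

import paddle.distributed.fleet as fleet

fleet.init()  # role is read from the environment set up by fleetrun
if fleet.is_worker():
    fleet.init_worker()                            # same call as before
    # fleet.init_worker(scopes=[scope_a, scope_b]) # when several programs share one worker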
@@ -1419,6 +1419,21 @@ class Fleet(object):
        # for more examples, please reference https://github.com/PaddlePaddle/FleetX

        """
        if not isinstance(loss, list):
            return self._minimize_impl(loss, startup_program, parameter_list,
                                       no_grad_set)
        else:
            if paddle.fluid.framework.in_dygraph_mode(
            ) or self._role_maker._is_non_distributed() or self._is_collective:
                raise ValueError("loss can be list only in PS mode")
            return self._minimize_losses_impl(loss, startup_program,
                                              parameter_list, no_grad_set)

    def _minimize_impl(self,
                       loss,
                       startup_program=None,
                       parameter_list=None,
                       no_grad_set=None):
        context = {}
        context["user_defined_strategy"] = copy.deepcopy(
            self._user_defined_strategy)
...
...
@@ -1447,6 +1462,7 @@ class Fleet(object):
"sharding_degree"
]
context
[
"origin_main_program"
]
=
self
.
origin_main_program
context
[
"origin_main_programs"
]
=
[
self
.
origin_main_program
]
context
[
"loss"
]
=
loss
if
startup_program
==
None
:
self
.
origin_startup_program
=
\
...
...
@@ -1457,6 +1473,7 @@ class Fleet(object):
                startup_program.clone(for_test=False)

        context["origin_startup_program"] = startup_program
        context["origin_startup_programs"] = [startup_program]
        context["role_maker"] = self._role_maker

        # Use the auto-parallel's routines instead
...
...
@@ -1512,6 +1529,8 @@ class Fleet(object):
            copy_user_defined_strategy, can_not_apply_optimizer_list)

        context["valid_strategy"] = copy.deepcopy(valid_strategy)
        # print("valid_strategy:", context["valid_strategy"])
        # print("user_defined_strategy:", context["user_defined_strategy"])

        applied_meta_list = self.strategy_compiler._get_applied_meta_list()
        applied_graph_list = self.strategy_compiler._get_applied_graph_list()
...
...
@@ -1539,13 +1558,17 @@ class Fleet(object):
                loss, startup_program, parameter_list, no_grad_set=no_grad_set)
        if meta_optimizer:
            # print("before minimize program id:", id(loss.block.program))
            optimize_ops, params_grads = meta_optimizer.minimize(
                loss, startup_program, parameter_list, no_grad_set=no_grad_set)
            # print("after minimize program id:", id(loss.block.program))

            default_program = paddle.static.default_main_program()
            # print("default program id:", id(default_program))

            if id(default_program) != id(loss.block.program):
                paddle.fluid.framework.switch_main_program(loss.block.program)
                # print("default program id after switch:", id(default_program))

        else:
            optimize_ops, params_grads = self.user_defined_optimizer.minimize(
...
...
@@ -1555,6 +1578,7 @@ class Fleet(object):
        context["program_params_grads"] = params_grads

        if graph_optimizer:
            # print("before graph minimize program id:", id(loss.block.program))
            optimize_ops, params_grads = graph_optimizer.minimize(
                loss, startup_program, parameter_list, no_grad_set=no_grad_set)
            # since we do not encourage users to use graph operations
...
...
@@ -1568,13 +1592,90 @@ class Fleet(object):
        if not self._role_maker._is_heter_parameter_server_mode:
            program = paddle.static.default_main_program()
-            opt_info = {}
+            opt_info = {} if program._fleet_opt is None else program._fleet_opt
            opt_info["mpi_size"] = self.worker_num()
            opt_info["mpi_rank"] = self.worker_index()
            for k, v in self._user_defined_strategy.trainer_desc_configs.items(
            ):
                opt_info[k] = v
            program._fleet_opt = opt_info

        if self._runtime_handle is None:
            self._runtime_handle = RuntimeFactory()._create_runtime(context)

        import paddle.distributed.fleet as fleet
        fleet.util._set_strategy(context["valid_strategy"])

        return optimize_ops, params_grads

    def _minimize_losses_impl(self,
                              losses,
                              startup_programs=None,
                              parameter_list=None,
                              no_grad_set=None):
        context = {}

        # cache original feed forward program
        self.origin_main_program = losses[0].block.program
        context["origin_main_program"] = self.origin_main_program
        context["origin_main_programs"] = []
        for loss in losses:
            context["origin_main_programs"].append(loss.block.program)
        context["loss"] = losses

        if startup_programs is None:
            if len(losses) == 1:
                startup_programs = [paddle.static.default_startup_program()]
            else:
                raise ValueError(
                    "startup_program can't be None when loss is list.")
        self.origin_startup_program = startup_programs[0].clone(for_test=False)
        context["origin_startup_program"] = startup_programs[0]
        context["origin_startup_programs"] = []
        for program in startup_programs:
            context["origin_startup_programs"].append(program)

        context["role_maker"] = self._role_maker

        context["user_defined_strategy"] = copy.deepcopy(
            self._user_defined_strategy)
        context["valid_strategy"] = copy.deepcopy(self._user_defined_strategy)

        self._context = context

        self.valid_strategy = context["valid_strategy"]
        self.valid_strategy._enable_env()

        optimize_ops = []
        params_grads = []

        from ..meta_optimizers import ParameterServerOptimizer
        ps_optimizer = ParameterServerOptimizer(self.user_defined_optimizer)
        ps_optimizer._set_basic_info(losses, self._role_maker,
                                     self.user_defined_optimizer,
                                     self._user_defined_strategy)
        optimize_ops, params_grads = ps_optimizer.minimize_losses_impl(
            losses, startup_programs, parameter_list, no_grad_set=no_grad_set)

        # default_program = paddle.static.default_main_program()
        # if id(default_program) != id(losses[0].block.program):
        #     paddle.fluid.framework.switch_main_program(losses[0].block.program)

        context["program_optimize_ops"] = optimize_ops
        context["program_params_grads"] = params_grads

        for loss in losses:
            program = loss.block.program
            opt_info = {} if program._fleet_opt is None else program._fleet_opt
            opt_info["mpi_size"] = self.worker_num()
            opt_info["mpi_rank"] = self.worker_index()
            for k, v in self._user_defined_strategy.trainer_desc_configs.items(
            ):
                opt_info[k] = v
            program._fleet_opt = opt_info
            # print("fleet base opt info:", id(program), program._fleet_opt)

        if self._runtime_handle is None:
            self._runtime_handle = RuntimeFactory()._create_runtime(context)
...
...
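A rough end-to-end sketch of the new multi-loss path (parameter-server mode only; the toy programs, names, and strategy settings below are illustrative assumptions, not taken from the patch):

import paddle
import paddle.distributed.fleet as fleet

paddle.enable_static()
fleet.init()  # role is read from the environment set up by fleetrun

def build_program():
    main, startup = paddle.static.Program(), paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        x = paddle.static.data(name="x", shape=[None, 4], dtype="float32")
        y = paddle.static.data(name="y", shape=[None, 1], dtype="float32")
        loss = paddle.nn.functional.mse_loss(paddle.static.nn.fc(x, 1), y)
    return loss, main, startup

loss_a, main_a, startup_a = build_program()
loss_b, main_b, startup_b = build_program()

strategy = fleet.DistributedStrategy()
strategy.a_sync = True
opt = fleet.distributed_optimizer(paddle.optimizer.Adam(), strategy)
# a list of losses routes into the new _minimize_losses_impl path; the startup
# programs must then be passed explicitly as a matching list
opt.minimize([loss_a, loss_b], startup_program=[startup_a, startup_b])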
python/paddle/distributed/fleet/base/runtime_factory.py
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
from ..runtime.collective_runtime import CollectiveRuntime
from ..runtime.parameter_server_runtime import ParameterServerRuntime
-from ..runtime.the_one_ps import TheOnePSRuntime
+from ...ps.the_one_ps import TheOnePSRuntime

__all__ = []
...
...
python/paddle/distributed/fleet/meta_optimizers/__init__.py
...
...
@@ -17,7 +17,7 @@ from .asp_optimizer import ASPOptimizer
from .recompute_optimizer import RecomputeOptimizer
from .gradient_merge_optimizer import GradientMergeOptimizer
from .graph_execution_optimizer import GraphExecutionOptimizer
-from .parameter_server_optimizer import ParameterServerOptimizer
+from .ps_optimizer import ParameterServerOptimizer
from .pipeline_optimizer import PipelineOptimizer
from .localsgd_optimizer import LocalSGDOptimizer
from .localsgd_optimizer import AdaptiveLocalSGDOptimizer
...
...
python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
...
...
@@ -110,8 +110,9 @@ class ParameterServerOptimizer(MetaOptimizerBase):
                                                 no_grad_set)
        if startup_program == None:
            startup_program = paddle.static.default_startup_program()
-        print("program after inner optimizer minimize:",
-              str(loss.block.program))
+        # print("program after inner optimizer minimize:",
+        #       str(loss.block.program))
        self._set_origin_programs([loss])
        self._init_ps_pass_context(loss, startup_program)
        ps_builder = PsProgramBuilderFactory()._create_ps_program_builder(
...
@@ -181,7 +182,6 @@ class ParameterServerOptimizer(MetaOptimizerBase):
if
not
var
.
persistable
or
var
.
desc
.
type
(
)
!=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
:
continue
set_var_lod_type
(
var
)
param_memory_size
+=
get_var_mem_size
(
var
)
processed_var_names
.
add
(
varname
)
...
...
@@ -211,9 +211,8 @@ class ParameterServerOptimizer(MetaOptimizerBase):
                    data_count *= (-x)
                else:
                    data_count *= x
-            program_tmp_vars[var_name] = (
-                data_count, neg_dim_count,
-                vars_metatools.dtype_to_size[var.dtype])
+            program_tmp_vars[var_name] = (data_count, neg_dim_count,
+                                          dtype_to_size[var.dtype])

        for varname in program_tmp_vars:
            data_count, neg_dim_count, type_size = program_tmp_vars[varname]
...
...
@@ -228,12 +227,19 @@ class ParameterServerOptimizer(MetaOptimizerBase):
        return False

    def _enable_strategy(self, dist_strategy, context):
-        a_sync_configs = dist_strategy.a_sync_configs
        if dist_strategy.a_sync_configs["k_steps"] >= 0:
            return
        dist_strategy.a_sync = True
+        a_sync_configs = dist_strategy.a_sync_configs
        is_geo = self._can_apply_geo(context["origin_main_program"])
-        dist_strategy.a_sync_configs["k_steps"] = 800 if is_geo else 0
+        a_sync_configs["k_steps"] = 800 if is_geo else 0
+        dist_strategy.a_sync_configs = a_sync_configs

    def _disable_strategy(self, dist_strategy):
        dist_strategy.a_sync = False
+        a_sync_configs = dist_strategy.a_sync_configs
        dist_strategy.a_sync_configs["k_steps"] = -1
+        dist_strategy.a_sync_configs = a_sync_configs
python/paddle/distributed/fleet/runtime/the_one_ps.py
...
...
@@ -62,9 +62,9 @@ def get_default_accessor_proto(accessor, varname, o_main_program):
    if not accessor.HasField("accessor_class"):
        accessor.accessor_class = "CtrCommonAccessor"
    if not accessor.HasField("fea_dim"):
-        accessor.fea_dim = embedding_dim + 2
+        accessor.fea_dim = embedding_dim
    if not accessor.HasField("embedx_dim"):
-        accessor.embedx_dim = embedding_dim - 1
+        accessor.embedx_dim = embedding_dim - 3
    if not accessor.HasField("embedx_threshold"):
        accessor.embedx_threshold = 0
...
...
@@ -129,15 +129,15 @@ def check_embedding_dim(accessor, varname, o_main_program):
            embedding_dim = var.shape[1]
            break

    fea_dim = accessor.fea_dim
-    if fea_dim != embedding_dim + 2:
+    if fea_dim != embedding_dim:
        raise ValueError(
-            "The fea_dim is wrong, it will be sparse_embedding_dim + 2: {}, but got {}".
-            format(embedding_dim + 2, fea_dim))
+            "The fea_dim is wrong, it will be sparse_embedding_dim: {}, but got {}".
+            format(embedding_dim, fea_dim))

    embedx_dim = accessor.embedx_dim
-    if embedx_dim != embedding_dim - 1:
+    if embedx_dim != embedding_dim - 3:
        raise ValueError(
-            "The embedx_dim is wrong, it will be sparse_embedding_dim - 1: {}, but got {}".
-            format(embedding_dim - 1, embedx_dim))
+            "The embedx_dim is wrong, it will be sparse_embedding_dim - 3: {}, but got {}".
+            format(embedding_dim - 3, embedx_dim))


class Accessor:
...
...
@@ -927,7 +927,6 @@ class TheOnePSRuntime(RuntimeBase):
        tables = []
        for idx, (name, ctx) in enumerate(send_ctx.items()):
-            print(" wxm python test send_ctx.items-->", idx, (name, ctx))
            if ctx.is_tensor_table() or len(ctx.origin_varnames()) < 1:
                continue
...
...
python/paddle/distributed/fleet/utils/ps_util.py
...
...
@@ -75,7 +75,7 @@ class DistributedInfer:
        if self.sparse_table_maps is None:
            self.sparse_table_maps = {}
-            send_ctx = fleet.fleet._runtime_handle._communicator.send_ctx_
+            send_ctx = fleet.fleet._runtime_handle._send_ctx
            for gradname, ctx in send_ctx.items():
                if ctx.is_sparse:
                    param = gradname.strip("@GRAD")
...
...
python/paddle/distributed/passes/ps_server_pass.py
...
...
@@ -155,8 +155,6 @@ class AddListenAndServPass(PassBase):
        main_program.global_block().append_op(
            type="listen_and_serv",
            inputs={'X': []},
            outputs={},
            attrs=opt)
-        attrs['cloned_main'] = main_program


@register_pass("add_rpc_global_flags_pass")
class AddRpcGlobalFlagsPass(PassBase):
...
...
python/paddle/distributed/passes/ps_trainer_pass.py
...
...
@@ -116,7 +116,7 @@ class DistributedOpsPass(PassBase):
    def _check_conflict(self, other_pass):
        return True

-    def _push_sparse_fuse(self, _program, push_sparse_ops, attrs):
+    def _push_sparse_fuse(self, _program, push_sparse_ops, attrs, use_cvm_op):
        if attrs['use_ps_gpu']:
            return
        if len(push_sparse_ops) == 0:
...
...
@@ -211,7 +211,8 @@ class DistributedOpsPass(PassBase):
"is_distributed"
:
is_distributed
,
"padding_idx"
:
padding_idx
,
"table_id"
:
table_id
,
"size"
:
self
.
emb_size
[
param
]
"size"
:
self
.
emb_size
[
param
],
"use_cvm_op"
:
use_cvm_op
})
def
_pull_sparse_fuse
(
self
,
_program
,
pull_sparse_ops
,
attrs
,
send_ctx
):
...
...
@@ -420,6 +421,7 @@ class DistributedOpsPass(PassBase):
        pull_sparse_ids = {}
        push_sparse_ops = {}
        ops = {}
+        use_cvm_op = False
        for op in _program.global_block().ops:
            if op.type in SPARSE_OP_TYPE_DICT.keys() \
                    and op.attr('remote_prefetch') is True:
...
...
@@ -433,6 +435,9 @@ class DistributedOpsPass(PassBase):
                ids = pull_sparse_ids.get(param_name, [])
                ids.append(op.input("Ids")[0])
                pull_sparse_ids[param_name] = ids
+            if op.type == 'cvm':
+                use_cvm_op = True

        for op in _program.global_block().ops:
            if op.type in SPARSE_GRAD_OP_TYPE_DICT.keys():
                param_name = op.input(SPARSE_GRAD_OP_TYPE_DICT[op.type])[0]
...
...
@@ -442,16 +447,16 @@ class DistributedOpsPass(PassBase):
                ops.append(op)
                push_sparse_ops[param_name] = ops

-        return pull_sparse_ops, push_sparse_ops
+        return pull_sparse_ops, push_sparse_ops, use_cvm_op

    def _apply_single_impl(self, main_program, startup_program, pass_ctx):
        attrs = pass_ctx._attrs
-        pull_sparse_ops, push_sparse_ops = self._get_pull_sparse_ops(
+        pull_sparse_ops, push_sparse_ops, use_cvm_op = self._get_pull_sparse_ops(
            main_program, attrs)
        send_ctx = get_the_one_send_context(
            attrs, split_dense_table=attrs['is_heter_ps_mode'])
        self._pull_sparse_fuse(main_program, pull_sparse_ops, attrs, send_ctx)
-        self._push_sparse_fuse(main_program, push_sparse_ops, attrs)
+        self._push_sparse_fuse(main_program, push_sparse_ops, attrs, use_cvm_op)


@register_pass("delete_optimizer_pass")
...
...
python/paddle/distributed/ps/the_one_ps.py
...
...
@@ -15,7 +15,7 @@
import warnings

import os
-from paddle.distributed.fleet.proto import ps_pb2
+import paddle.distributed.fleet.proto.the_one_ps_pb2 as ps_pb2
import paddle.fluid as fluid
import paddle.distributed.fleet as fleet
from paddle.fluid import core
...
...
@@ -68,16 +68,30 @@ def check_embedding_dim(accessor_proto, varname, program_id, context):
            print('new var: {}, {}, {}'.format(var, embedding_dim,
                                               accessor_proto.fea_dim))
            break

    fea_dim = accessor_proto.fea_dim
-    if fea_dim != embedding_dim + 2:
-        raise ValueError(
-            "The fea_dim is wrong, it will be sparse_embedding_dim + 2: {}, but got {}".
-            format(embedding_dim + 2, fea_dim))
+    if accessor_proto.accessor_class == "SparseAccessor":
+        if fea_dim != embedding_dim + 2:
+            raise ValueError(
+                "The fea_dim is wrong, it will be sparse_embedding_dim + 2: {}, but got {}".
+                format(embedding_dim + 2, fea_dim))
+    else:
+        if fea_dim != embedding_dim:
+            raise ValueError(
+                "The fea_dim is wrong, it will be sparse_embedding_dim: {}, but got {}".
+                format(embedding_dim, fea_dim))

    embedx_dim = accessor_proto.embedx_dim
-    if embedx_dim != embedding_dim - 1:
-        raise ValueError(
-            "The embedx_dim is wrong, it will be sparse_embedding_dim - 1: {}, but got {}".
-            format(embedding_dim - 1, embedx_dim))
+    if accessor_proto.accessor_class == "SparseAccessor":
+        if embedx_dim != embedding_dim - 1:
+            raise ValueError(
+                "The embedx_dim is wrong, it will be sparse_embedding_dim - 1: {}, but got {}".
+                format(embedding_dim - 1, embedx_dim))
+    else:
+        if embedx_dim != embedding_dim - 3:
+            raise ValueError(
+                "The embedx_dim is wrong, it will be sparse_embedding_dim - 3: {}, but got {}".
+                format(embedding_dim - 3, embedx_dim))


class Service:
...
...
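To make the accessor defaults concrete, the checks above enforce the following dimension layout. A small helper written only for illustration (the function name is mine; the arithmetic is taken directly from the code above):

def expected_dims(accessor_class, sparse_embedding_dim):
    # dimensions enforced by check_embedding_dim() in ps/the_one_ps.py
    if accessor_class == "SparseAccessor":
        return {"fea_dim": sparse_embedding_dim + 2,
                "embedx_dim": sparse_embedding_dim - 1}
    # CtrCommonAccessor and the other CTR-style accessors
    return {"fea_dim": sparse_embedding_dim,
            "embedx_dim": sparse_embedding_dim - 3}

print(expected_dims("SparseAccessor", 9))     # {'fea_dim': 11, 'embedx_dim': 8}
print(expected_dims("CtrCommonAccessor", 9))  # {'fea_dim': 9, 'embedx_dim': 6}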
@@ -119,11 +133,18 @@ class Accessor:
                break

        if not accessor_proto.HasField("accessor_class"):
-            accessor_proto.accessor_class = "CtrCommonAccessor"
+            # DownpourSparseValueAccessor
+            accessor_proto.accessor_class = "SparseAccessor"
        if not accessor_proto.HasField("fea_dim"):
-            accessor_proto.fea_dim = embedding_dim + 2
+            if accessor_proto.accessor_class == "SparseAccessor":
+                accessor_proto.fea_dim = embedding_dim + 2
+            else:
+                accessor_proto.fea_dim = embedding_dim
        if not accessor_proto.HasField("embedx_dim"):
-            accessor_proto.embedx_dim = embedding_dim - 1
+            if accessor_proto.accessor_class == "SparseAccessor":
+                accessor_proto.embedx_dim = embedding_dim - 1
+            else:
+                accessor_proto.embedx_dim = embedding_dim - 3
        if not accessor_proto.HasField("embedx_threshold"):
            accessor_proto.embedx_threshold = 0
...
...
@@ -268,16 +289,16 @@ class CommonAccessor(Accessor):
        attr_str = ""

        origin_var_name = value_name
-        print("get_initializer_attr param name:", value_name)
+        # print("get_initializer_attr param name:", value_name)
        for op in o_startup_program.global_block().ops:
            if op.type in self.opt_init_map.keys(
            ) and origin_var_name == op.output("Out")[0]:
                init_attr = [op.type]
-                print("get_initializer_attr op type:", op.type)
+                # print("get_initializer_attr op type:", op.type)
                for attr in self.opt_init_map[op.type]:
-                    print("get_initializer_attr opt_init_map attr:", attr)
+                    # print("get_initializer_attr opt_init_map attr:", attr)
                    init_attr.append(str(op.attr(attr)))
-                    print("get_initializer_attr op attr:", str(op.attr(attr)))
+                    # print("get_initializer_attr op attr:", str(op.attr(attr)))
                attr_str = l_in.join(init_attr)
                break
        return attr_str
...
...
@@ -288,16 +309,16 @@ class CommonAccessor(Accessor):
size
=
ctx
.
sections
()[
0
]
single_dim
=
ctx
.
sections
()[
1
]
if
ctx
.
is_sparse
()
else
1
adam_d2sum
=
context
[
"user_defined_strategy"
].
adam_d2sum
print
(
"parse_by_optimizer table_id:{} is_datanorm:{}"
.
format
(
ctx
.
table_id
(),
ctx
.
is_datanorm_table
()))
#
print("parse_by_optimizer table_id:{} is_datanorm:{}".format(
#
ctx.table_id(), ctx.is_datanorm_table()))
main_program
,
startup_program
,
idx
=
get_program_by_id
(
context
,
ctx
.
program_id
())
pserver_id
=
get_role_id
(
context
[
'role_maker'
])
pserver_num
=
len
(
get_ps_endpoints
(
context
[
'role_maker'
]))
optimizer_ops
=
get_optimize_ops
(
main_program
)
print
(
"the one ps optimizer_ops:"
,
optimizer_ops
)
print
(
"the one ps parse_by_optimizer grad_name:"
,
grad_name
)
#
print("the one ps optimizer_ops:", optimizer_ops)
#
print("the one ps parse_by_optimizer grad_name:", grad_name)
oop
=
None
for
op
in
optimizer_ops
:
...
...
@@ -394,7 +415,7 @@ class CommonAccessor(Accessor):
initializer
=
self
.
get_initializer_attr
(
param
.
name
,
startup_program
)
elif
formal_name
==
"SummaryDecayRate"
:
initializer
=
"fill_constant&0.99999"
initializer
=
"fill_constant&0.99999
9
"
else
:
initializer
=
"fill_constant&0"
initializers
.
append
(
initializer
)
...
...
@@ -740,7 +761,6 @@ class PsDescBuilder(object):
     def _get_tables(self):
         tables = []
         for idx, (name, ctx) in enumerate(self.send_ctx.items()):
-            print('####### {}\n'.format(ctx.is_sparse()))
            if ctx.is_sparse():
                if self.ps_mode == DistributedMode.GEO:
                    tables.append(globals()['GeoSparseTable'](self.context,
...
@@ -778,11 +798,11 @@ class PsDescBuilder(object):
         return text_format.MessageToString(self.ps_desc)

     def build_server_desc(self):
+        self.sparse_table_maps = {}
         for table in self.tables:
             table_proto = self.ps_desc.server_param.downpour_server_param.downpour_table_param.add()
             table._set(table_proto)
-            self.sparse_table_maps = {}
             if table_proto.type == ps_pb2.PS_SPARSE_TABLE and table_proto.common is not None:
                 self.sparse_table_maps[table_proto.common.table_name] = table_proto.table_id
...
@@ -801,6 +821,7 @@ class TheOnePSRuntime(RuntimeBase):
         self._worker = fluid.core.DistFleetWrapper()
         self._server_sub_program = []
         self._heter_client = None
+        self._send_ctx = None

     def _set_basic_info(self, context):
         self.context = context
...
@@ -835,7 +856,40 @@ class TheOnePSRuntime(RuntimeBase):
         self.ps_desc_builder = PsDescBuilder(self.context)

-    def _init_worker(self):
+    def _init_params(self, scopes, send_ctx, recv_map):
+        for name, ctx in send_ctx.items():
+            if ctx.is_sparse():
+                continue
+            _, _, idx = get_program_by_id(self.context, ctx.program_id())
+            scope = scopes[idx]
+            table_id = ctx.table_id()
+            var_names = recv_map[table_id]
+            # print("init params:", idx, table_id, var_names)
+            self._worker.push_dense_params(scope, table_id, var_names)
+
+    def _pull_all_dense(self, scopes, send_ctx, recv_map):
+        for name, ctx in send_ctx.items():
+            if ctx.is_sparse():
+                continue
+            _, _, idx = get_program_by_id(self.context, ctx.program_id())
+            scope = scopes[idx]
+            table_id = ctx.table_id()
+            var_names = recv_map[table_id]
+            # print("pull all dense:", idx, table_id, var_names)
+            self._worker.pull_dense_params(scope, table_id, var_names)
+
+    def _pull_dense(self, program, scope, send_ctx, recv_map):
+        for name, ctx in send_ctx.items():
+            if ctx.is_sparse():
+                continue
+            if ctx.program_id() != id(program):
+                continue
+            table_id = ctx.table_id()
+            var_names = recv_map[table_id]
+            # print("pull dense:", table_id, var_names)
+            self._worker.pull_dense_params(scope, table_id, var_names)
+
+    def _init_worker(self, scopes=None):
         worker_desc = self.ps_desc_builder.build_worker_desc()
         if self.context['use_ps_gpu']:
...
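Note: the three helpers added above all walk the same two mappings: a send context keyed by grad name (whose entries know their dense table id and owning program) and a recv map from table id to the variable names to sync. A minimal pure-Python sketch of that bookkeeping follows; the FakeCtx class and the example dicts are illustrative stand-ins, not Paddle types:

class FakeCtx(object):
    """Illustrative stand-in for a send-context entry (not the real CommContext)."""

    def __init__(self, table_id, program_id, sparse=False):
        self._table_id = table_id
        self._program_id = program_id
        self._sparse = sparse

    def is_sparse(self):
        return self._sparse

    def table_id(self):
        return self._table_id

    def program_id(self):
        return self._program_id


def dense_work_items(send_ctx, recv_map, program_id_to_scope_idx):
    """Yield (scope_idx, table_id, var_names), the triples the loops above act on."""
    for name, ctx in send_ctx.items():
        if ctx.is_sparse():
            continue
        idx = program_id_to_scope_idx[ctx.program_id()]
        yield idx, ctx.table_id(), recv_map[ctx.table_id()]


if __name__ == "__main__":
    send_ctx = {
        "fc_0.w_0@GRAD": FakeCtx(table_id=1, program_id=111),
        "emb@GRAD": FakeCtx(table_id=0, program_id=111, sparse=True),
    }
    recv_map = {1: ["fc_0.w_0", "fc_0.b_0"]}
    for item in dense_work_items(send_ctx, recv_map, {111: 0}):
        print(item)  # (0, 1, ['fc_0.w_0', 'fc_0.b_0'])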
@@ -866,6 +920,7 @@ class TheOnePSRuntime(RuntimeBase):
             split_dense_table=self.is_heter_ps_mode,
             use_origin_program=self.is_heter_ps_mode,
             ep_list=self.endpoints)
+        self._send_ctx = send_ctx
         trainer_config = self.context['trainer']

         debug = bool(int(os.getenv("PSERVER_DEBUG", "0")))
...
@@ -889,23 +944,32 @@ class TheOnePSRuntime(RuntimeBase):
             kwargs.update(sync_kwargs)

         print("communicator config:", trainer_config.get_communicator_flags())
-        self._communicator = Communicator(
-            trainer_config.mode, kwargs,
-            trainer_config.get_communicator_flags())
-        self._communicator.init_with_ctx(send_ctx, dense_map, proto_txt,
-                                         self.string_hosts,
-                                         fluid.global_scope())
+
+        role_id = get_role_id(self.role_maker)
+        self._worker.init_worker(proto_txt, self.string_hosts, role_id)
+
+        if self.context['ps_mode'] == DistributedMode.GEO:
+            self._communicator = Communicator(
+                trainer_config.mode, kwargs,
+                trainer_config.get_communicator_flags())
+            self._communicator.init_with_ctx(send_ctx, dense_map, proto_txt,
+                                             self.string_hosts,
+                                             fluid.global_scope())
         fleet.util.barrier()
-        info = self._communicator.get_client_info()
+
+        # info = self._communicator.get_client_info()
+        info = self._worker.get_client_info()
         if isinstance(info, list) and len(info) > 0:
             all_info = self.role_maker._all_gather(info[0])
             # for unittest
             if not isinstance(all_info, list):
                 warnings.warn("gloo may not initialize correctly")
                 all_info = [all_info]
-            self._communicator.set_clients(all_info)
-            self._communicator.create_client_to_client_connection()
+
+            # self._communicator.set_clients(all_info)
+            # self._communicator.create_client_to_client_connection()
+            self._worker.set_clients(all_info)
+            self._worker.create_client2client_connection()
             print('create c2c connection done')
         else:
             print('cannot create c2c connection')
...
@@ -914,6 +978,7 @@ class TheOnePSRuntime(RuntimeBase):
         is_test = bool(int(os.getenv("TEST_MODE", "0")))

+        # for GEO
         if self.role_maker._is_first_worker() and self.is_heter_ps_mode:
             # for ps-heter mode load all parameters on first_worker
             init_params = get_the_one_recv_context(
...
@@ -921,16 +986,38 @@ class TheOnePSRuntime(RuntimeBase):
         else:
             init_params = dense_map

+        # if not is_test:
+        #     self._communicator.init_params(init_params)
+        #     fleet.util.barrier()
+        # self._communicator.pull_dense(init_params)
+        # fleet.util.barrier()
+        if scopes is None:
+            if len(self.origin_main_programs) > 1:
+                raise ValueError("You must set the scope list when you have Multiple programs")
+            scopes = [fluid.global_scope()]
+        if len(self.origin_main_programs) != len(scopes):
+            raise VauleError("len(programs) != len(scopes)")
+
+        self.scopes = scopes
         if not is_test:
-            self._communicator.init_params(init_params)
+            if self.context['ps_mode'] == DistributedMode.GEO:
+                self._communicator.init_params(init_params)
+            else:
+                if role_id == 0:
+                    self._init_params(scopes, send_ctx, dense_map)
+
             fleet.util.barrier()
-        self._communicator.pull_dense(init_params)
+        self._pull_all_dense(scopes, send_ctx, dense_map)
         fleet.util.barrier()

-        if not self._communicator.is_running():
-            self._communicator.start()
-        else:
-            warnings.warn("communicator has been initialized, skip")
+        if self.context['ps_mode'] == DistributedMode.GEO:
+            if not self._communicator.is_running():
+                self._communicator.start()
+            else:
+                warnings.warn("communicator has been initialized, skip")

         launch_barrier = dist_strategy.a_sync_configs["launch_barrier"]
         launch_barrier_flag = int(os.getenv("FLAGS_LAUNCH_BARRIER", "1"))
...
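Note: after this change only GEO mode keeps a Communicator; in the other modes the dense tables are seeded by one worker and then pulled by everyone. A rough sketch of that control flow with stub callables follows (the stubs stand in for the real _init_params, _pull_all_dense and barrier; the string "GEO" replaces DistributedMode.GEO purely for illustration):

def init_dense_tables(ps_mode, role_id, is_test,
                      init_params, pull_all_dense, barrier,
                      communicator=None):
    """Sketch of the branch added to _init_worker above."""
    if not is_test:
        if ps_mode == "GEO":
            # GEO keeps the old path: the communicator seeds the tables.
            communicator.init_params()
        else:
            # Only worker 0 pushes the initial dense values ...
            if role_id == 0:
                init_params()
        barrier()
    # ... and every worker pulls them back into its local scope(s).
    pull_all_dense()
    barrier()


if __name__ == "__main__":
    log = []
    init_dense_tables(
        ps_mode="ASYNC", role_id=0, is_test=False,
        init_params=lambda: log.append("push init params"),
        pull_all_dense=lambda: log.append("pull dense"),
        barrier=lambda: log.append("barrier"))
    print(log)  # ['push init params', 'barrier', 'pull dense', 'barrier']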
@@ -996,7 +1083,9 @@ class TheOnePSRuntime(RuntimeBase):
         self._server.run_server(host, int(port))

     def _stop_worker(self):
-        self._communicator.stop()
+        if self.context['ps_mode'] == DistributedMode.GEO:
+            self._communicator.stop()
+        self._worker.stop_worker()
         if self.is_heter_ps_mode:
             assert self._heter_client != None, "heter client should not be None in heterps mode"
             self._heter_client.stop()
...
@@ -1151,7 +1240,11 @@ class TheOnePSRuntime(RuntimeBase):
                 "in fleet.save() function, executor must be as Executor type")
         import paddle
-        program = self.origin_main_program if main_program is None else main_program
+        program = self.origin_main_programs[0] if main_program is None else main_program
+        _, _, idx = get_program_by_id(self.context, id(program))
+        scope = self.scopes[idx]
+        print("save inference model scope idx:", idx)

         if isinstance(program, CompiledProgram):
             raise TypeError(
...
@@ -1180,12 +1273,14 @@ class TheOnePSRuntime(RuntimeBase):
         sparse_names = self._save_sparse_params(executor, dirname, sparses, main_program, mode)

-        denses = get_the_one_recv_context(
+        dense_map = get_the_one_recv_context(
+            self.context, split_dense_table=self.is_heter_ps_mode)
+        send_ctx = get_the_one_send_context(
             self.context,
-            is_dense=True,
             split_dense_table=self.is_heter_ps_mode,
-            use_origin_program=True)
-        self._communicator.pull_dense(denses)
+            use_origin_program=self.is_heter_ps_mode,
+            ep_list=self.endpoints)
+        self._pull_dense(program, scope, send_ctx, dense_map)

         generate_vars = self.context["user_defined_strategy"].trainer_desc_configs["stat_var_names"]
...
@@ -1196,7 +1291,7 @@ class TheOnePSRuntime(RuntimeBase):
                 infer_program.list_vars()))
         for var in remaining_vars:
-            tensor = var.get_value()
+            tensor = var.get_value(scope)
             paddle.save(tensor, os.path.join(model_path, var.name),
...
python/paddle/distributed/ps/utils/ps_program_builder.py
...
@@ -37,6 +37,37 @@ class PsProgramBuilder(object):
         self.server_endpoints = self.attrs['role_maker']._get_pserver_endpoints()

+    def _build_trainer_desc(self):
+        opt_info = self.loss.block.program._fleet_opt
+        opt_info = {} if opt_info is None else opt_info
+        opt_info["trainer"] = opt_info.get("trainer", "DistMultiTrainer")
+        opt_info["device_worker"] = opt_info.get("device_worker", "DownpourLite")
+        pid = str(id(self.cloned_main))
+        program_configs = {
+            pid: {
+                'pull_dense': [],
+                'push_dense': [],
+                'pull_sparse': [],
+                'push_sparse': []
+            }
+        }
+        dense_table_config = {}
+        send_ctx = get_the_one_send_context(self.attrs)
+        recv_ctx = get_the_one_recv_context(self.attrs)
+        for name, ctx in send_ctx.items():
+            if ctx.program_id() != id(self.loss.block.program):
+                continue
+            if ctx.is_sparse():
+                continue
+            if not ctx.is_tensor_table():
+                program_configs[pid]['pull_dense'].append(ctx.table_id())
+                program_configs[pid]['push_dense'].append(ctx.table_id())
+                dense_table_config[ctx.table_id()] = recv_ctx[ctx.table_id()]
+        opt_info['program_configs'] = program_configs
+        opt_info['dense_table_config'] = dense_table_config
+        self.cloned_main._fleet_opt = opt_info
+
     def _optimize_programs(self):
         pass
...
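Note: the _build_trainer_desc helper added above attaches a _fleet_opt dict to the cloned main program; the device worker later reads program_configs and dense_table_config from it. A hand-written example of the rough shape of that dict follows (the table id, program id string and variable names are made up for illustration):

# What cloned_main._fleet_opt roughly looks like after _build_trainer_desc,
# for a single program whose dense parameters live in table 1.
program_id = "140032923711312"  # str(id(cloned_main)) in the real code

fleet_opt = {
    "trainer": "DistMultiTrainer",
    "device_worker": "DownpourLite",
    "program_configs": {
        program_id: {
            "pull_dense": [1],
            "push_dense": [1],
            "pull_sparse": [],
            "push_sparse": [],
        }
    },
    # table_id -> dense variable names handled by the pull-dense thread
    "dense_table_config": {1: ["fc_0.w_0", "fc_0.b_0"]},
}

print(fleet_opt["program_configs"][program_id]["pull_dense"])  # [1]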
@@ -63,7 +94,15 @@ class PsProgramBuilder(object):
             logger.info("start building trainer program")
             self._build_trainer_programs()
             fluid.framework.switch_startup_program(self.cloned_startup)
+            # print("ps_program_build before =", id(self.loss.block.program))
+            self._build_trainer_desc()
             self.loss.block.program = self.cloned_main
+            # print("ps_program_build after =", id(self.loss.block.program))
+            # print("ps_program_build clone after =", id(self.cloned_main))
+            # print("ps_program_build after trainer_desc",
+            #       id(self.loss.block.program))
+            # print("ps_program build trainer desc",
+            #       self.loss.block.program._fleet_opt)

         elif self.attrs['is_server']:
             logger.info("start building pserver program")
...
@@ -92,6 +131,13 @@ class GeoPsProgramBuilder(PsProgramBuilder):  # 仅 CPU 模式
         return

+    def _build_pserver_programs(self):
+        add_listen_and_serv_pass = new_pass('add_listen_and_serv_pass', self.attrs)
+        add_listen_and_serv_pass.apply([self.attrs['_main_server']], [None], self.pass_ctx)
+        return
+

 class CpuSyncPsProgramBuilder(PsProgramBuilder):
     def __init__(self, pass_ctx):
...
@@ -103,13 +149,13 @@ class CpuSyncPsProgramBuilder(PsProgramBuilder):
                 format(self.ps_mode, "PsProgramBuilder"))

     def _build_trainer_programs(self):
-        print("build trainer program entry")
-        print("before ps program builder program:", self.cloned_main)
+        # print("build trainer program entry")
+        # print("before ps program builder program:", self.cloned_main)
         add_lr_decay_table_pass = new_pass("add_lr_decay_table_pass", self.attrs)
         add_lr_decay_table_pass.apply([], [], self.pass_ctx)

-        print("before distributed op pass")
+        # print("before distributed op pass")
         distributed_ops_pass = new_pass("distributed_ops_pass", self.attrs)
         distributed_ops_pass.apply([self.cloned_main], [None], self.pass_ctx)
...
@@ -129,7 +175,7 @@ class CpuSyncPsProgramBuilder(PsProgramBuilder):
         self.attrs['origin_main_program'] = self.cloned_main
         self.attrs['origin_startup_program'] = self.cloned_startup

-        print("after ps program builder program:", self.cloned_main)
+        # print("after ps program builder program:", self.cloned_main)
         if self.launch_barrier and self.launch_barrier_flag:
             wait_server_ready(self.server_endpoints)
...
python/paddle/distributed/ps/utils/public.py
...
@@ -23,7 +23,6 @@ import logging
 import six
 import paddle.fluid as fluid
 from paddle.fluid import core
-from paddle.fluid.core import CommContext
 import paddle.fluid.framework as framework
 import paddle.distributed.fleet as fleet
...
@@ -73,9 +72,9 @@ def logger_config(log_path, logging_name):
     return logger


-ps_log_root_dir = '/ps_log/'
+ps_log_root_dir = './ps_log/'
 logger = logger_config(
-    log_path='/ps_usr_print_log', logging_name='ps_usr_print_log')
+    log_path='./ps_usr_print_log', logging_name='ps_usr_print_log')


 class DistributedMode:
...
@@ -342,6 +341,7 @@ def get_dense_send_context(program,
             aggregate = True
             print("public get_dense_send_context dense_table:", grad_name, var_numel, origin_varnames)
+            from paddle.fluid.core import CommContext
             dense_ctx = CommContext(grad_name, [grad_name], ["127.0.0.1:6071"],
                                     [var_numel], origin_varnames, trainer_id,
                                     aggregate, False, False, idx, False, False,
...
@@ -364,6 +364,7 @@ def get_dense_send_context(program,
             aggregate = True
             print("public get_dense_send_context data_norm table:", grad_name, var_numel, origin_varnames)
+            from paddle.fluid.core import CommContext
             data_norm_ctx = CommContext(grad_name, [grad_name], ["127.0.0.1:6071"],
                                         [var_numel], origin_varnames, trainer_id,
                                         aggregate, False, False, idx, False, True,
...
@@ -378,6 +379,7 @@ def get_dense_send_context(program,
             var_numel = reduce(lambda x, y: x * y, var.shape)
             grad_name = origin_varname
             aggregate = True
+            from paddle.fluid.core import CommContext
             dense_ctx = CommContext(grad_name, [grad_name], ["127.0.0.1:6071"],
                                     [var_numel], [origin_varname], trainer_id,
                                     aggregate, False, False, idx, False, False,
...
@@ -407,7 +409,7 @@ def get_geo_trainer_send_context(context):
             var = program.global_block().vars[grad.merged_var.name]
             var_numel = reduce(lambda x, y: x * y, var.shape[1:])
+            from paddle.fluid.core import CommContext
             sparse_ctx = CommContext(grad_name, [grad_name], ["127.0.0.1:6071"],
                                      [var_numel], [grad_name], trainer_id,
                                      True, True,
...
@@ -432,6 +434,7 @@ def _step_ctx(idx, role_maker):
     endpoints = get_ps_endpoints(role_maker)
     sections = [1] * len(endpoints)
     names = [name] * len(endpoints)
+    from paddle.fluid.core import CommContext
     ctx = CommContext(name, names, endpoints, sections, [name], trainer_id,
                       True, False, False, idx, True, False, -1)
     return name, ctx
...
@@ -448,12 +451,8 @@ def get_the_one_send_context(context,
     origin_programs = context['origin_main_programs']

     idx = 0
-    for i, program in enumerate(origin_programs):
-        merged_dense_pairs = context['merged_dense_pairs'][i]
-        idx = get_dense_send_context(program, send_ctx, idx, merged_dense_pairs,
-                                     trainer_id, split_dense_table)
     distibuted_varnames = get_sparse_tablenames(origin_programs, True)
-    print("public distibuted_varnames:", distibuted_varnames)
+    # print("public distibuted_varnames:", distibuted_varnames)
     for i, program in enumerate(origin_programs):
         merged_sparse_pairs = context['merged_sparse_pairs'][i]
         for merged in merged_sparse_pairs:
...
@@ -472,10 +471,11 @@ def get_the_one_send_context(context,
             shape = list(var.shape)
             shape[0] = 0 if is_distributed else shape[0]
-            print("public get_the_one_send_context sparse:", grad_name, splited_varname, shape)
+            # print("public get_the_one_send_context sparse:", grad_name,
+            #       splited_varname, shape)
             if grad_name in send_ctx:
                 continue
+            from paddle.fluid.core import CommContext
             sparse_ctx = CommContext(grad_name, splited_varname, ep_list, shape,
                                      [grad_name], trainer_id, True, True,
                                      is_distributed, idx, False, False,
...
@@ -484,6 +484,11 @@ def get_the_one_send_context(context,
             idx += 1
             send_ctx[sparse_ctx.var_name()] = sparse_ctx

+    for i, program in enumerate(origin_programs):
+        merged_dense_pairs = context['merged_dense_pairs'][i]
+        idx = get_dense_send_context(program, send_ctx, idx, merged_dense_pairs,
+                                     trainer_id, split_dense_table)
+
     if len(context['tensor_table']) > 0 and context['is_worker']:
         name, ctx = _step_ctx(idx, context['role_maker'])
         send_ctx[name] = ctx
...
@@ -1258,8 +1263,8 @@ def build_var_distributed(context):
     context["merged_variable_map"] = {}
     for origin_program in origin_programs:
         sparse_pairs, dense_pairs = get_param_grads(origin_program)
-        print("public build_var_distributed sparse_pairs:", sparse_pairs)
-        print("public build_var_distributed dense_pairs:", dense_pairs)
+        # print("public build_var_distributed sparse_pairs:", sparse_pairs)
+        # print("public build_var_distributed dense_pairs:", dense_pairs)
         origin_for_sparse = []
         origin_for_dense = []
         merged_sparse_pairs = []
...
@@ -1279,8 +1284,8 @@ def build_var_distributed(context):
             m_grad = MergedVariable(grad, [grad], [0])
             merged_variables_pairs.append((m_param, m_grad))
             merged_dense_pairs.append((m_param, m_grad))
-        print("public build_var_distributed merged_dense_pairs:", merged_dense_pairs)
+        # print("public build_var_distributed merged_dense_pairs:",
+        #       merged_dense_pairs)

         for sparse_pair in origin_for_sparse:
             param, grad = sparse_pair
...
@@ -1289,8 +1294,8 @@ def build_var_distributed(context):
             m_grad = MergedVariable(grad, [grad], [0])
             merged_variables_pairs.append((m_param, m_grad))
             merged_sparse_pairs.append((m_param, m_grad))
-        print("public build_var_distributed merged_sparse_pairs:", merged_sparse_pairs)
+        # print("public build_var_distributed merged_sparse_pairs:",
+        #       merged_sparse_pairs)

         for merged in merged_variables_pairs:
             m_param, m_grad = merged
...
@@ -1315,18 +1320,19 @@ def build_var_distributed(context):
     context["param_name_to_grad_name"] = param_name_to_grad_name
     context["grad_name_to_param_name"] = grad_name_to_param_name
-    print("public build_var_distributed origin_sparse_pairs:", context["origin_sparse_pairs"])
-    print("public build_var_distributed origin_for_dense:", context["origin_dense_pairs"])
-    print("public build_var_distributed merged_sparse_pairs:", context["merged_sparse_pairs"])
-    print("public build_var_distributed merged_dense_pairs:", context['merged_dense_pairs'])
-    print("public build_var_distributed param_name_to_grad_name:", param_name_to_grad_name)
-    print("public build_var_distributed grad_name_to_param_name:", grad_name_to_param_name)
+    # print("public build_var_distributed origin_sparse_pairs:",
+    #       context["origin_sparse_pairs"])
+    # print("public build_var_distributed origin_for_dense:",
+    #       context["origin_dense_pairs"])
+    # print("public build_var_distributed merged_sparse_pairs:",
+    #       context["merged_sparse_pairs"])
+    # print("public build_var_distributed merged_dense_pairs:",
+    #       context['merged_dense_pairs'])
+    # print("public build_var_distributed param_name_to_grad_name:",
+    #       param_name_to_grad_name)
+    # print("public build_var_distributed grad_name_to_param_name:",
+    #       grad_name_to_param_name)


 def _is_opt_role_op(op):
...
python/paddle/fluid/communicator.py
...
@@ -62,13 +62,18 @@ class Communicator(object):
        """
        # set all recv op to not_run mode
-        if mode == DistributedMode.SYNC:
-            envs["pserver_endpoints"] = ','.join(kwargs["pserver_endpoints"])
-        envs["trainers"] = str(kwargs["trainers"])
-        envs["trainer_id"] = str(kwargs["trainer_id"])
-        envs["need_global_step"] = str(kwargs["need_global_step"])
-        envs["barrier_table_id"] = str(kwargs["barrier_table_id"])
+        if kwargs == None:
+            if envs == None:
+                envs = {}
+        else:
+            if mode == DistributedMode.SYNC:
+                envs["pserver_endpoints"] = ','.join(kwargs["pserver_endpoints"])
+            envs["trainers"] = str(kwargs["trainers"])
+            envs["trainer_id"] = str(kwargs["trainer_id"])
+            envs["need_global_step"] = str(kwargs["need_global_step"])
+            envs["barrier_table_id"] = str(kwargs["barrier_table_id"])

         mode_str = None
...
@@ -129,6 +134,9 @@ class Communicator(object):
              comm.start()
              comm.stop()
        """
+        if self.communicator_ == None:
+            print('you must call init_with_ctx first to init comm before start')
+            return
         self.communicator_.start()

     def stop(self):
...
@@ -148,6 +156,9 @@ class Communicator(object):
              comm.start()
              comm.stop()
        """
+        if self.communicator_ == None:
+            print('you must call init_with_ctx first to init comm before stop')
+            return
         self.communicator_.stop()

     def is_running(self):
...
@@ -166,6 +177,9 @@ class Communicator(object):
              comm = fluid.communicator.Communicator(prog)
              comm.is_running()
        """
+        if self.communicator_ == None:
+            print('you must call init_with_ctx first to init comm before stop')
+            return
         self.communicator_.is_running()

     def recv(self):
...
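Note: the three guards added above all protect against calling into self.communicator_ before init_with_ctx has created it, which previously crashed on the None attribute. A tiny self-contained sketch of the pattern follows; the _Comm and _Backend classes are illustrative stand-ins, not the real Communicator:

class _Comm(object):
    """Illustrative: a client wrapper that may not have been initialized yet."""

    def __init__(self):
        self.communicator_ = None  # set by init_with_ctx() in the real class

    def init_with_ctx(self, backend):
        self.communicator_ = backend

    def start(self):
        if self.communicator_ is None:
            print('you must call init_with_ctx first to init comm before start')
            return
        self.communicator_.start()


class _Backend(object):
    def start(self):
        print("backend started")


if __name__ == "__main__":
    c = _Comm()
    c.start()                  # safe no-op with a hint instead of a crash
    c.init_with_ctx(_Backend())
    c.start()                  # backend started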
python/paddle/fluid/dataset.py
...
@@ -862,9 +862,9 @@ class InMemoryDataset(DatasetBase):
            thread_num(int): shuffle thread num. Default is 12.

        """
        from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib
        if fleet is not None:
-            if not isinstance(fleet, PSLib):
+            if hasattr(fleet, "barrier_worker"):
+                print("pscore fleet")
                 fleet.barrier_worker()
             else:
                 fleet._role_maker.barrier_worker()
...
@@ -879,20 +879,20 @@ class InMemoryDataset(DatasetBase):
         self.dataset.set_fleet_send_batch_size(self.fleet_send_batch_size)
         self.dataset.set_fleet_send_sleep_seconds(self.fleet_send_sleep_seconds)
         if fleet is not None:
-            if not isinstance(fleet, PSLib):
+            if hasattr(fleet, "barrier_worker"):
                 fleet.barrier_worker()
             else:
                 fleet._role_maker.barrier_worker()
         self.dataset.global_shuffle(thread_num)
         if fleet is not None:
-            if not isinstance(fleet, PSLib):
+            if hasattr(fleet, "barrier_worker"):
                 fleet.barrier_worker()
             else:
                 fleet._role_maker.barrier_worker()
         if self.merge_by_lineid:
             self.dataset.merge_by_lineid()
         if fleet is not None:
-            if not isinstance(fleet, PSLib):
+            if hasattr(fleet, "barrier_worker"):
                 fleet.barrier_worker()
             else:
                 fleet._role_maker.barrier_worker()
...
@@ -1026,9 +1026,8 @@ class InMemoryDataset(DatasetBase):
             local_data_size = np.array([local_data_size])
             print('global shuffle local_data_size: ', local_data_size)
             if fleet is not None:
-                from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib
                 global_data_size = local_data_size * 0
-                if not isinstance(fleet, PSLib):
+                if hasattr(fleet, "util"):
                     global_data_size = fleet.util.all_reduce(local_data_size)
                 else:
                     fleet._role_maker.all_reduce_worker(local_data_size,
...
python/paddle/fluid/device_worker.py
...
@@ -99,6 +99,7 @@ class Hogwild(DeviceWorker):
         dense_table_set = set()
         program_id = str(id(self._program))
+        print("device worker program id:", program_id)
         if self._program == None:
             print("program of current device worker is not configured")
             exit(-1)
...
@@ -115,15 +116,20 @@ class Hogwild(DeviceWorker):
         from paddle.fluid.incubate.fleet.parameter_server import version

-        if version.is_transpiler() and "fleet_desc" not in opt_info:
+        if version.is_transpiler(
+        ) and "fleet_desc" not in opt_info and "program_configs" not in opt_info:
             return

         program_configs = opt_info["program_configs"]
+        print("device worker program_configs:", program_configs)

         for pid in program_configs:
+            print("device worker", pid, program_id)
             if pid == program_id:
                 pc = downpour.program_config.add()
                 pc.program_id = program_id
+                print("device worker pull dense:",
+                      program_configs[program_id]["pull_dense"])
                 for i in program_configs[program_id]["push_sparse"]:
                     pc.push_sparse_table_id.extend([i])
                 for i in program_configs[program_id]["push_dense"]:
...
@@ -139,50 +145,189 @@ class Hogwild(DeviceWorker):
         trainer_desc.device_worker_name = "HogwildWorker"
         pull_thread = trainer_desc.pull_dense_param
         pull_thread.device_num = trainer_desc.thread_num
-        if opt_info.get("program_id_to_worker") is None:
-            raise ValueError("opt_info must have program_id_to_worker")
-        prog_id_to_worker = opt_info["program_id_to_worker"]
-        if prog_id_to_worker.get(program_id) is None:
-            raise ValueError("%s not found in program_id_to_worker" % program_id)
-        worker = opt_info["program_id_to_worker"][program_id]
-        for i in worker.get_desc().dense_table:
-            if i.table_id in dense_table_set:
-                dense_table = pull_thread.dense_table.add()
-                dense_table.dense_value_name.extend(i.dense_variable_name)
-                dense_table.table_id = \
-                    i.table_id
-        sparse_len = len(worker.get_desc().sparse_table)
-        for i in range(sparse_len):
-            sparse_table = downpour.sparse_table.add()
-            sparse_table.table_id = worker.get_desc().sparse_table[i].table_id
-            sparse_table.sparse_key_name.extend(worker.get_desc().sparse_table[i].slot_key)
-            sparse_table.sparse_value_name.extend(worker.get_desc().sparse_table[i].slot_value)
-            sparse_table.sparse_grad_name.extend(worker.get_desc().sparse_table[i].slot_gradient)
-            sparse_table.fea_dim = \
-                self._fleet_desc.server_param.downpour_server_param.downpour_table_param[i].accessor.fea_dim
-            # not use emb_dim
-            sparse_table.emb_dim = -1
-            # not use hard code click
-            sparse_table.label_var_name = ""
-        for i in worker.get_desc().dense_table:
-            if i.table_id in dense_table_set:
-                dense_table = downpour.dense_table.add()
-                dense_table.table_id = i.table_id
-                dense_table.dense_value_name.extend(i.dense_variable_name)
-                dense_table.dense_grad_name.extend(i.dense_gradient_variable_name)
-        hogwild.skip_ops.extend(worker.get_desc().skip_op)
+        if opt_info.get("program_id_to_worker") is None and opt_info.get(
+                "dense_table_config") is None:
+            raise ValueError(
+                "opt_info must have program_id_to_worker or dense_table_config")
+        if opt_info.get("program_id_to_worker") is not None:
+            prog_id_to_worker = opt_info["program_id_to_worker"]
+            if prog_id_to_worker.get(program_id) is None:
+                raise ValueError("%s not found in program_id_to_worker" %
+                                 program_id)
+            worker = opt_info["program_id_to_worker"][program_id]
+            for i in worker.get_desc().dense_table:
+                if i.table_id in dense_table_set:
+                    dense_table = pull_thread.dense_table.add()
+                    dense_table.dense_value_name.extend(i.dense_variable_name)
+                    dense_table.table_id = \
+                        i.table_id
+            sparse_len = len(worker.get_desc().sparse_table)
+            for i in range(sparse_len):
+                sparse_table = downpour.sparse_table.add()
+                sparse_table.table_id = worker.get_desc().sparse_table[i].table_id
+                sparse_table.sparse_key_name.extend(worker.get_desc().sparse_table[i].slot_key)
+                sparse_table.sparse_value_name.extend(worker.get_desc().sparse_table[i].slot_value)
+                sparse_table.sparse_grad_name.extend(worker.get_desc().sparse_table[i].slot_gradient)
+                sparse_table.fea_dim = \
+                    self._fleet_desc.server_param.downpour_server_param.downpour_table_param[i].accessor.fea_dim
+                # not use emb_dim
+                sparse_table.emb_dim = -1
+                # not use hard code click
+                sparse_table.label_var_name = ""
+            for i in worker.get_desc().dense_table:
+                if i.table_id in dense_table_set:
+                    dense_table = downpour.dense_table.add()
+                    dense_table.table_id = i.table_id
+                    dense_table.dense_value_name.extend(i.dense_variable_name)
+                    dense_table.dense_grad_name.extend(i.dense_gradient_variable_name)
+            hogwild.skip_ops.extend(worker.get_desc().skip_op)
+        else:
+            dense_table_config = opt_info.get("dense_table_config")
+            print("device worker dense_table_config:", dense_table_config)
+            for table_id, varnames in dense_table_config.items():
+                dense_table = pull_thread.dense_table.add()
+                dense_table.dense_value_name.extend(varnames)
+                dense_table.table_id = table_id

         if self._infer:
             hogwild.skip_ops.extend(
                 ["push_sparse", "push_sparse_v2", "push_dense"])


+class DownpourLite(DeviceWorker):
+    """
+    DownpourLite is a kind of SGD algorithm.
+    """
+
+    def __init__(self):
+        """Init."""
+        super(DownpourLite, self).__init__()
+
+    def _gen_worker_desc(self, trainer_desc):
+        """
+        Generator worker desc, which device worker is DownpourLiteWorker.
+
+        Args:
+            trainer_desc(TrainerDesc): a TrainerDesc object
+        """
+        print("create DownpourLiteWorker")
+        trainer_desc.device_worker_name = "DownpourLiteWorker"
+        if self._infer:
+            # just ignore feed op for inference model
+            trainer_desc.downpour_param.skip_ops.extend([
+                "feed", "push_sparse", "push_sparse_v2", "push_dense",
+                "distributed_push_sparse", "send"
+            ])
+        dense_table_set = set()
+        program_id = str(id(self._program))
+        print("device worker program id:", program_id)
+        if self._program == None:
+            print("program of current device worker is not configured")
+            exit(-1)
+        opt_info = self._program._fleet_opt
+        # when opt_info is None or empty dict, it should return
+        if not opt_info:
+            return
+
+        downpour = trainer_desc.downpour_param
+        if opt_info["stat_var_names"]:
+            for i in opt_info["stat_var_names"]:
+                downpour.stat_var_names.extend([i])
+
+        from paddle.fluid.incubate.fleet.parameter_server import version
+
+        if version.is_transpiler(
+        ) and "fleet_desc" not in opt_info and "program_configs" not in opt_info:
+            return
+
+        program_configs = opt_info["program_configs"]
+        print("device worker program_configs:", program_configs)
+
+        for pid in program_configs:
+            print("device worker", pid, program_id)
+            if pid == program_id:
+                pc = downpour.program_config.add()
+                pc.program_id = program_id
+                print("device worker pull dense:",
+                      program_configs[program_id]["pull_dense"])
+                for i in program_configs[program_id]["push_sparse"]:
+                    pc.push_sparse_table_id.extend([i])
+                for i in program_configs[program_id]["push_dense"]:
+                    pc.push_dense_table_id.extend([i])
+                    dense_table_set.add(i)
+                for i in program_configs[program_id]["pull_sparse"]:
+                    pc.pull_sparse_table_id.extend([i])
+                for i in program_configs[program_id]["pull_dense"]:
+                    pc.pull_dense_table_id.extend([i])
+                    dense_table_set.add(i)
+                break
+
+        pull_thread = trainer_desc.pull_dense_param
+        pull_thread.device_num = trainer_desc.thread_num
+        if opt_info.get("program_id_to_worker") is None and opt_info.get(
+                "dense_table_config") is None:
+            raise ValueError(
+                "opt_info must have program_id_to_worker or dense_table_config")
+        if opt_info.get("program_id_to_worker") is not None:
+            prog_id_to_worker = opt_info["program_id_to_worker"]
+            if prog_id_to_worker.get(program_id) is None:
+                raise ValueError("%s not found in program_id_to_worker" %
+                                 program_id)
+            worker = opt_info["program_id_to_worker"][program_id]
+            for i in worker.get_desc().dense_table:
+                if i.table_id in dense_table_set:
+                    dense_table = pull_thread.dense_table.add()
+                    dense_table.dense_value_name.extend(i.dense_variable_name)
+                    dense_table.table_id = \
+                        i.table_id
+            sparse_len = len(worker.get_desc().sparse_table)
+            for i in range(sparse_len):
+                sparse_table = downpour.sparse_table.add()
+                sparse_table.table_id = worker.get_desc().sparse_table[i].table_id
+                sparse_table.sparse_key_name.extend(worker.get_desc().sparse_table[i].slot_key)
+                sparse_table.sparse_value_name.extend(worker.get_desc().sparse_table[i].slot_value)
+                sparse_table.sparse_grad_name.extend(worker.get_desc().sparse_table[i].slot_gradient)
+                sparse_table.fea_dim = \
+                    self._fleet_desc.server_param.downpour_server_param.downpour_table_param[i].accessor.fea_dim
+                # not use emb_dim
+                sparse_table.emb_dim = -1
+                # not use hard code click
+                sparse_table.label_var_name = ""
+            for i in worker.get_desc().dense_table:
+                if i.table_id in dense_table_set:
+                    dense_table = downpour.dense_table.add()
+                    dense_table.table_id = i.table_id
+                    dense_table.dense_value_name.extend(i.dense_variable_name)
+                    dense_table.dense_grad_name.extend(i.dense_gradient_variable_name)
+            downpour.skip_ops.extend(worker.get_desc().skip_op)
+        else:
+            dense_table_config = opt_info.get("dense_table_config")
+            print("device worker dense_table_config:", dense_table_config)
+            for table_id, varnames in dense_table_config.items():
+                dense_table = pull_thread.dense_table.add()
+                dense_table.dense_value_name.extend(varnames)
+                dense_table.table_id = table_id
+
+        if self._infer:
+            downpour.skip_ops.extend(
+                ["push_sparse", "push_sparse_v2", "push_dense"])


 class DownpourSGD(DeviceWorker):
     """
     DownpourSGD is a kind of distributed SGD algorithm.
...
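Note: both Hogwild and the new DownpourLite fall back to the dense_table_config branch when no program_id_to_worker is present: every entry of the dict simply becomes one dense table of the pull-dense thread. A stripped-down sketch of that branch with plain Python objects follows; the _PullThread and _DenseTable classes are stand-ins for the protobuf descriptors, and the table id and variable names are made up:

class _DenseTable(object):
    def __init__(self):
        self.table_id = None
        self.dense_value_name = []


class _PullThread(object):
    def __init__(self):
        self.dense_table = []

    def add_dense_table(self):
        table = _DenseTable()
        self.dense_table.append(table)
        return table


def fill_pull_thread(pull_thread, dense_table_config):
    """Mirror of the else-branch above: one dense table per config entry."""
    for table_id, varnames in dense_table_config.items():
        dense_table = pull_thread.add_dense_table()
        dense_table.dense_value_name.extend(varnames)
        dense_table.table_id = table_id


if __name__ == "__main__":
    pull_thread = _PullThread()
    fill_pull_thread(pull_thread, {1: ["fc_0.w_0", "fc_0.b_0"]})
    print(pull_thread.dense_table[0].table_id,
          pull_thread.dense_table[0].dense_value_name)  # 1 ['fc_0.w_0', 'fc_0.b_0']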
python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py
...
@@ -57,8 +57,8 @@ class TestPsTrainerPass(PsPassTestBase):
         remove_path_if_exists(self.config['log_dir'])
         self.ps_launch()

-        file1 = '/ps_log/async_run_minimize_debug:_0_worker_main.prototxt'
-        file2 = '/ps_log/async_run_minimize_debug:_1_worker_main.prototxt'
+        file1 = './ps_log/async_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = './ps_log/async_run_minimize_debug:_1_worker_main.prototxt'
         if self.check(file1, file2):
             logger.info('test_ps_optimizer_minimize_cpu_async passed!')
         else:
...
@@ -79,8 +79,8 @@ class TestPsTrainerPass(PsPassTestBase):
         remove_path_if_exists(self.config['log_dir'])
         self.ps_launch()
         '''
-        file1 = '/ps_log/sync_run_minimize_debug:_0_worker_main.prototxt'
-        file2 = '/ps_log/sync_run_minimize_debug:_1_worker_main.prototxt'
+        file1 = './ps_log/sync_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = './ps_log/sync_run_minimize_debug:_1_worker_main.prototxt'
         if self.check(file1, file2):
             logger.info('test_ps_optimizer_minimize_cpu_sync passed!')
         else:
...
@@ -102,8 +102,8 @@ class TestPsTrainerPass(PsPassTestBase):
         remove_path_if_exists(self.config['log_dir'])
         self.ps_launch()

-        file1 = '/ps_log/geo_run_minimize_debug:_0_worker_main.prototxt'
-        file2 = '/ps_log/geo_run_minimize_debug:_1_worker_main.prototxt'
+        file1 = './ps_log/geo_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = './ps_log/geo_run_minimize_debug:_1_worker_main.prototxt'
         if self.check(file1, file2):
             logger.info('test_ps_optimizer_minimize_cpu_geo passed!')
         else:
...
@@ -130,10 +130,10 @@ class TestPsTrainerPass(PsPassTestBase):
         remove_path_if_exists(self.config['log_dir'])
         self.ps_launch('heter-ps')
         '''
-        file1 = '/ps_log/heter_run_minimize_debug:_0_worker_main.prototxt'
-        file2 = '/ps_log/heter_run_minimize_debug:_1_worker_main.prototxt'
-        file3 = '/ps_log/heter_run_minimize_debug:_0_heter_worker_main.prototxt'
-        file4 = '/ps_log/heter_run_minimize_debug:_1_heter_worker_main.prototxt'
+        file1 = './ps_log/heter_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = './ps_log/heter_run_minimize_debug:_1_worker_main.prototxt'
+        file3 = './ps_log/heter_run_minimize_debug:_0_heter_worker_main.prototxt'
+        file4 = './ps_log/heter_run_minimize_debug:_1_heter_worker_main.prototxt'
         if self.check(file1, file2) and self.check(file3, file4):
             logger.info('test_ps_optimizer_minimize_heter passed!')
         else:
...
@@ -155,8 +155,8 @@ class TestPsTrainerPass(PsPassTestBase):
         remove_path_if_exists(self.config['log_dir'])
         self.ps_launch("gpu-ps")

-        file1 = '/ps_log/gpubox_run_minimize_debug:_0_worker_main.prototxt'
-        file2 = '/ps_log/gpubox_run_minimize_debug:_1_worker_main.prototxt'
+        file1 = './ps_log/gpubox_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = './ps_log/gpubox_run_minimize_debug:_1_worker_main.prototxt'
         if self.check(file1, file2):
             logger.info('test_ps_optimizer_minimize_gpu passed!')
         else:
...
@@ -180,8 +180,8 @@ class TestPsTrainerPass(PsPassTestBase):
         remove_path_if_exists(self.config['log_dir'])
         self.ps_launch("cpu-ps")

-        file1 = '/ps_log/async_append_send_ops_pass_debug:_0_worker_main.prototxt'
-        file2 = '/ps_log/async_append_send_ops_pass_debug:_1_worker_main.prototxt'
+        file1 = './ps_log/async_append_send_ops_pass_debug:_0_worker_main.prototxt'
+        file2 = './ps_log/async_append_send_ops_pass_debug:_1_worker_main.prototxt'
         if self.check(file1, file2):
             logger.info('test_append_send_ops_pass passed!')
         else:
...
@@ -192,5 +192,5 @@ class TestPsTrainerPass(PsPassTestBase):

 if __name__ == '__main__':
-    remove_path_if_exists('/ps_log')
+    remove_path_if_exists('./ps_log')
     unittest.main()
python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py
...
@@ -50,11 +50,11 @@ class MKLDNNBF16ActivationOp(object):
         self.dtype = np.uint16
         self.init_data()
         self.config()
+        self.set_attrs()
         self.out = self.op_forward(self.x)

         self.inputs = {'X': convert_float_to_uint16(self.x)}
         self.outputs = {'Out': self.out}
-        self.set_attrs()

     def calculate_grads(self):
         self.dx = self.op_grad(self.out, self.x)
...
@@ -162,5 +162,110 @@ class TestMKLDNNMishBF16Op(MKLDNNBF16ActivationOp, TestActivation):
         return dout * ((np.exp(x) * omega) / delta**2)


+class TestMKLDNNRelu6BF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "relu6"
+
+    def op_forward(self, x):
+        return np.clip(x, 0, 6)
+
+    def op_grad(self, dout, x):
+        return np.where((x > 0) & (x <= 6), dout, 0)
+
+
+class TestMKLDNNLeakyReluBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "leaky_relu"
+
+    def op_forward(self, x):
+        return np.where(x > 0, x, self.alpha * x)
+
+    def op_grad(self, dout, x):
+        return np.where(x > 0, dout, self.alpha * dout)
+
+    def set_attrs(self):
+        self.alpha = 0.2
+        self.attrs = {"use_mkldnn": True, "alpha": self.alpha}
+
+
+class TestMKLDNNSwishBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "swish"
+
+    def expit(self, val):
+        return 1 / (1 + np.exp(-self.beta * val))
+
+    def op_forward(self, x):
+        return x * self.expit(x)
+
+    def op_grad(self, dout, x):
+        return dout * self.expit(x) * (1 + self.beta * x * (1 - self.expit(x)))
+
+    def set_attrs(self):
+        self.beta = 0.2
+        self.attrs = {"use_mkldnn": True, "beta": self.beta}
+
+
+class TestMKLDNNHardSwishBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "hard_swish"
+
+    def op_forward(self, x):
+        result = np.where(x < -3, 0, x)
+        return np.where(result > 3, result, result * (result + 3) / 6)
+
+    def op_grad(self, dout, x):
+        result = np.where(x < -3, 0, x)
+        return np.where(result > 3, dout, dout * (2 * x + 3) / 6)
+
+
+class TestMKLDNNTanhBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "tanh"
+
+    def op_forward(self, x):
+        return np.tanh(x)
+
+    def op_grad(self, dout, x):
+        return dout * (1 - np.tanh(x)**2)
+
+
+class TestMKLDNNAbsBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "abs"
+
+    def op_forward(self, x):
+        return np.absolute(x)
+
+    def op_grad(self, dout, x):
+        return dout * np.sign(x)
+
+
+class TestMKLDNNEluBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "elu"
+
+    def op_forward(self, x):
+        return np.where(x > 0, x, self.alpha * (np.exp(x) - 1))
+
+    def op_grad(self, dout, x):
+        return np.where(x > 0, dout, dout * self.alpha * np.exp(x))
+
+    def set_attrs(self):
+        self.alpha = 0.2
+        self.attrs = {"use_mkldnn": True, "alpha": self.alpha}
+
+
+class TestMKLDNNExpBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "exp"
+
+    def op_forward(self, x):
+        return np.exp(x)
+
+    def op_grad(self, dout, x):
+        return dout * np.exp(x)
+
+
 if __name__ == '__main__':
     unittest.main()
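Note: the new BF16 test classes above pair each activation with a NumPy reference forward and gradient. As a quick sanity check, those gradients can be compared against a central finite difference; the snippet below does that for the relu6 and hard_swish references, using pure NumPy and evaluation points chosen away from the kinks at 0, 6 and +/-3 (the helper name `check` is ours, not part of the test harness):

import numpy as np


def relu6(x):
    return np.clip(x, 0, 6)


def relu6_grad(dout, x):
    return np.where((x > 0) & (x <= 6), dout, 0)


def hard_swish(x):
    r = np.where(x < -3, 0, x)
    return np.where(r > 3, r, r * (r + 3) / 6)


def hard_swish_grad(dout, x):
    r = np.where(x < -3, 0, x)
    return np.where(r > 3, dout, dout * (2 * x + 3) / 6)


def check(fwd, grad, x, eps=1e-4, tol=1e-3):
    """Compare the analytic grad against a central finite difference."""
    dout = np.ones_like(x)
    numeric = (fwd(x + eps) - fwd(x - eps)) / (2 * eps)
    return np.allclose(grad(dout, x), numeric, atol=tol)


if __name__ == "__main__":
    x = np.array([-2.0, -0.5, 0.5, 2.5, 4.0, 7.0])
    print(check(relu6, relu6_grad, x))             # True
    print(check(hard_swish, hard_swish_grad, x))   # True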
python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
...
@@ -16,7 +16,7 @@ from __future__ import print_function

 import unittest
 import numpy as np
-from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool
+from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_float_to_uint16
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
...
@@ -30,23 +30,32 @@ def ref_softplus(x, beta, threshold):
     return out


-@OpTestTool.skip_if(not (isinstance(_current_expected_place(), core.CPUPlace)),
-                    "GPU is not supported")
+@OpTestTool.skip_if_not_cpu_bf16()
 class TestSoftplusOneDNNOp(OpTest):
     def setUp(self):
         self.op_type = "softplus"
         self.beta = 1
         self.threshold = 20
         self.config()
+        self.set_dtype()
         self.attrs = {'use_mkldnn': True, 'beta': self.beta}
-        self.inputs = {'X': np.random.random(self.x_shape).astype(np.float32)}
+        self.x = np.random.random(self.x_shape)
+        self.out = ref_softplus(self.x, self.beta, self.threshold)
+
+        if self.dtype != np.float32:
+            self.x = convert_float_to_uint16(self.x)
+
+        self.inputs = {'X': self.x}
         self.outputs = {
-            'Out': ref_softplus(self.inputs['X'], self.beta, self.threshold)
+            'Out': ref_softplus(self.out, self.beta, self.threshold)
         }

     def config(self):
         self.x_shape = (10, 10)

+    def set_dtype(self):
+        self.dtype = np.float32
+
     def test_check_output(self):
         self.check_output()
...
@@ -73,6 +82,27 @@ class TestSoftplus3DExtendedFunctorOneDNNOp(TestSoftplusOneDNNOp):
         self.beta = 0.4


+class TestSoftplusBF16OneDNNOp(TestSoftplusOneDNNOp):
+    def set_dtype(self):
+        self.dtype = np.uint16
+
+
+class TestSoftplus4DBF16OneDNNOp(TestSoftplus4DOneDNNOp):
+    def set_dtype(self):
+        self.dtype = np.uint16
+
+
+class TestSoftplus6DBF16OneDNNOp(TestSoftplus6DOneDNNOp):
+    def set_dtype(self):
+        self.dtype = np.uint16
+
+
+class TestSoftplus3DExtendedFunctorBF16OneDNNOp(TestSoftplus3DExtendedFunctorOneDNNOp):
+    def set_dtype(self):
+        self.dtype = np.uint16
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()
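Note: the BF16 variants above reuse convert_float_to_uint16 from the op-test utilities to feed bfloat16 inputs as uint16 bit patterns. For readers without the Paddle test harness at hand, the conversion is essentially "keep the top 16 bits of the float32 pattern"; the NumPy-only sketch below illustrates that idea with simple truncation, ignoring any rounding details the real helper may apply:

import numpy as np


def float32_to_bf16_bits(x):
    """Truncate float32 values to their upper 16 bits (bfloat16 storage)."""
    x = np.asarray(x, dtype=np.float32)
    return (x.view(np.uint32) >> 16).astype(np.uint16)


def bf16_bits_to_float32(bits):
    """Expand uint16 bfloat16 bit patterns back to float32."""
    return (bits.astype(np.uint32) << 16).view(np.float32)


if __name__ == "__main__":
    x = np.random.random((10, 10)).astype(np.float32)
    roundtrip = bf16_bits_to_float32(float32_to_bf16_bits(x))
    # bfloat16 keeps roughly 7 mantissa bits, so expect ~1e-2 relative error
    print(np.max(np.abs(roundtrip - x)))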
python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py  0 → 100644
(new file; the diff is collapsed in the web view)
python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py
...
@@ -26,7 +26,7 @@ import paddle
 from paddle.fluid.tests.unittests.distributed_passes.ps_pass_test_base import *
 from paddle.distributed.ps.utils.public import logger, ps_log_root_dir
 from ps_dnn_trainer import DnnTrainer
-from paddle.distributed.fleet.proto import ps_pb2
+import paddle.distributed.fleet.proto.the_one_ps_pb2 as ps_pb2
 from google.protobuf import text_format
...
python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py
...
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
 import unittest
 import paddle
-import os
 import paddle.distributed.fleet.base.role_maker as role_maker
 import time
...

python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py
...
@@ -11,10 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
 import unittest
 import paddle
-import os
 import paddle.distributed.fleet.base.role_maker as role_maker
 import time
...

python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py
...
@@ -11,10 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
 import unittest
 import paddle
-import os
 import paddle.distributed.fleet.base.role_maker as role_maker
 import time
...
python/paddle/fluid/tests/unittests/test_dist_fleet_base.py
...
@@ -309,7 +309,7 @@ class TestFleetBase(unittest.TestCase):
             (tr1_proc, tr1_out, tr1_err, tr1_out_log, tr1_err_log))

     def _run_cluster(self, model, envs):
-        env = {'GRAD_CLIP': str(self._grad_clip_mode)}
+        env = {'GRAD_CLIP': str(self._grad_clip_mode), 'WITH_DISTRIBUTE': 'ON'}
         python_path = self._python_interp
         gloo_path = tempfile.mkdtemp()
...
@@ -343,7 +343,8 @@ class TestFleetBase(unittest.TestCase):
         tr1_proc, tr1_out, tr1_err, tr1_out_log, tr1_err_log = tr1

         # Wait until trainer process terminate
-        time_out = 120
+        #time_out = 120
+        time_out = 60
         cur_time = 0
         while True:
...
python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py
...
@@ -51,8 +51,9 @@ class TestDistMnistAsyncInMemoryDataset2x2(TestFleetBase):
         tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs)

     def test_dist_train(self):
-        self.check_with_place(
-            "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        # self.check_with_place(
+        #     "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        print('recover later')


 class TestDistMnistAsync2x2(TestFleetBase):
...
@@ -85,8 +86,9 @@ class TestDistMnistAsync2x2(TestFleetBase):
         tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs)

     def test_dist_train(self):
-        self.check_with_place(
-            "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        # self.check_with_place(
+        #     "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        print('recover later')


 class TestDistCtrHalfAsync2x2(TestFleetBase):
...
@@ -122,8 +124,9 @@ class TestDistCtrHalfAsync2x2(TestFleetBase):
         tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs)

     def test_dist_train(self):
-        self.check_with_place(
-            "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        # self.check_with_place(
+        #     "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        print('recover later')


 if __name__ == "__main__":
...
python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py
...
@@ -52,8 +52,9 @@ class TestDistMnistSync2x2(TestFleetBase):
         tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs)

     def test_dist_train(self):
-        self.check_with_place(
-            "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        # self.check_with_place(
+        #     "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        print('recover later')


 # @unittest.skip(reason="Skip unstable ut, reader need to be rewrite")
...
@@ -91,8 +92,9 @@ class TestDistMnistAsyncDataset2x2(TestFleetBase):
         tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs)

     def test_dist_train(self):
-        self.check_with_place(
-            "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        # self.check_with_place(
+        #     "dist_fleet_ctr.py", delta=1e-5, check_error_log=False)
+        print('recover later')


 if __name__ == "__main__":
...
python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py
...
@@ -15,6 +15,7 @@
 from __future__ import print_function

 import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
 import unittest
 import paddle
 import paddle.fluid as fluid
...
python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py
...
@@ -13,14 +13,14 @@
 # limitations under the License.

 from __future__ import print_function
+import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
 import paddle.fluid as fluid
 import paddle.distributed.fleet.base.role_maker as role_maker
 import paddle.distributed.fleet as fleet
 import unittest
 import paddle
-import os

 paddle.enable_static()

 # For Net
...
@@ -74,11 +74,12 @@ class TestExponentialDecay(unittest.TestCase):
         strategy = paddle.distributed.fleet.DistributedStrategy()
         strategy.a_sync = True
         optimizer = fleet.distributed_optimizer(optimizer, strategy)
-        optimizer.minimize(loss)
+        optimizer.minimize([loss])
         fleet.init_server()


 if __name__ == '__main__':
     os.environ["GLOG_v"] = "4"
     os.environ["GLOG_logtostderr"] = "1"
     unittest.main()
...
python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py
...
@@ -15,6 +15,8 @@
 from __future__ import print_function

 import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
+
 import unittest
 import tempfile
 import shutil
...

python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py
...
@@ -15,6 +15,8 @@
 from __future__ import print_function

 import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
+
 import unittest
 import tempfile
 import shutil
...

python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py
...
@@ -13,10 +13,12 @@
 # limitations under the License.

 from __future__ import print_function
+import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
 import paddle.distributed.fleet as fleet
 import paddle.distributed.fleet.base.role_maker as role_maker
 import paddle.fluid as fluid
-import os
 import unittest
 import paddle

 paddle.enable_static()
...

python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py
...
@@ -13,10 +13,11 @@
 # limitations under the License.

 from __future__ import print_function
+import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
 import paddle.distributed.fleet as fleet
 import paddle.distributed.fleet.base.role_maker as role_maker
 import paddle.fluid as fluid
-import os
 import unittest
 import paddle

 paddle.enable_static()
...

python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py
...
@@ -13,10 +13,11 @@
 # limitations under the License.

 from __future__ import print_function
+import os
+os.environ["WITH_DISTRIBUTE"] = "ON"
 import paddle.distributed.fleet as fleet
 import paddle.distributed.fleet.base.role_maker as role_maker
 import paddle.fluid as fluid
-import os
 import unittest
 import paddle

 paddle.enable_static()
...
python/paddle/fluid/trainer_factory.py
...
@@ -23,7 +23,7 @@ local_logger = get_logger(
     __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')

 from .trainer_desc import MultiTrainer, DistMultiTrainer, PipelineTrainer, HeterXpuTrainer, PSGPUTrainer, HeterPipelineTrainer
-from .device_worker import Hogwild, DownpourSGD, Section, DownpourSGDOPT, HeterSection
+from .device_worker import Hogwild, DownpourSGD, DownpourLite, Section, DownpourSGDOPT, HeterSection
 from .framework import Variable
 from multiprocessing import Process, Manager
...