Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
dc488c17
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
dc488c17
编写于
1月 11, 2018
作者:
Y
Yang Yu
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of github.com:baidu/Paddle into feature/parallel_for_unittest
上级
12aca860
87f9b583
变更
41
隐藏空白更改
内联
并排
Showing
41 changed file
with
1930 addition
and
572 deletion
+1930
-572
paddle/framework/CMakeLists.txt
paddle/framework/CMakeLists.txt
+6
-4
paddle/framework/data_device_transform.cc
paddle/framework/data_device_transform.cc
+1
-1
paddle/framework/data_device_transform.h
paddle/framework/data_device_transform.h
+0
-0
paddle/framework/data_device_transform_test.cu
paddle/framework/data_device_transform_test.cu
+0
-0
paddle/framework/data_layout.h
paddle/framework/data_layout.h
+0
-1
paddle/framework/data_layout_transform.cc
paddle/framework/data_layout_transform.cc
+82
-0
paddle/framework/data_layout_transform.h
paddle/framework/data_layout_transform.h
+31
-0
paddle/framework/data_transform.cc
paddle/framework/data_transform.cc
+2
-139
paddle/framework/data_transform.h
paddle/framework/data_transform.h
+0
-140
paddle/framework/data_transform_test.cc
paddle/framework/data_transform_test.cc
+0
-168
paddle/framework/data_type_transform.cc
paddle/framework/data_type_transform.cc
+99
-0
paddle/framework/data_type_transform.h
paddle/framework/data_type_transform.h
+31
-0
paddle/framework/operator.cc
paddle/framework/operator.cc
+0
-1
paddle/gserver/layers/MKLDNNLayer.cpp
paddle/gserver/layers/MKLDNNLayer.cpp
+2
-0
paddle/gserver/layers/MKLDNNLayer.h
paddle/gserver/layers/MKLDNNLayer.h
+2
-0
paddle/operators/detail/CMakeLists.txt
paddle/operators/detail/CMakeLists.txt
+1
-1
paddle/operators/detail/grpc_client.cc
paddle/operators/detail/grpc_client.cc
+147
-0
paddle/operators/detail/grpc_client.h
paddle/operators/detail/grpc_client.h
+147
-0
paddle/operators/detail/grpc_server.cc
paddle/operators/detail/grpc_server.cc
+237
-0
paddle/operators/detail/grpc_server.h
paddle/operators/detail/grpc_server.h
+91
-0
paddle/operators/detail/send_recv.proto
paddle/operators/detail/send_recv.proto
+0
-2
paddle/operators/detail/sendrecvop_utils.cc
paddle/operators/detail/sendrecvop_utils.cc
+68
-0
paddle/operators/detail/sendrecvop_utils.h
paddle/operators/detail/sendrecvop_utils.h
+42
-0
paddle/operators/edit_distance_op.cc
paddle/operators/edit_distance_op.cc
+98
-0
paddle/operators/edit_distance_op.cu
paddle/operators/edit_distance_op.cu
+149
-0
paddle/operators/edit_distance_op.h
paddle/operators/edit_distance_op.h
+96
-0
paddle/operators/recv_op.cc
paddle/operators/recv_op.cc
+20
-23
paddle/operators/send_op.cc
paddle/operators/send_op.cc
+21
-35
paddle/operators/send_recv_op_test.cc
paddle/operators/send_recv_op_test.cc
+1
-1
paddle/operators/sequence_erase_op.cc
paddle/operators/sequence_erase_op.cc
+89
-0
paddle/operators/sequence_erase_op.cu
paddle/operators/sequence_erase_op.cu
+133
-0
paddle/operators/sequence_erase_op.h
paddle/operators/sequence_erase_op.h
+70
-0
paddle/operators/shrink_rnn_memory_op.cc
paddle/operators/shrink_rnn_memory_op.cc
+20
-5
paddle/scripts/submit_local.sh.in
paddle/scripts/submit_local.sh.in
+3
-0
python/paddle/v2/__init__.py
python/paddle/v2/__init__.py
+22
-13
python/paddle/v2/fluid/layers/nn.py
python/paddle/v2/fluid/layers/nn.py
+16
-1
python/paddle/v2/fluid/layers/ops.py
python/paddle/v2/fluid/layers/ops.py
+1
-12
python/paddle/v2/fluid/tests/test_edit_distance_op.py
python/paddle/v2/fluid/tests/test_edit_distance_op.py
+94
-0
python/paddle/v2/fluid/tests/test_sequence_erase_op.py
python/paddle/v2/fluid/tests/test_sequence_erase_op.py
+35
-0
python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py
python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py
+67
-24
python/setup.py.in
python/setup.py.in
+6
-1
未找到文件。
paddle/framework/CMakeLists.txt
浏览文件 @
dc488c17
...
...
@@ -32,10 +32,12 @@ cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
cc_library
(
scope SRCS scope.cc DEPS glog threadpool
)
cc_test
(
scope_test SRCS scope_test.cc DEPS scope
)
cc_library
(
device_data_transform SRCS device_data_transform.cc DEPS tensor
)
cc_library
(
data_device_transform SRCS data_device_transform.cc DEPS tensor
)
cc_library
(
data_type_transform SRCS data_type_transform.cc DEPS tensor
)
cc_library
(
data_layout_transform SRCS data_layout_transform.cc DEPS tensor math_function
)
cc_library
(
data_transform SRCS data_transform.cc DEPS math_function tensor
framework_proto selected_rows device_data_transform
)
cc_test
(
data_transform_test SRCS data_transform_test.cc DEPS data_transform device_context
)
cc_library
(
data_transform SRCS data_transform.cc DEPS math_function tensor
framework_proto selected_rows data_device_transform data_type_transform data_layout_transform
)
cc_library
(
attribute SRCS attribute.cc DEPS framework_proto
)
cc_test
(
program_desc_test SRCS program_desc_test.cc DEPS proto_desc
...
...
@@ -80,5 +82,5 @@ cc_test(init_test SRCS init_test.cc DEPS init)
cc_test
(
op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto
)
cc_test
(
cow_ptr_tests SRCS details/cow_ptr_test.cc
)
nv_test
(
d
evice_data_transform_test SRCS device_data
_transform_test.cu
nv_test
(
d
ata_device_transform_test SRCS data_device
_transform_test.cu
DEPS operator op_registry init math_function
)
paddle/framework/d
evice_data
_transform.cc
→
paddle/framework/d
ata_device
_transform.cc
浏览文件 @
dc488c17
...
...
@@ -11,7 +11,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/d
evice_data
_transform.h"
#include "paddle/framework/d
ata_device
_transform.h"
namespace
paddle
{
namespace
framework
{
...
...
paddle/framework/d
evice_data
_transform.h
→
paddle/framework/d
ata_device
_transform.h
浏览文件 @
dc488c17
文件已移动
paddle/framework/d
evice_data
_transform_test.cu
→
paddle/framework/d
ata_device
_transform_test.cu
浏览文件 @
dc488c17
文件已移动
paddle/framework/data_layout.h
浏览文件 @
dc488c17
...
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/platform/enforce.h"
#include <iostream>
#include "paddle/platform/enforce.h"
...
...
paddle/framework/data_layout_transform.cc
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/data_layout_transform.h"
#include "paddle/framework/tensor.h"
#include "paddle/operators/math/math_function.h"
namespace
paddle
{
namespace
framework
{
struct
CastDataLayout
{
CastDataLayout
(
const
platform
::
DeviceContext
*
ctx
,
const
std
::
vector
<
int
>&
axis
,
const
framework
::
Tensor
&
in
,
framework
::
Tensor
*
out
)
:
in_
(
in
),
out_
(
out
),
ctx_
(
ctx
),
axis_
(
axis
)
{}
const
framework
::
Tensor
in_
;
framework
::
Tensor
*
out_
;
const
platform
::
DeviceContext
*
ctx_
;
const
std
::
vector
<
int
>
axis_
;
template
<
typename
T
>
void
operator
()()
{
auto
place
=
ctx_
->
GetPlace
();
if
(
platform
::
is_cpu_place
(
place
))
{
operators
::
math
::
Transpose
<
platform
::
CPUDeviceContext
,
T
,
4
>
trans4
;
auto
*
context
=
static_cast
<
const
platform
::
CPUDeviceContext
*>
(
ctx_
);
trans4
(
*
context
,
in_
,
out_
,
axis_
);
}
else
{
PADDLE_THROW
(
"Unsupport CPU <-> GPU!"
);
}
}
};
void
TransDataLayout
(
const
std
::
vector
<
int
>&
axis
,
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
kernel_pair
,
const
Variable
&
in
,
Variable
*
out
)
{
PADDLE_ENFORCE
(
in
.
IsType
<
Tensor
>
(),
"Only support Tensor transform!."
);
PADDLE_ENFORCE
(
platform
::
places_are_same_class
(
kernel_pair
.
first
.
place_
,
kernel_pair
.
second
.
place_
),
"TransDataLayout only support DataLayout transform on same place!"
);
PADDLE_ENFORCE
(
kernel_pair
.
first
.
data_type_
==
kernel_pair
.
second
.
data_type_
,
"TransDataLayout only support Datatype are same!"
);
auto
src
=
in
.
Get
<
Tensor
>
();
auto
*
dst
=
out
->
GetMutable
<
Tensor
>
();
PADDLE_ENFORCE
(
arity
(
src
.
dims
())
==
4
,
"Input Arity Only Suppport 4!"
);
auto
src_dim
=
src
.
dims
();
std
::
vector
<
int64_t
>
dst_dim
;
dst_dim
.
resize
(
axis
.
size
());
for
(
size_t
i
=
0
;
i
<
axis
.
size
();
i
++
)
{
dst_dim
[
i
]
=
src_dim
[
axis
[
i
]];
}
dst
->
Resize
(
make_ddim
(
dst_dim
));
auto
place
=
kernel_pair
.
second
.
place_
;
dst
->
mutable_data
(
place
,
src
.
type
());
auto
src_type
=
kernel_pair
.
first
.
data_type_
;
framework
::
VisitDataType
(
src_type
,
CastDataLayout
(
ctx
,
axis
,
src
,
dst
));
dst
->
set_layout
(
kernel_pair
.
second
.
data_layout_
);
}
}
// namespace framework
}
// namespace paddle
paddle/
operators/detail/recv_impl.cc
→
paddle/
framework/data_layout_transform.h
浏览文件 @
dc488c17
...
...
@@ -12,54 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#
include "send_recv_impl.h"
#
pragma once
namespace
paddle
{
namespace
operators
{
namespace
detail
{
Status
SendRecvServerImpl
::
SendVariable
(
ServerContext
*
context
,
const
VariableMessage
*
in_var
,
VoidMessage
*
out_var
)
{
MessageWithName
msg_with_name
=
std
::
make_pair
(
in_var
->
varname
(),
std
::
move
(
*
in_var
));
var_recv_queue_
.
Push
(
std
::
move
(
msg_with_name
));
return
Status
::
OK
;
}
Status
SendRecvServerImpl
::
GetVariable
(
ServerContext
*
context
,
const
VariableMessage
*
in_var
,
VariableMessage
*
out_var
)
{
std
::
string
get_var_name
=
in_var
->
varname
();
auto
*
var
=
scope_
->
FindVar
(
get_var_name
);
#include "paddle/framework/op_kernel_type.h"
#include "paddle/framework/variable.h"
SerializeToMessage
(
get_var_name
,
var
,
platform
::
CPUDeviceContext
(),
out_var
);
return
Status
::
OK
;
}
Status
SendRecvServerImpl
::
Wait
(
ServerContext
*
context
,
const
VoidMessage
*
in_var
,
VoidMessage
*
out_var
)
{
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
this
->
mutex_
);
condition_
.
wait
(
lock
,
[
=
]
{
return
this
->
done_
==
true
;
});
}
return
Status
::
OK
;
}
namespace
paddle
{
namespace
framework
{
void
SendRecvServerImpl
::
Reset
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
this
->
mutex_
);
done_
=
false
;
}
using
KernelTypePair
=
std
::
pair
<
OpKernelType
,
OpKernelType
>
;
void
SendRecvServerImpl
::
Done
()
{
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
this
->
mutex_
);
done_
=
true
;
}
condition_
.
notify_all
();
}
void
TransDataLayout
(
const
std
::
vector
<
int
>&
axis
,
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
kernel_pair
,
const
Variable
&
in
,
Variable
*
out
);
}
// namespace detail
}
// namespace operators
}
// namespace framework
}
// namespace paddle
paddle/framework/data_transform.cc
浏览文件 @
dc488c17
...
...
@@ -11,22 +11,14 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <functional>
#include "paddle/framework/data_transform.h"
#include "paddle/framework/device_data_transform.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/platform/device_context.h"
#include "paddle/framework/data_device_transform.h"
namespace
paddle
{
namespace
framework
{
DataTransformFnMap
&
DataTransformFnMap
::
Instance
()
{
static
DataTransformFnMap
data_transform_map
;
return
data_transform_map
;
}
Tensor
*
DataTransform
(
const
OpKernelType
&
expected_kernel_type
,
const
OpKernelType
&
kernel_type_for_var
,
const
Tensor
&
input_tensor
)
{
...
...
@@ -58,134 +50,5 @@ void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
}
}
auto
KernelFP32
=
OpKernelType
(
proto
::
DataType
::
FP32
,
platform
::
CPUPlace
(),
DataLayout
::
kNHWC
,
LibraryType
::
kPlain
);
auto
KernelFP64
=
OpKernelType
(
proto
::
DataType
::
FP64
,
platform
::
CPUPlace
(),
DataLayout
::
kNHWC
,
LibraryType
::
kPlain
);
auto
KernelNHWC
=
OpKernelType
(
proto
::
DataType
::
FP64
,
platform
::
CPUPlace
(),
DataLayout
::
kNHWC
,
LibraryType
::
kPlain
);
auto
KernelNCHW
=
OpKernelType
(
proto
::
DataType
::
FP64
,
platform
::
CPUPlace
(),
DataLayout
::
kNCHW
,
LibraryType
::
kPlain
);
// TODO(dzhwinter): Only for testing multiple op kernel.
// Dummy transform function for library_type
// should be removed.
auto
KernelPlain
=
OpKernelType
(
proto
::
DataType
::
FP32
,
platform
::
CUDAPlace
(
0
),
DataLayout
::
kAnyLayout
,
LibraryType
::
kPlain
);
auto
KernelCUDNN
=
OpKernelType
(
proto
::
DataType
::
FP32
,
platform
::
CUDAPlace
(
0
),
DataLayout
::
kAnyLayout
,
LibraryType
::
kCUDNN
);
void
DummyTrans
(
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
kernel_pair
,
const
Variable
&
in
,
Variable
*
out
)
{
PADDLE_ENFORCE
(
in
.
IsType
<
Tensor
>
(),
"Only Support Tensor transform!."
);
PADDLE_ENFORCE
(
platform
::
places_are_same_class
(
kernel_pair
.
first
.
place_
,
kernel_pair
.
second
.
place_
),
"TransDataType Only Support DataType transform on same place!"
);
auto
src
=
in
.
Get
<
Tensor
>
();
auto
*
dst
=
out
->
GetMutable
<
Tensor
>
();
*
dst
=
src
;
}
void
TransDataType
(
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
kernel_pair
,
const
Variable
&
in
,
Variable
*
out
)
{
PADDLE_ENFORCE
(
in
.
IsType
<
Tensor
>
(),
"Only Support Tensor transform!."
);
PADDLE_ENFORCE
(
platform
::
places_are_same_class
(
kernel_pair
.
first
.
place_
,
kernel_pair
.
second
.
place_
),
"TransDataType Only Support DataType transform on same place!"
);
auto
src
=
in
.
Get
<
Tensor
>
();
auto
*
dst
=
out
->
GetMutable
<
Tensor
>
();
auto
dims
=
src
.
dims
();
dst
->
Resize
(
dims
);
auto
dst_type
=
kernel_pair
.
second
.
data_type_
;
auto
src_type
=
kernel_pair
.
first
.
data_type_
;
switch
(
src_type
)
{
case
proto
::
DataType
::
FP32
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
float
>
(
src
,
dst
,
ctx
));
break
;
case
proto
::
DataType
::
FP64
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
double
>
(
src
,
dst
,
ctx
));
break
;
case
proto
::
DataType
::
INT32
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
int
>
(
src
,
dst
,
ctx
));
break
;
case
proto
::
DataType
::
INT64
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
int64_t
>
(
src
,
dst
,
ctx
));
break
;
case
proto
::
DataType
::
BOOL
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
bool
>
(
src
,
dst
,
ctx
));
break
;
default:
PADDLE_THROW
(
"Not support type %d"
,
src_type
);
}
}
void
TransDataLayout
(
const
std
::
vector
<
int
>&
axis
,
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
kernel_pair
,
const
Variable
&
in
,
Variable
*
out
)
{
PADDLE_ENFORCE
(
in
.
IsType
<
Tensor
>
(),
"Only support Tensor transform!."
);
PADDLE_ENFORCE
(
platform
::
places_are_same_class
(
kernel_pair
.
first
.
place_
,
kernel_pair
.
second
.
place_
),
"TransDataLayout only support DataLayout transform on same place!"
);
PADDLE_ENFORCE
(
kernel_pair
.
first
.
data_type_
==
kernel_pair
.
second
.
data_type_
,
"TransDataLayout only support Datatype are same!"
);
auto
src
=
in
.
Get
<
Tensor
>
();
auto
*
dst
=
out
->
GetMutable
<
Tensor
>
();
PADDLE_ENFORCE
(
arity
(
src
.
dims
())
==
4
,
"Input Arity Only Suppport 4!"
);
auto
src_dim
=
src
.
dims
();
std
::
vector
<
int64_t
>
dst_dim
;
dst_dim
.
resize
(
axis
.
size
());
for
(
size_t
i
=
0
;
i
<
axis
.
size
();
i
++
)
{
dst_dim
[
i
]
=
src_dim
[
axis
[
i
]];
}
dst
->
Resize
(
make_ddim
(
dst_dim
));
auto
place
=
kernel_pair
.
second
.
place_
;
dst
->
mutable_data
(
place
,
src
.
type
());
auto
src_type
=
kernel_pair
.
first
.
data_type_
;
framework
::
VisitDataType
(
src_type
,
CastDataLayout
(
ctx
,
axis
,
src
,
dst
));
dst
->
set_layout
(
kernel_pair
.
second
.
data_layout_
);
}
}
// namespace framework
}
// namespace paddle
namespace
f
=
paddle
::
framework
;
namespace
{
std
::
vector
<
int
>
NHWC2NCHW
=
{
0
,
3
,
1
,
2
};
std
::
vector
<
int
>
NCHW2NHWC
=
{
0
,
2
,
3
,
1
};
}
REGISTER_DATA_TRANSFORM_FN
(
f
::
KernelFP32
,
f
::
KernelFP64
,
f
::
TransDataType
);
REGISTER_DATA_TRANSFORM_FN
(
f
::
KernelPlain
,
f
::
KernelCUDNN
,
f
::
DummyTrans
);
REGISTER_DATA_TRANSFORM_FN
(
f
::
KernelCUDNN
,
f
::
KernelPlain
,
f
::
DummyTrans
);
REGISTER_DATA_TRANSFORM_FN
(
f
::
KernelNHWC
,
f
::
KernelNCHW
,
std
::
bind
(
f
::
TransDataLayout
,
NHWC2NCHW
,
std
::
placeholders
::
_1
,
std
::
placeholders
::
_2
,
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
));
REGISTER_DATA_TRANSFORM_FN
(
f
::
KernelNCHW
,
f
::
KernelNHWC
,
std
::
bind
(
f
::
TransDataLayout
,
NCHW2NHWC
,
std
::
placeholders
::
_1
,
std
::
placeholders
::
_2
,
std
::
placeholders
::
_3
,
std
::
placeholders
::
_4
));
paddle/framework/data_transform.h
浏览文件 @
dc488c17
...
...
@@ -30,26 +30,6 @@ limitations under the License. */
namespace
paddle
{
namespace
framework
{
using
KernelTypePair
=
std
::
pair
<
OpKernelType
,
OpKernelType
>
;
using
DataTransformFn
=
std
::
function
<
void
(
const
platform
::
DeviceContext
*
,
const
KernelTypePair
&
,
const
Variable
&
,
Variable
*
)
>
;
struct
KernelTypePairHash
{
static
void
HashCombine
(
const
OpKernelType
&
t
,
std
::
size_t
*
seed
)
{
OpKernelType
::
Hash
kernel_type_hasher
;
(
*
seed
)
^=
kernel_type_hasher
(
t
)
+
0x9e3779b9
+
(
*
seed
<<
6
)
+
(
*
seed
>>
2
);
}
size_t
operator
()(
const
KernelTypePair
&
kernel_pair
)
const
{
std
::
size_t
seed
=
0
;
HashCombine
(
kernel_pair
.
first
,
&
seed
);
HashCombine
(
kernel_pair
.
second
,
&
seed
);
return
seed
;
}
};
Tensor
*
DataTransform
(
const
OpKernelType
&
expected_kernel_type
,
const
OpKernelType
&
kernel_type_for_var
,
const
Tensor
&
input_tensor
);
...
...
@@ -57,125 +37,5 @@ Tensor* DataTransform(const OpKernelType& expected_kernel_type,
void
CopyVariableWithTensor
(
const
Variable
&
in_var
,
const
Tensor
&
tensor
,
Variable
&
out_var
);
template
<
typename
InType
,
typename
OutType
>
struct
CastDataTypeFunctor
{
HOSTDEVICE
inline
OutType
operator
()(
InType
in
)
const
{
return
static_cast
<
OutType
>
(
in
);
}
};
template
<
typename
InType
>
struct
CastDataType
{
CastDataType
(
const
framework
::
Tensor
&
in
,
framework
::
Tensor
*
out
,
const
platform
::
DeviceContext
*
ctx
)
:
in_
(
in
),
out_
(
out
),
ctx_
(
ctx
)
{}
const
framework
::
Tensor
in_
;
framework
::
Tensor
*
out_
;
const
platform
::
DeviceContext
*
ctx_
;
template
<
typename
OutType
>
void
operator
()()
{
auto
place
=
ctx_
->
GetPlace
();
auto
*
in_begin
=
in_
.
data
<
InType
>
();
auto
numel
=
in_
.
numel
();
auto
*
in_end
=
in_begin
+
numel
;
auto
*
out_begin
=
out_
->
mutable_data
<
OutType
>
(
place
);
if
(
platform
::
is_cpu_place
(
place
))
{
platform
::
Transform
<
platform
::
CPUDeviceContext
>
trans
;
auto
*
context
=
static_cast
<
const
platform
::
CPUDeviceContext
*>
(
ctx_
);
trans
(
*
context
,
in_begin
,
in_end
,
out_begin
,
CastDataTypeFunctor
<
InType
,
OutType
>
());
}
else
{
// TODO(dzhwinter): enhance Copy CPU<->GPU with different data type?
PADDLE_THROW
(
"Unsupport CPU <-> GPU!"
);
}
}
};
struct
CastDataLayout
{
CastDataLayout
(
const
platform
::
DeviceContext
*
ctx
,
const
std
::
vector
<
int
>&
axis
,
const
framework
::
Tensor
&
in
,
framework
::
Tensor
*
out
)
:
in_
(
in
),
out_
(
out
),
ctx_
(
ctx
),
axis_
(
axis
)
{}
const
framework
::
Tensor
in_
;
framework
::
Tensor
*
out_
;
const
platform
::
DeviceContext
*
ctx_
;
const
std
::
vector
<
int
>
axis_
;
template
<
typename
T
>
void
operator
()()
{
auto
place
=
ctx_
->
GetPlace
();
if
(
platform
::
is_cpu_place
(
place
))
{
operators
::
math
::
Transpose
<
platform
::
CPUDeviceContext
,
T
,
4
>
trans4
;
auto
*
context
=
static_cast
<
const
platform
::
CPUDeviceContext
*>
(
ctx_
);
trans4
(
*
context
,
in_
,
out_
,
axis_
);
}
else
{
PADDLE_THROW
(
"Unsupport CPU <-> GPU!"
);
}
}
};
using
DataTransformMap
=
std
::
unordered_map
<
KernelTypePair
,
DataTransformFn
,
KernelTypePairHash
>
;
class
DataTransformFnMap
{
public:
static
DataTransformFnMap
&
Instance
();
bool
Has
(
const
KernelTypePair
&
key_pair
)
const
{
return
map_
.
find
(
key_pair
)
!=
map_
.
end
();
}
void
Insert
(
const
OpKernelType
&
left
,
const
OpKernelType
&
right
,
const
DataTransformFn
&
data_tranform_fn
)
{
Insert
(
std
::
make_pair
(
left
,
right
),
data_tranform_fn
);
}
void
Insert
(
const
KernelTypePair
&
kernel_type_pair
,
const
DataTransformFn
&
data_tranform_fn
)
{
PADDLE_ENFORCE
(
!
Has
(
kernel_type_pair
),
"KernelTypePair %s has been registered"
,
""
);
map_
.
insert
({
kernel_type_pair
,
data_tranform_fn
});
}
const
DataTransformFn
&
Get
(
const
KernelTypePair
&
key_pair
)
const
{
auto
data_transformer
=
GetNullable
(
key_pair
);
PADDLE_ENFORCE_NOT_NULL
(
data_transformer
,
"DataTransformFn should not be NULL"
);
return
*
data_transformer
;
}
const
DataTransformFn
*
GetNullable
(
const
KernelTypePair
&
key_pair
)
const
{
auto
it
=
map_
.
find
(
key_pair
);
if
(
it
==
map_
.
end
())
{
return
nullptr
;
}
else
{
return
&
(
it
->
second
);
}
}
const
DataTransformMap
&
Map
()
const
{
return
map_
;
}
private:
DataTransformFnMap
()
=
default
;
DataTransformMap
map_
;
DISABLE_COPY_AND_ASSIGN
(
DataTransformFnMap
);
};
// generate unique name with __LINE__
// refs https://stackoverflow.com/questions/1597007
#define TOKENPASTE(x, y) x##y
#define TOKENPASTE2(x, y) TOKENPASTE(x, y)
#define REGISTER_DATA_TRANSFORM_FN(from, to, fn) \
static int TOKENPASTE2(fn_, __LINE__)() { \
::paddle::framework::DataTransformFnMap::Instance().Insert(from, to, fn); \
return 0; \
} \
static int TOKENPASTE2(var_, __LINE__) __attribute__((unused)) = \
TOKENPASTE2(fn_, __LINE__)()
}
// namespace framework
}
// namespace paddle
paddle/framework/data_transform_test.cc
已删除
100644 → 0
浏览文件 @
12aca860
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <array>
#include <vector>
#include <gtest/gtest.h>
#include "paddle/framework/data_transform.h"
#include "paddle/platform/device_context.h"
namespace
paddle
{
namespace
framework
{
using
namespace
platform
;
/**
* @brief cross validation of different kernel type transform
* We use four bit map represent different combination.
* If the field has multiple possible value, only choose two of them.
* For DataType, only test the FP32(float), FP64(double).
* e.g. 0000 -> FP32, CPUPlace, kNHWC, kPlain
* 1111 -> FP64, GPUPlace, kNCHW, kMKLDNN
*/
std
::
array
<
proto
::
DataType
,
2
>
kDataType
=
{
{
proto
::
DataType
::
FP32
,
proto
::
DataType
::
FP64
}};
std
::
array
<
Place
,
2
>
kPlace
=
{{
CPUPlace
(),
CUDAPlace
(
0
)}};
std
::
array
<
DataLayout
,
2
>
kDataLayout
=
{{
DataLayout
::
kNHWC
,
DataLayout
::
kNCHW
,
}};
std
::
array
<
LibraryType
,
2
>
kLibraryType
=
{{
LibraryType
::
kPlain
,
LibraryType
::
kMKLDNN
,
}};
OpKernelType
GenFromBit
(
const
std
::
vector
<
bool
>
bits
)
{
return
OpKernelType
(
kDataType
[
bits
[
0
]],
kPlace
[
bits
[
1
]],
kDataLayout
[
bits
[
2
]],
kLibraryType
[
bits
[
3
]]);
}
int
test_value
=
0
;
auto
kernel0
=
GenFromBit
({
0
,
0
,
0
,
0
});
auto
kernel1
=
GenFromBit
({
0
,
0
,
0
,
1
});
auto
kernel2
=
GenFromBit
({
0
,
0
,
1
,
0
});
auto
kernel3
=
GenFromBit
({
0
,
0
,
1
,
1
});
void
TransDataType_t
(
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
p
,
const
Variable
&
in
,
Variable
*
out
)
{
test_value
++
;
}
void
TransDataLayout_t
(
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
p
,
const
Variable
&
in
,
Variable
*
out
)
{
test_value
--
;
}
void
TransLibraryType_t
(
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
p
,
const
Variable
&
in
,
Variable
*
out
)
{
test_value
+=
2
;
}
}
// namespace framework
}
// namespace paddle
namespace
frw
=
paddle
::
framework
;
REGISTER_DATA_TRANSFORM_FN
(
frw
::
kernel0
,
frw
::
kernel1
,
frw
::
TransDataType_t
);
REGISTER_DATA_TRANSFORM_FN
(
frw
::
kernel1
,
frw
::
kernel2
,
frw
::
TransDataLayout_t
);
REGISTER_DATA_TRANSFORM_FN
(
frw
::
kernel0
,
frw
::
kernel2
,
frw
::
TransLibraryType_t
);
TEST
(
DataTransform
,
Register
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
auto
&
instance
=
DataTransformFnMap
::
Instance
();
paddle
::
framework
::
Variable
in
;
paddle
::
framework
::
Variable
out
;
DeviceContext
*
ctx
=
new
CPUDeviceContext
();
auto
pair0
=
std
::
make_pair
(
frw
::
kernel0
,
frw
::
kernel1
);
instance
.
Get
(
pair0
)(
ctx
,
pair0
,
in
,
&
out
);
ASSERT_EQ
(
test_value
,
1
);
auto
pair1
=
std
::
make_pair
(
frw
::
kernel1
,
frw
::
kernel2
);
instance
.
Get
(
pair1
)(
ctx
,
pair1
,
in
,
&
out
);
ASSERT_EQ
(
test_value
,
0
);
auto
pair3
=
std
::
make_pair
(
frw
::
kernel0
,
frw
::
kernel2
);
instance
.
Get
(
pair3
)(
ctx
,
pair3
,
in
,
&
out
);
ASSERT_EQ
(
test_value
,
2
);
}
TEST
(
DataTransform
,
DataLayout
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
auto
&
instance
=
DataTransformFnMap
::
Instance
();
Variable
in
;
Variable
out
;
Tensor
*
src
=
in
.
GetMutable
<
Tensor
>
();
src
->
mutable_data
<
double
>
(
make_ddim
({
2
,
3
,
1
,
2
}),
CPUPlace
());
src
->
set_layout
(
DataLayout
::
kNHWC
);
DeviceContext
*
ctx
=
new
CPUDeviceContext
();
{
auto
kernel1
=
GenFromBit
({
1
,
0
,
0
,
0
});
auto
kernel2
=
GenFromBit
({
1
,
0
,
1
,
0
});
auto
pair0
=
std
::
make_pair
(
kernel1
,
kernel2
);
instance
.
Get
(
pair0
)(
ctx
,
pair0
,
in
,
&
out
);
}
Tensor
dst
=
out
.
Get
<
Tensor
>
();
EXPECT_TRUE
(
dst
.
layout
()
==
DataLayout
::
kNCHW
);
EXPECT_TRUE
(
dst
.
dims
()
==
make_ddim
({
2
,
2
,
3
,
1
}));
{
auto
kernel1
=
GenFromBit
({
1
,
0
,
1
,
0
});
auto
kernel2
=
GenFromBit
({
1
,
0
,
0
,
0
});
auto
pair0
=
std
::
make_pair
(
kernel1
,
kernel2
);
instance
.
Get
(
pair0
)(
ctx
,
pair0
,
out
,
&
in
);
}
EXPECT_TRUE
(
src
->
layout
()
==
DataLayout
::
kNHWC
);
EXPECT_TRUE
(
src
->
dims
()
==
make_ddim
({
2
,
3
,
1
,
2
}));
}
TEST
(
DataTransform
,
DataType
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
auto
&
instance
=
DataTransformFnMap
::
Instance
();
DeviceContext
*
ctx
=
new
CPUDeviceContext
();
Variable
in
;
Variable
out
;
Tensor
*
src
=
in
.
GetMutable
<
Tensor
>
();
float
*
ptr
=
src
->
mutable_data
<
float
>
(
make_ddim
({
2
,
3
}),
CPUPlace
());
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
ptr
[
i
]
=
i
/
3
;
}
{
auto
kernel1
=
GenFromBit
({
0
,
0
,
0
,
0
});
auto
kernel2
=
GenFromBit
({
1
,
0
,
0
,
0
});
auto
pair0
=
std
::
make_pair
(
kernel1
,
kernel2
);
instance
.
Get
(
pair0
)(
ctx
,
pair0
,
in
,
&
out
);
}
Tensor
dst
=
out
.
Get
<
Tensor
>
();
EXPECT_TRUE
(
dst
.
data
<
double
>
()
!=
nullptr
);
}
paddle/framework/data_type_transform.cc
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/data_type_transform.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/platform/transform.h"
namespace
paddle
{
namespace
framework
{
template
<
typename
InType
,
typename
OutType
>
struct
CastDataTypeFunctor
{
HOSTDEVICE
inline
OutType
operator
()(
InType
in
)
const
{
return
static_cast
<
OutType
>
(
in
);
}
};
template
<
typename
InType
>
struct
CastDataType
{
CastDataType
(
const
framework
::
Tensor
&
in
,
framework
::
Tensor
*
out
,
const
platform
::
DeviceContext
*
ctx
)
:
in_
(
in
),
out_
(
out
),
ctx_
(
ctx
)
{}
const
framework
::
Tensor
in_
;
framework
::
Tensor
*
out_
;
const
platform
::
DeviceContext
*
ctx_
;
template
<
typename
OutType
>
void
operator
()()
{
auto
place
=
ctx_
->
GetPlace
();
auto
*
in_begin
=
in_
.
data
<
InType
>
();
auto
numel
=
in_
.
numel
();
auto
*
in_end
=
in_begin
+
numel
;
auto
*
out_begin
=
out_
->
mutable_data
<
OutType
>
(
place
);
if
(
platform
::
is_cpu_place
(
place
))
{
platform
::
Transform
<
platform
::
CPUDeviceContext
>
trans
;
auto
*
context
=
static_cast
<
const
platform
::
CPUDeviceContext
*>
(
ctx_
);
trans
(
*
context
,
in_begin
,
in_end
,
out_begin
,
CastDataTypeFunctor
<
InType
,
OutType
>
());
}
else
{
// TODO(dzhwinter): enhance Copy CPU<->GPU with different data type?
PADDLE_THROW
(
"Unsupport CPU <-> GPU!"
);
}
}
};
void
TransDataType
(
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
kernel_pair
,
const
Variable
&
in
,
Variable
*
out
)
{
PADDLE_ENFORCE
(
in
.
IsType
<
Tensor
>
(),
"Only Support Tensor transform!."
);
PADDLE_ENFORCE
(
platform
::
places_are_same_class
(
kernel_pair
.
first
.
place_
,
kernel_pair
.
second
.
place_
),
"TransDataType Only Support DataType transform on same place!"
);
auto
src
=
in
.
Get
<
Tensor
>
();
auto
*
dst
=
out
->
GetMutable
<
Tensor
>
();
auto
dims
=
src
.
dims
();
dst
->
Resize
(
dims
);
auto
dst_type
=
kernel_pair
.
second
.
data_type_
;
auto
src_type
=
kernel_pair
.
first
.
data_type_
;
switch
(
src_type
)
{
case
proto
::
DataType
::
FP32
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
float
>
(
src
,
dst
,
ctx
));
break
;
case
proto
::
DataType
::
FP64
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
double
>
(
src
,
dst
,
ctx
));
break
;
case
proto
::
DataType
::
INT32
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
int
>
(
src
,
dst
,
ctx
));
break
;
case
proto
::
DataType
::
INT64
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
int64_t
>
(
src
,
dst
,
ctx
));
break
;
case
proto
::
DataType
::
BOOL
:
framework
::
VisitDataType
(
dst_type
,
CastDataType
<
bool
>
(
src
,
dst
,
ctx
));
break
;
default:
PADDLE_THROW
(
"Not support type %d"
,
src_type
);
}
}
}
// namespace framework
}
// namespace paddle
paddle/framework/data_type_transform.h
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_kernel_type.h"
#include "paddle/framework/variable.h"
#include "paddle/platform/device_context.h"
namespace
paddle
{
namespace
framework
{
using
KernelTypePair
=
std
::
pair
<
OpKernelType
,
OpKernelType
>
;
void
TransDataType
(
const
platform
::
DeviceContext
*
ctx
,
const
KernelTypePair
&
kernel_pair
,
const
Variable
&
in
,
Variable
*
out
);
}
// namespace framework
}
// namespace paddle
paddle/framework/operator.cc
浏览文件 @
dc488c17
...
...
@@ -16,7 +16,6 @@ limitations under the License. */
#include <algorithm>
#include "paddle/framework/data_transform.h"
#include "paddle/framework/device_data_transform.h"
#include "paddle/framework/executor.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/shape_inference.h"
...
...
paddle/gserver/layers/MKLDNNLayer.cpp
浏览文件 @
dc488c17
...
...
@@ -132,6 +132,8 @@ void MKLDNNLayer::reshapeInput(int& batchsize,
if
(
w
!=
0
)
{
width
=
w
;
}
height
=
height
!=
0
?
height
:
1
;
width
=
width
!=
0
?
width
:
1
;
}
void
MKLDNNLayer
::
reshapeOutput
(
size_t
height
,
size_t
width
)
{
...
...
paddle/gserver/layers/MKLDNNLayer.h
浏览文件 @
dc488c17
...
...
@@ -98,6 +98,8 @@ protected:
public:
explicit
MKLDNNLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
),
ih_
(
0
),
iw_
(
0
),
condition_
(
0
),
needResetBwd_
(
true
),
outputOnlyMKLDNN_
(
false
),
...
...
paddle/operators/detail/CMakeLists.txt
浏览文件 @
dc488c17
grpc_library
(
sendrecvop_grpc SRCS
recv_impl.cc send_impl
.cc PROTO send_recv.proto DEPS lod_tensor selected_rows
)
grpc_library
(
sendrecvop_grpc SRCS
sendrecvop_utils.cc grpc_client.cc grpc_server
.cc PROTO send_recv.proto DEPS lod_tensor selected_rows
)
paddle/operators/detail/grpc_client.cc
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "grpc_client.h"
namespace
paddle
{
namespace
operators
{
namespace
detail
{
bool
RPCClient
::
AsyncSendVariable
(
const
std
::
string
&
ep
,
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Scope
&
scope
,
const
std
::
string
&
var_name
,
int64_t
time_out
)
{
sendrecv
::
VariableMessage
req
;
auto
*
var
=
scope
.
FindVar
(
var_name
);
SerializeToMessage
(
var_name
,
var
,
ctx
,
&
req
);
// varhandle
VarHandle
var_h
;
var_h
.
ep
=
ep
;
var_h
.
scope
=
&
scope
;
var_h
.
name
=
var_name
;
var_h
.
ctx
=
&
ctx
;
// stub context
auto
ch
=
GetChannel
(
ep
);
SendProcessor
*
s
=
new
SendProcessor
(
ch
);
s
->
Prepare
(
var_h
,
time_out
);
s
->
response_call_back_
=
NULL
;
auto
rpc
=
s
->
stub_
->
AsyncSendVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
(
void
*
)
s
);
req_count_
++
;
return
true
;
}
void
ProcGetResponse
(
const
VarHandle
&
var_h
,
const
sendrecv
::
VariableMessage
&
ret_msg
)
{
auto
*
outvar
=
var_h
.
scope
->
FindVar
(
var_h
.
name
);
std
::
istringstream
iss
(
ret_msg
.
serialized
());
DeserializeFromMessage
(
ret_msg
,
*
var_h
.
ctx
,
outvar
);
}
bool
RPCClient
::
AsyncGetVariable
(
const
std
::
string
&
ep
,
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Scope
&
scope
,
const
std
::
string
&
var_name
,
int64_t
time_out
)
{
sendrecv
::
VariableMessage
req
;
req
.
set_varname
(
var_name
);
auto
*
var
=
scope
.
FindVar
(
var_name
);
SerializeToMessage
(
var_name
,
var
,
ctx
,
&
req
);
// varhandle
VarHandle
var_h
;
var_h
.
ep
=
ep
;
var_h
.
scope
=
&
scope
;
var_h
.
name
=
var_name
;
var_h
.
ctx
=
&
ctx
;
// stub context
auto
ch
=
GetChannel
(
ep
);
GetProcessor
*
s
=
new
GetProcessor
(
ch
);
s
->
Prepare
(
var_h
,
time_out
);
s
->
response_call_back_
=
ProcGetResponse
;
auto
rpc
=
s
->
stub_
->
AsyncGetVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
(
void
*
)
s
);
req_count_
++
;
return
true
;
}
bool
RPCClient
::
wait
()
{
bool
ok
=
true
;
while
(
true
)
{
if
(
req_count_
<=
0
)
{
break
;
}
if
(
!
Proceed
())
{
LOG
(
ERROR
)
<<
"Get meets CompletionQueue error"
;
return
false
;
}
}
return
ok
;
}
bool
RPCClient
::
Proceed
()
{
void
*
tag
=
NULL
;
bool
ok
=
false
;
// request counts.
if
(
!
cq_
.
Next
(
&
tag
,
&
ok
))
{
return
false
;
}
req_count_
--
;
GPR_ASSERT
(
ok
);
PADDLE_ENFORCE
(
tag
);
// TODO(gongwb): add more retries.
ClientBase
*
c
=
static_cast
<
ClientBase
*>
(
tag
);
if
(
!
c
->
status_
.
ok
())
{
delete
c
;
return
true
;
}
c
->
Process
();
delete
c
;
return
true
;
}
std
::
shared_ptr
<
grpc
::
Channel
>
RPCClient
::
GetChannel
(
const
std
::
string
&
ep
)
{
auto
it
=
channels_
.
find
(
ep
);
if
(
it
!=
channels_
.
end
())
{
return
it
->
second
;
}
auto
ch
=
std
::
shared_ptr
<
grpc
::
Channel
>
(
grpc
::
CreateChannel
(
ep
,
grpc
::
InsecureChannelCredentials
()));
channels_
[
ep
]
=
ch
;
return
ch
;
}
}
// namespace detail
}
// namespace operators
}
// namespace paddle
paddle/operators/detail/grpc_client.h
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <grpc++/grpc++.h>
#include <grpc/support/log.h>
#include <time.h>
#include <chrono>
#include <ctime>
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "paddle/framework/data_type.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/operators/detail/sendrecvop_utils.h"
#include "paddle/operators/detail/simple_block_queue.h"
namespace
paddle
{
namespace
operators
{
namespace
detail
{
struct
VarHandle
{
std
::
string
ep
;
const
platform
::
DeviceContext
*
ctx
;
const
framework
::
Scope
*
scope
;
std
::
string
name
;
std
::
string
String
()
const
{
std
::
ostringstream
s
;
s
<<
"name:["
<<
name
<<
"] ep:["
<<
ep
<<
"]"
;
return
s
.
str
();
}
};
void
ProcGetResponse
(
const
VarHandle
&
var_h
,
const
sendrecv
::
VariableMessage
&
msg
);
class
ClientBase
{
public:
explicit
ClientBase
(
std
::
shared_ptr
<
grpc
::
Channel
>
ch
)
{
stub_
=
sendrecv
::
SendRecvService
::
NewStub
(
ch
);
context_
=
NULL
;
}
virtual
~
ClientBase
()
{}
virtual
void
Prepare
(
const
VarHandle
&
var_info
,
int64_t
time_out
)
{
context_
.
reset
(
new
grpc
::
ClientContext
());
var_h_
=
var_info
;
std
::
chrono
::
system_clock
::
time_point
deadline
=
std
::
chrono
::
system_clock
::
now
()
+
std
::
chrono
::
milliseconds
(
time_out
);
context_
->
set_deadline
(
deadline
);
}
virtual
void
Process
()
=
0
;
std
::
unique_ptr
<
sendrecv
::
SendRecvService
::
Stub
>
stub_
;
std
::
unique_ptr
<
grpc
::
ClientContext
>
context_
;
grpc
::
Status
status_
;
VarHandle
var_h_
;
};
typedef
std
::
function
<
void
(
const
VarHandle
&
,
const
sendrecv
::
VoidMessage
&
)
>
RequestSendCallBack
;
class
SendProcessor
:
public
ClientBase
{
public:
explicit
SendProcessor
(
std
::
shared_ptr
<
grpc
::
Channel
>
ch
)
:
ClientBase
(
ch
)
{}
virtual
~
SendProcessor
()
{}
virtual
void
Process
()
{
if
(
response_call_back_
)
{
response_call_back_
(
var_h_
,
reply_
);
}
}
sendrecv
::
VoidMessage
reply_
;
RequestSendCallBack
response_call_back_
=
NULL
;
};
typedef
std
::
function
<
void
(
const
VarHandle
&
,
const
sendrecv
::
VariableMessage
&
)
>
RequestGetCallBack
;
class
GetProcessor
:
public
ClientBase
{
public:
explicit
GetProcessor
(
std
::
shared_ptr
<
grpc
::
Channel
>
ch
)
:
ClientBase
(
ch
)
{}
virtual
~
GetProcessor
()
{}
virtual
void
Process
()
{
if
(
response_call_back_
)
{
response_call_back_
(
var_h_
,
reply_
);
}
}
sendrecv
::
VariableMessage
reply_
;
RequestGetCallBack
response_call_back_
=
ProcGetResponse
;
};
class
RPCClient
{
public:
bool
AsyncSendVariable
(
const
std
::
string
&
ep
,
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Scope
&
scope
,
const
std
::
string
&
var_name
,
int64_t
time_out
=
600
*
1000
);
bool
AsyncGetVariable
(
const
std
::
string
&
ep
,
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Scope
&
scope
,
const
std
::
string
&
var_name
,
int64_t
time_out
=
600
*
1000
);
bool
wait
();
private:
bool
Proceed
();
std
::
shared_ptr
<
grpc
::
Channel
>
GetChannel
(
const
std
::
string
&
ep
);
private:
grpc
::
CompletionQueue
cq_
;
std
::
map
<
std
::
string
,
std
::
shared_ptr
<
grpc
::
Channel
>>
channels_
;
int64_t
req_count_
=
0
;
};
}
// namespace detail
}
// namespace operators
}
// namespace paddle
paddle/operators/detail/grpc_server.cc
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/detail/grpc_server.h"
using
grpc
::
ServerAsyncResponseWriter
;
namespace
paddle
{
namespace
operators
{
namespace
detail
{
enum
CallStatus
{
PROCESS
=
0
,
FINISH
};
// reference:
// https://stackoverflow.com/questions/41732884/grpc-multiple-services-in-cpp-async-server
class
RequestBase
{
public:
explicit
RequestBase
(
sendrecv
::
SendRecvService
::
AsyncService
*
service
,
grpc
::
ServerCompletionQueue
*
cq
)
:
service_
(
service
),
cq_
(
cq
),
status_
(
PROCESS
)
{}
virtual
~
RequestBase
()
{}
virtual
void
Process
()
{
assert
(
false
);
}
CallStatus
Status
()
{
return
status_
;
}
void
SetStatus
(
CallStatus
status
)
{
status_
=
status
;
}
protected:
grpc
::
ServerContext
ctx_
;
sendrecv
::
SendRecvService
::
AsyncService
*
service_
;
grpc
::
ServerCompletionQueue
*
cq_
;
CallStatus
status_
;
};
typedef
std
::
pair
<
std
::
string
,
sendrecv
::
VariableMessage
>
MessageWithName
;
class
RequestSend
final
:
public
RequestBase
{
public:
explicit
RequestSend
(
sendrecv
::
SendRecvService
::
AsyncService
*
service
,
grpc
::
ServerCompletionQueue
*
cq
,
SimpleBlockQueue
<
MessageWithName
>*
queue
)
:
RequestBase
(
service
,
cq
),
queue_
(
queue
),
responder_
(
&
ctx_
)
{
service_
->
RequestSendVariable
(
&
ctx_
,
&
request_
,
&
responder_
,
cq_
,
cq_
,
this
);
}
virtual
~
RequestSend
()
{}
virtual
void
Process
()
{
MessageWithName
msg_with_name
=
std
::
make_pair
(
request_
.
varname
(),
std
::
move
(
request_
));
queue_
->
Push
(
std
::
move
(
msg_with_name
));
// TODO(gongwb): check var's info.
responder_
.
Finish
(
reply_
,
grpc
::
Status
::
OK
,
this
);
}
protected:
sendrecv
::
VariableMessage
request_
;
sendrecv
::
VoidMessage
reply_
;
SimpleBlockQueue
<
MessageWithName
>*
queue_
;
ServerAsyncResponseWriter
<
sendrecv
::
VoidMessage
>
responder_
;
};
class
RequestGet
final
:
public
RequestBase
{
public:
explicit
RequestGet
(
sendrecv
::
SendRecvService
::
AsyncService
*
service
,
grpc
::
ServerCompletionQueue
*
cq
,
framework
::
Scope
*
scope
)
:
RequestBase
(
service
,
cq
),
responder_
(
&
ctx_
),
scope_
(
scope
)
{
service_
->
RequestGetVariable
(
&
ctx_
,
&
request_
,
&
responder_
,
cq_
,
cq_
,
this
);
}
virtual
~
RequestGet
()
{}
virtual
void
Process
()
{
// proc request.
std
::
string
var_name
=
request_
.
varname
();
auto
*
var
=
scope_
->
FindVar
(
var_name
);
SerializeToMessage
(
var_name
,
var
,
platform
::
CPUDeviceContext
(),
&
reply_
);
// TODO(gongwb): check var's info.
responder_
.
Finish
(
reply_
,
grpc
::
Status
::
OK
,
this
);
}
protected:
sendrecv
::
VariableMessage
request_
;
sendrecv
::
VariableMessage
reply_
;
ServerAsyncResponseWriter
<
sendrecv
::
VariableMessage
>
responder_
;
framework
::
Scope
*
scope_
;
};
void
AsyncGRPCServer
::
RunSyncUpdate
()
{
grpc
::
ServerBuilder
builder
;
builder
.
AddListeningPort
(
address_
,
grpc
::
InsecureServerCredentials
());
builder
.
RegisterService
(
&
service_
);
cq_send_
=
builder
.
AddCompletionQueue
();
cq_get_
=
builder
.
AddCompletionQueue
();
server_
=
builder
.
BuildAndStart
();
LOG
(
INFO
)
<<
"Server listening on "
<<
address_
<<
std
::
endl
;
std
::
function
<
void
()
>
send_register
=
std
::
bind
(
&
AsyncGRPCServer
::
TryToRegisterNewSendOne
,
this
);
std
::
function
<
void
()
>
get_register
=
std
::
bind
(
&
AsyncGRPCServer
::
TryToRegisterNewGetOne
,
this
);
t_send_
.
reset
(
new
std
::
thread
(
std
::
bind
(
&
AsyncGRPCServer
::
HandleRequest
,
this
,
false
,
cq_send_
.
get
(),
"cq_send"
,
send_register
)));
t_get_
.
reset
(
new
std
::
thread
(
std
::
bind
(
&
AsyncGRPCServer
::
HandleRequest
,
this
,
true
,
cq_get_
.
get
(),
"cq_get"
,
get_register
)));
// wait server
server_
->
Wait
();
t_send_
->
join
();
t_get_
->
join
();
}
void
AsyncGRPCServer
::
ShutdownQueue
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
cq_mutex_
);
cq_send_
->
Shutdown
();
cq_get_
->
Shutdown
();
is_shut_down_
=
true
;
}
// This URL explains why shutdown is complicate:
// https://stackoverflow.com/questions/35708348/grpc-what-is-the-recommended-way-to-shut-down-an-asynchronous-server-in-c
void
AsyncGRPCServer
::
ShutDown
()
{
server_
->
Shutdown
();
ShutdownQueue
();
}
void
AsyncGRPCServer
::
TryToRegisterNewSendOne
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
cq_mutex_
);
if
(
is_shut_down_
)
{
return
;
}
RequestSend
*
send
=
new
RequestSend
(
&
service_
,
cq_send_
.
get
(),
&
var_recv_queue_
);
VLOG
(
4
)
<<
"create RequestSend status:"
<<
send
->
Status
();
}
void
AsyncGRPCServer
::
TryToRegisterNewGetOne
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
cq_mutex_
);
if
(
is_shut_down_
)
{
return
;
}
RequestGet
*
get
=
new
RequestGet
(
&
service_
,
cq_get_
.
get
(),
scope_
);
VLOG
(
4
)
<<
"create Requestget status:"
<<
get
->
Status
();
}
void
AsyncGRPCServer
::
SetFinishOrDelete
(
RequestBase
*&
last
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
cq_mutex_
);
if
(
is_shut_down_
)
{
delete
last
;
last
=
NULL
;
return
;
}
last
->
SetStatus
(
FINISH
);
return
;
}
void
AsyncGRPCServer
::
HandleRequest
(
bool
wait
,
grpc
::
ServerCompletionQueue
*
cq
,
std
::
string
cq_name
,
std
::
function
<
void
()
>
TryToRegisterNewOne
)
{
TryToRegisterNewOne
();
void
*
tag
=
NULL
;
bool
ok
=
false
;
while
(
true
)
{
if
(
!
cq
->
Next
(
&
tag
,
&
ok
))
{
LOG
(
INFO
)
<<
cq_name
<<
" get CompletionQueue shutdown!"
;
break
;
}
if
(
wait
&&
!
done_
)
{
Wait
();
}
RequestBase
*
base
=
(
RequestBase
*
)
tag
;
if
(
!
ok
)
{
VLOG
(
4
)
<<
cq_name
<<
" recv no regular event"
;
TryToRegisterNewOne
();
delete
base
;
continue
;
}
switch
(
base
->
Status
())
{
case
PROCESS
:
{
VLOG
(
4
)
<<
cq_name
<<
" status:"
<<
base
->
Status
();
TryToRegisterNewOne
();
base
->
Process
();
SetFinishOrDelete
(
base
);
break
;
}
case
FINISH
:
{
VLOG
(
4
)
<<
cq_name
<<
" status:"
<<
base
->
Status
();
delete
base
;
break
;
}
default:
{
assert
(
false
);
}
}
}
}
void
AsyncGRPCServer
::
Wait
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
this
->
mutex_
);
condition_
.
wait
(
lock
,
[
=
]
{
return
this
->
done_
==
true
;
});
}
void
AsyncGRPCServer
::
Reset
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
this
->
mutex_
);
done_
=
false
;
}
void
AsyncGRPCServer
::
Done
()
{
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
this
->
mutex_
);
done_
=
true
;
}
condition_
.
notify_all
();
}
}
// namespace detail
}
// namespace operators
}
// namespace paddle
paddle/operators/detail/grpc_server.h
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/framework/var_type.h"
#include "paddle/operators/detail/simple_block_queue.h"
#include "paddle/operators/detail/send_recv.grpc.pb.h"
#include "paddle/operators/detail/send_recv.pb.h"
#include <grpc++/grpc++.h>
#include <grpc/support/log.h>
#include <thread>
#include "paddle/operators/detail/sendrecvop_utils.h"
namespace
paddle
{
namespace
operators
{
namespace
detail
{
typedef
std
::
pair
<
std
::
string
,
sendrecv
::
VariableMessage
>
MessageWithName
;
class
RequestBase
;
class
AsyncGRPCServer
final
:
public
sendrecv
::
SendRecvService
::
Service
{
public:
explicit
AsyncGRPCServer
(
std
::
string
address
)
{
address_
=
address
;
}
void
RunSyncUpdate
();
void
Reset
();
void
Done
();
void
SetScope
(
framework
::
Scope
*
scope
)
{
scope_
=
scope
;
}
const
MessageWithName
Get
()
{
return
this
->
var_recv_queue_
.
Pop
();
}
void
Push
(
const
MessageWithName
&
msg
)
{
this
->
var_recv_queue_
.
Push
(
msg
);
}
void
ShutDown
();
protected:
void
Wait
();
void
HandleRequest
(
bool
wait
,
grpc
::
ServerCompletionQueue
*
cq
,
std
::
string
cq_name
,
std
::
function
<
void
()
>
TryToRegisterNewOne
);
void
TryToRegisterNewSendOne
();
void
TryToRegisterNewGetOne
();
void
SetFinishOrDelete
(
RequestBase
*&
last
);
void
ShutdownQueue
();
private:
std
::
mutex
cq_mutex_
;
volatile
bool
is_shut_down_
=
false
;
std
::
unique_ptr
<
grpc
::
ServerCompletionQueue
>
cq_send_
;
std
::
unique_ptr
<
grpc
::
ServerCompletionQueue
>
cq_get_
;
sendrecv
::
SendRecvService
::
AsyncService
service_
;
std
::
unique_ptr
<
grpc
::
Server
>
server_
;
std
::
string
address_
;
framework
::
Scope
*
scope_
;
// received variable from RPC, operators fetch variable from this queue.
SimpleBlockQueue
<
MessageWithName
>
var_recv_queue_
;
// condition of the sub program
std
::
mutex
mutex_
;
volatile
mutable
bool
done_
;
std
::
condition_variable
condition_
;
std
::
unique_ptr
<
std
::
thread
>
t_send_
;
std
::
unique_ptr
<
std
::
thread
>
t_get_
;
};
};
// namespace detail
};
// namespace operators
};
// namespace paddle
paddle/operators/detail/send_recv.proto
浏览文件 @
dc488c17
...
...
@@ -21,8 +21,6 @@ service SendRecvService {
rpc
SendVariable
(
VariableMessage
)
returns
(
VoidMessage
)
{}
// Argument VariableMessage for GetVariable should only contain varname.
rpc
GetVariable
(
VariableMessage
)
returns
(
VariableMessage
)
{}
// wait for one execution of the program
rpc
Wait
(
VoidMessage
)
returns
(
VoidMessage
)
{}
}
// VariableMessage is serialized paddle variable message.
...
...
paddle/operators/detail/send
_recv_impl.h
→
paddle/operators/detail/send
recvop_utils.cc
浏览文件 @
dc488c17
...
...
@@ -12,87 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/framework/var_type.h"
#include "paddle/operators/detail/simple_block_queue.h"
#include "paddle/operators/detail/send_recv.grpc.pb.h"
#include "paddle/operators/detail/send_recv.pb.h"
#include <grpc++/grpc++.h>
using
grpc
::
Channel
;
using
grpc
::
Server
;
using
grpc
::
ServerContext
;
using
grpc
::
ServerReader
;
using
grpc
::
ServerBuilder
;
using
grpc
::
ClientContext
;
using
grpc
::
ClientReader
;
using
grpc
::
ClientReaderWriter
;
using
grpc
::
ClientWriter
;
using
grpc
::
Status
;
using
sendrecv
::
SendRecvService
;
using
sendrecv
::
VariableMessage
;
using
sendrecv
::
VoidMessage
;
#include "paddle/operators/detail/sendrecvop_utils.h"
namespace
paddle
{
namespace
operators
{
namespace
detail
{
typedef
std
::
pair
<
std
::
string
,
sendrecv
::
VariableMessage
>
MessageWithName
;
class
SendRecvServerImpl
final
:
public
SendRecvService
::
Service
{
public:
explicit
SendRecvServerImpl
()
{}
Status
SendVariable
(
ServerContext
*
context
,
const
VariableMessage
*
in_var
,
VoidMessage
*
out_var
)
override
;
Status
GetVariable
(
ServerContext
*
context
,
const
VariableMessage
*
in_var
,
VariableMessage
*
out_var
)
override
;
Status
Wait
(
ServerContext
*
context
,
const
VoidMessage
*
in_var
,
VoidMessage
*
out_var
)
override
;
void
Reset
();
void
Done
();
void
SetScope
(
framework
::
Scope
*
scope
)
{
scope_
=
scope
;
};
const
MessageWithName
Get
()
{
return
this
->
var_recv_queue_
.
Pop
();
}
void
Push
(
const
MessageWithName
&
msg
)
{
this
->
var_recv_queue_
.
Push
(
msg
);
}
private:
// received variable from RPC, operators fetch variable from this queue.
SimpleBlockQueue
<
MessageWithName
>
var_recv_queue_
;
framework
::
Scope
*
scope_
;
// condition of the sub program
std
::
mutex
mutex_
;
bool
done_
;
std
::
condition_variable
condition_
;
};
// RPCClient is a class to send tensors to pserver sub-network
// using different hashing methods.
class
RPCClient
{
public:
RPCClient
(
std
::
shared_ptr
<
Channel
>
channel
)
:
stub_
(
SendRecvService
::
NewStub
(
channel
))
{}
bool
SendVariable
(
const
framework
::
Scope
&
scope
,
const
std
::
string
&
inname
);
bool
GetVariable
(
const
framework
::
Scope
&
scope
,
const
std
::
string
&
outname
);
void
Wait
();
private:
std
::
unique_ptr
<
SendRecvService
::
Stub
>
stub_
;
};
inline
void
SerializeToMessage
(
const
std
::
string
&
name
,
const
framework
::
Variable
*
var
,
const
platform
::
DeviceContext
&
ctx
,
VariableMessage
*
msg
)
{
void
SerializeToMessage
(
const
std
::
string
&
name
,
const
framework
::
Variable
*
var
,
const
platform
::
DeviceContext
&
ctx
,
sendrecv
::
VariableMessage
*
msg
)
{
msg
->
set_varname
(
name
);
std
::
ostringstream
oss
;
switch
(
framework
::
ToVarType
(
var
->
Type
()))
{
...
...
@@ -114,10 +42,9 @@ inline void SerializeToMessage(const std::string &name,
msg
->
set_serialized
(
oss
.
str
());
}
inline
void
DeserializeFromMessage
(
const
VariableMessage
&
msg
,
const
platform
::
DeviceContext
&
ctx
,
framework
::
Variable
*
var
)
{
using
namespace
paddle
::
framework
::
proto
;
void
DeserializeFromMessage
(
const
sendrecv
::
VariableMessage
&
msg
,
const
platform
::
DeviceContext
&
ctx
,
framework
::
Variable
*
var
)
{
std
::
istringstream
iss
(
msg
.
serialized
());
switch
(
msg
.
type
())
{
case
sendrecv
::
VarType
::
LOD_TENSOR
:
...
...
paddle/operators/detail/send
_impl.cc
→
paddle/operators/detail/send
recvop_utils.h
浏览文件 @
dc488c17
...
...
@@ -12,56 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "send_recv_impl.h"
#pragma once
#include <iostream>
#include <string>
#include <vector>
#include "paddle/framework/data_type.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/framework/var_type.h"
#include "paddle/operators/detail/send_recv.grpc.pb.h"
#include "paddle/operators/detail/send_recv.pb.h"
namespace
paddle
{
namespace
operators
{
namespace
detail
{
bool
RPCClient
::
SendVariable
(
const
framework
::
Scope
&
scope
,
const
std
::
string
&
inname
)
{
ClientContext
context
;
VariableMessage
msg
;
VoidMessage
out_msg
;
// FIXME(typhoonzero): pass device context to here.
auto
ctx
=
platform
::
CPUDeviceContext
();
auto
*
var
=
scope
.
FindVar
(
inname
);
PADDLE_ENFORCE
(
var
);
SerializeToMessage
(
inname
,
var
,
ctx
,
&
msg
);
Status
status
=
stub_
->
SendVariable
(
&
context
,
msg
,
&
out_msg
);
if
(
!
status
.
ok
())
{
LOG
(
ERROR
)
<<
"gRPC error: "
<<
status
.
error_message
();
return
false
;
}
return
true
;
}
bool
RPCClient
::
GetVariable
(
const
framework
::
Scope
&
scope
,
const
std
::
string
&
outname
)
{
ClientContext
context
;
VariableMessage
call_msg
,
ret_msg
;
call_msg
.
set_varname
(
outname
);
auto
ctx
=
platform
::
CPUDeviceContext
();
Status
status
=
stub_
->
GetVariable
(
&
context
,
call_msg
,
&
ret_msg
);
auto
*
outvar
=
scope
.
FindVar
(
outname
);
if
(
!
status
.
ok
())
{
LOG
(
ERROR
)
<<
"gRPC error: "
<<
status
.
error_message
();
return
false
;
}
std
::
istringstream
iss
(
ret_msg
.
serialized
());
DeserializeFromMessage
(
ret_msg
,
ctx
,
outvar
);
return
true
;
}
void
RPCClient
::
Wait
()
{
ClientContext
context
;
VoidMessage
call_msg
,
ret_msg
;
stub_
->
Wait
(
&
context
,
call_msg
,
&
ret_msg
);
}
void
SerializeToMessage
(
const
std
::
string
&
name
,
const
framework
::
Variable
*
var
,
const
platform
::
DeviceContext
&
ctx
,
sendrecv
::
VariableMessage
*
msg
);
void
DeserializeFromMessage
(
const
sendrecv
::
VariableMessage
&
msg
,
const
platform
::
DeviceContext
&
ctx
,
framework
::
Variable
*
var
);
}
// namespace detail
}
// namespace operators
}
// namespace paddle
paddle/operators/edit_distance_op.cc
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/edit_distance_op.h"
namespace
paddle
{
namespace
operators
{
class
EditDistanceOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Hyps"
),
"Input(Hyps) shouldn't be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Refs"
),
"Input(Refs) shouldn't be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) shouldn't be null."
);
auto
hyp_dims
=
ctx
->
GetInputDim
(
"Hyps"
);
auto
ref_dims
=
ctx
->
GetInputDim
(
"Refs"
);
PADDLE_ENFORCE
(
hyp_dims
.
size
()
==
2
&&
hyp_dims
[
1
]
==
1
,
"Input(Hyps) must be a 2-D LoDTensor with the 2nd dimension "
"equal to 1."
);
PADDLE_ENFORCE
(
ref_dims
.
size
()
==
2
&&
ref_dims
[
1
]
==
1
,
"Input(Refs) must be a 2-D LoDTensor with the 2nd dimension "
"equal to 1."
);
ctx
->
SetOutputDim
(
"Out"
,
ctx
->
GetInputDim
(
"Refs"
));
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
proto
::
DataType
::
FP32
,
ctx
.
device_context
());
}
};
class
EditDistanceOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
EditDistanceOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"Hyps"
,
"(2-D LoDTensor<int>, 2nd dim. equal to 1) "
"The indices for hypothesis strings."
);
AddInput
(
"Refs"
,
"(2-D LoDTensor<int>, 2nd dim. equal to 1) "
"The indices for reference strings."
);
AddAttr
<
bool
>
(
"normalized"
,
"(bool, default false) Indicated whether to normalize "
"the edit distance by the length of reference string."
)
.
SetDefault
(
false
);
AddOutput
(
"Out"
,
"(2-D Tensor with shape [`batch_size` x 1]) "
"The output edit distances of EditDistance operator."
);
AddComment
(
R"DOC(
EditDistance operator computes the edit distances between a batch of hypothesis
strings and their references.
Edit distance, also called Levenshtein distance, measures how dissimilar two strings
are by counting the minimum number of operations to transform one string into anthor.
Here the operations include insertion, deletion, and substitution. For example,
given hypothesis string A = "kitten" and reference B = "sitting", the edit distance
is 3 for A will be transformed into B at least after two substitutions and one
insertion:
"kitten" -> "sitten" -> "sittin" -> "sitting"
Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with the total
number denoted by `batch_size`, and the separation is specified by the LoD information.
And the `batch_size` reference strings are arranged in order in the same way in the
LoDTensor Input(Refs).
Output(Out) contains the `batch_size` results and each stands for the edit stance
for a pair of strings respectively. If Attr(normalized) is true, the edit distance
will be divided by the length of reference string.
)DOC"
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
edit_distance
,
ops
::
EditDistanceOp
,
ops
::
EditDistanceOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
);
REGISTER_OP_CPU_KERNEL
(
edit_distance
,
ops
::
EditDistanceKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
);
paddle/operators/edit_distance_op.cu
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "paddle/framework/op_registry.h"
#include "paddle/platform/cuda_helper.h"
#include "paddle/platform/gpu_info.h"
namespace
paddle
{
namespace
operators
{
using
platform
::
PADDLE_CUDA_NUM_THREADS
;
template
<
typename
T
>
__global__
void
FillFirstRow
(
T
*
dist
,
const
int
N
)
{
int
idx
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
if
(
idx
<
N
+
1
)
{
dist
[
idx
]
=
idx
;
}
}
template
<
typename
T
>
__global__
void
FillFirstColumn
(
T
*
dist
,
const
int
M
,
const
int
N
)
{
int
idx
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
if
(
idx
<
M
+
1
)
{
dist
[
idx
*
(
N
+
1
)]
=
idx
;
}
}
template
<
typename
T
>
__global__
void
Levenshtein
(
T
*
dist
,
const
int
*
x1
,
const
int
*
x2
,
const
int
M
,
const
int
N
,
const
int
start
)
{
int
idx
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
int
offset
=
N
;
int
index
=
start
+
idx
*
offset
;
int
row
=
index
/
(
N
+
1
);
int
col
=
index
%
(
N
+
1
);
if
(
row
>
0
&&
col
>
0
&&
row
<
M
+
1
&&
col
<
N
+
1
)
{
int
cost
=
x1
[
row
-
1
]
==
x2
[
col
-
1
]
?
0
:
1
;
int
dels
=
dist
[(
row
-
1
)
*
(
N
+
1
)
+
col
]
+
1
;
int
ins
=
dist
[
row
*
(
N
+
1
)
+
col
-
1
]
+
1
;
int
subs
=
dist
[(
row
-
1
)
*
(
N
+
1
)
+
(
col
-
1
)]
+
cost
;
dist
[
index
]
=
min
(
dels
,
min
(
ins
,
subs
));
}
}
template
<
typename
T
>
__global__
void
SetOutput
(
T
*
out
,
const
T
*
dist
,
const
int
M
,
const
int
N
,
bool
normalized
)
{
int
idx
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
if
(
idx
==
0
)
{
out
[
0
]
=
normalized
?
dist
[
M
*
(
N
+
1
)
+
N
]
/
N
:
dist
[
M
*
(
N
+
1
)
+
N
];
}
}
template
<
typename
Place
,
typename
T
>
class
EditDistanceGPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
auto
*
out_t
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
x1_t
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Hyps"
);
auto
*
x2_t
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Refs"
);
auto
normalized
=
ctx
.
Attr
<
bool
>
(
"normalized"
);
auto
stream
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
.
device_context
())
.
stream
();
auto
hyp_lod
=
x1_t
->
lod
()[
0
];
auto
ref_lod
=
x2_t
->
lod
()[
0
];
PADDLE_ENFORCE
(
hyp_lod
.
size
()
==
ref_lod
.
size
(),
"Input(Hyps) and Input(Refs) must have the same batch size."
);
for
(
size_t
i
=
1
;
i
<
ref_lod
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
ref_lod
[
i
]
>
ref_lod
[
i
-
1
],
"Reference string %d is empty."
,
i
);
}
auto
num_strs
=
hyp_lod
.
size
()
-
1
;
out_t
->
Resize
({
static_cast
<
int64_t
>
(
num_strs
),
1
});
out_t
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
out
=
out_t
->
data
<
T
>
();
T
distance
=
0.0
;
for
(
size_t
num
=
0
;
num
<
num_strs
;
num
++
)
{
auto
m
=
static_cast
<
int64_t
>
(
hyp_lod
[
num
+
1
]
-
hyp_lod
[
num
]);
auto
n
=
static_cast
<
int64_t
>
(
ref_lod
[
num
+
1
]
-
ref_lod
[
num
]);
if
(
m
==
0
||
n
==
0
)
{
distance
=
std
::
max
(
m
,
n
);
if
(
normalized
)
{
PADDLE_ENFORCE
(
n
>
0
,
"The reference string (#%d) cannot be empty "
"when Attr(normalized) is enabled."
,
n
);
distance
=
distance
/
n
;
}
memory
::
Copy
(
boost
::
get
<
Place
>
(
ctx
.
GetPlace
()),
out
+
num
,
platform
::
CPUPlace
(),
&
distance
,
sizeof
(
T
),
stream
);
}
else
{
framework
::
Tensor
dist_t
;
dist_t
.
Resize
({
m
+
1
,
n
+
1
});
dist_t
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
dist
=
dist_t
.
data
<
T
>
();
auto
x1
=
x1_t
->
data
<
int
>
()
+
hyp_lod
[
num
];
auto
x2
=
x2_t
->
data
<
int
>
()
+
ref_lod
[
num
];
FillFirstColumn
<
T
><<<
1
+
m
/
PADDLE_CUDA_NUM_THREADS
,
PADDLE_CUDA_NUM_THREADS
,
0
,
stream
>>>
(
dist
,
m
,
n
);
FillFirstRow
<
T
><<<
1
+
n
/
PADDLE_CUDA_NUM_THREADS
,
PADDLE_CUDA_NUM_THREADS
,
0
,
stream
>>>
(
dist
,
n
);
// Compute the elements of distance matrix in the anti-diagonal diretion
for
(
int64_t
slice
=
2
;
slice
<
m
+
n
+
1
;
++
slice
)
{
int
z_m
=
slice
<
m
+
1
?
0
:
slice
-
m
;
int
z_n
=
slice
<
n
+
1
?
0
:
slice
-
n
;
int
size
=
slice
-
(
z_m
+
z_n
)
+
1
;
// number of elments in the same
// anti-diagonal line to update
// the start index at which computes from
int
start
=
slice
<
n
+
1
?
slice
:
(
z_n
+
1
)
*
(
n
+
1
)
-
1
;
Levenshtein
<
T
><<<
1
+
(
size
-
1
)
/
PADDLE_CUDA_NUM_THREADS
,
PADDLE_CUDA_NUM_THREADS
,
0
,
stream
>>>
(
dist
,
x1
,
x2
,
m
,
n
,
start
);
}
SetOutput
<
T
><<<
1
,
1
,
0
,
stream
>>>
(
out
+
num
,
dist
,
m
,
n
,
normalized
);
}
}
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
edit_distance
,
ops
::
EditDistanceGPUKernel
<
paddle
::
platform
::
CUDAPlace
,
float
>
);
paddle/operators/edit_distance_op.h
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
Place
,
typename
T
>
class
EditDistanceKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
auto
*
out_t
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
x1_t
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Hyps"
);
auto
*
x2_t
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Refs"
);
auto
normalized
=
ctx
.
Attr
<
bool
>
(
"normalized"
);
auto
hyp_lod
=
x1_t
->
lod
()[
0
];
auto
ref_lod
=
x2_t
->
lod
()[
0
];
PADDLE_ENFORCE
(
hyp_lod
.
size
()
==
ref_lod
.
size
(),
"Input(Hyps) and Input(Refs) must have the same batch size."
);
for
(
size_t
i
=
1
;
i
<
ref_lod
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
ref_lod
[
i
]
>
ref_lod
[
i
-
1
],
"Reference string %d is empty."
,
i
);
}
auto
num_strs
=
hyp_lod
.
size
()
-
1
;
out_t
->
Resize
({
static_cast
<
int64_t
>
(
num_strs
),
1
});
out_t
->
mutable_data
<
float
>
(
ctx
.
GetPlace
());
auto
out
=
out_t
->
data
<
T
>
();
T
distance
=
0.0
;
for
(
size_t
num
=
0
;
num
<
num_strs
;
++
num
)
{
auto
m
=
static_cast
<
int64_t
>
(
hyp_lod
[
num
+
1
]
-
hyp_lod
[
num
]);
auto
n
=
static_cast
<
int64_t
>
(
ref_lod
[
num
+
1
]
-
ref_lod
[
num
]);
if
(
m
==
0
)
{
distance
=
n
;
}
else
if
(
n
==
0
)
{
distance
=
m
;
}
else
{
framework
::
Tensor
dist_t
;
dist_t
.
Resize
({
m
+
1
,
n
+
1
});
dist_t
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
dist
=
dist_t
.
data
<
T
>
();
auto
x1
=
x1_t
->
data
<
int
>
()
+
hyp_lod
[
num
];
auto
x2
=
x2_t
->
data
<
int
>
()
+
ref_lod
[
num
];
for
(
int64_t
i
=
0
;
i
<
m
+
1
;
++
i
)
{
dist
[
i
*
(
n
+
1
)]
=
i
;
}
for
(
int64_t
j
=
0
;
j
<
n
+
1
;
++
j
)
{
dist
[
j
]
=
j
;
}
for
(
int64_t
i
=
1
;
i
<
m
+
1
;
++
i
)
{
for
(
int64_t
j
=
1
;
j
<
n
+
1
;
++
j
)
{
int
cost
=
x1
[
i
-
1
]
==
x2
[
j
-
1
]
?
0
:
1
;
int
dels
=
dist
[(
i
-
1
)
*
(
n
+
1
)
+
j
]
+
1
;
int
ins
=
dist
[
i
*
(
n
+
1
)
+
(
j
-
1
)]
+
1
;
int
subs
=
dist
[(
i
-
1
)
*
(
n
+
1
)
+
(
j
-
1
)]
+
cost
;
dist
[
i
*
(
n
+
1
)
+
j
]
=
std
::
min
(
dels
,
std
::
min
(
ins
,
subs
));
}
}
distance
=
dist
[
m
*
(
n
+
1
)
+
n
];
}
if
(
normalized
)
{
PADDLE_ENFORCE
(
n
>
0
,
"The reference string (#%d) cannot be empty "
"when Attr(normalized) is enabled."
,
n
);
distance
=
distance
/
n
;
}
out
[
num
]
=
distance
;
}
}
};
}
// namespace operators
}
// namespace paddle
paddle/operators/recv_op.cc
浏览文件 @
dc488c17
...
...
@@ -24,7 +24,8 @@ limitations under the License. */
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/proto_desc.h"
#include "paddle/operators/detail/send_recv_impl.h"
#include "paddle/operators/detail/grpc_server.h"
#include "paddle/operators/detail/sendrecvop_utils.h"
#include "paddle/operators/detail/simple_block_queue.h"
#define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV"
...
...
@@ -32,6 +33,11 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
void
RunServer
(
std
::
shared_ptr
<
detail
::
AsyncGRPCServer
>
service
)
{
service
->
RunSyncUpdate
();
VLOG
(
4
)
<<
"RunServer thread end"
;
}
static
void
CreateTensorFromMessageType
(
framework
::
Variable
*
var
,
sendrecv
::
VarType
var_type
)
{
if
(
var_type
==
sendrecv
::
VarType
::
LOD_TENSOR
)
{
...
...
@@ -46,18 +52,6 @@ static void CreateTensorFromMessageType(framework::Variable *var,
}
}
void
RunServer
(
Server
**
rpc_server
,
std
::
shared_ptr
<
detail
::
SendRecvServerImpl
>
service
,
const
std
::
string
&
server_address
)
{
ServerBuilder
builder
;
builder
.
AddListeningPort
(
server_address
,
grpc
::
InsecureServerCredentials
());
builder
.
RegisterService
(
service
.
get
());
std
::
unique_ptr
<
Server
>
server
(
builder
.
BuildAndStart
());
*
rpc_server
=
server
.
get
();
LOG
(
INFO
)
<<
"Server listening on "
<<
server_address
;
server
->
Wait
();
}
class
RecvOp
:
public
framework
::
OperatorBase
{
public:
RecvOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
...
...
@@ -65,10 +59,9 @@ class RecvOp : public framework::OperatorBase {
const
framework
::
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{
if
(
!
rpc_service_
)
{
rpc_service_
.
reset
(
new
detail
::
SendRecvServerImpl
());
std
::
string
endpoint
=
Attr
<
std
::
string
>
(
"endpoint"
);
server_thread_
.
reset
(
new
std
::
thread
(
RunServer
,
&
rpc_server_
,
rpc_service_
,
endpoint
));
rpc_service_
.
reset
(
new
detail
::
AsyncGRPCServer
(
endpoint
));
server_thread_
.
reset
(
new
std
::
thread
(
RunServer
,
rpc_service_
));
}
}
...
...
@@ -76,7 +69,7 @@ class RecvOp : public framework::OperatorBase {
detail
::
MessageWithName
term_msg
;
term_msg
.
first
=
LISTEN_TERMINATE_MESSAGE
;
rpc_service_
->
Push
(
term_msg
);
rpc_serv
er_
->
Shutd
own
();
rpc_serv
ice_
->
ShutD
own
();
server_thread_
->
join
();
}
...
...
@@ -99,10 +92,12 @@ class RecvOp : public framework::OperatorBase {
auto
grad_list
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"GradList"
);
auto
trainer_count
=
Attr
<
int
>
(
"Trainers"
);
size_t
param_count
=
param_list
.
size
();
rpc_service_
->
Reset
();
// TODO(typhoonzero): change this to a while_op for every cluster-batch.
bool
exit_flag
=
false
;
while
(
!
exit_flag
)
{
// TODO(gognwb): simply this loop.
// Get from multiple trainers, we don't care about order in which
// the gradient arrives, just add suffix 0~n then average the gradient.
for
(
size_t
i
=
0
;
i
<
param_count
*
trainer_count
;
++
i
)
{
...
...
@@ -110,6 +105,7 @@ class RecvOp : public framework::OperatorBase {
const
detail
::
MessageWithName
&
v
=
rpc_service_
->
Get
();
auto
grad_var_name
=
v
.
first
;
if
(
grad_var_name
==
LISTEN_TERMINATE_MESSAGE
)
{
VLOG
(
4
)
<<
"received LISTEN_TERMINATE_MESSAGE and RunOp.Run() exit"
;
exit_flag
=
true
;
break
;
}
...
...
@@ -118,10 +114,12 @@ class RecvOp : public framework::OperatorBase {
if
(
it
!=
grad_list
.
end
())
{
param_var_name
=
param_list
[
it
-
grad_list
.
begin
()];
}
else
{
LOG
(
ERROR
)
<<
"grad have no paired param found!"
;
LOG
(
ERROR
)
<<
"grad have no paired param found!
\"
"
<<
grad_var_name
<<
"
\"
"
;
}
VLOG
(
3
)
<<
"recved grad: "
<<
grad_var_name
<<
" updating param: "
<<
param_var_name
;
auto
*
merged_grad
=
recv_scope
.
FindVar
(
grad_var_name
);
if
(
merged_grad
==
nullptr
)
{
auto
*
ptr
=
recv_scope
.
Var
(
grad_var_name
);
...
...
@@ -141,9 +139,11 @@ class RecvOp : public framework::OperatorBase {
auto
&
dev_ctx
=
*
pool
.
Get
(
dev_place
);
detail
::
DeserializeFromMessage
(
v
.
second
,
dev_ctx
,
var
);
}
if
(
exit_flag
)
{
break
;
}
rpc_service_
->
Reset
();
std
::
string
program_str
=
Attr
<
std
::
string
>
(
"OptimizeProgram"
);
...
...
@@ -158,17 +158,14 @@ class RecvOp : public framework::OperatorBase {
}
catch
(
std
::
exception
&
e
)
{
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
}
rpc_service_
->
Done
();
grads_counter_
.
clear
();
}
// while(true)
}
protected:
// grpc server instance to track status and gracefully shutdown.
// borrow an pointer from server thread.
Server
*
rpc_server_
{
nullptr
};
// grpc send/recv service implement to register.
std
::
shared_ptr
<
detail
::
SendRecvServerImpl
>
rpc_service_
;
std
::
shared_ptr
<
detail
::
AsyncGRPCServer
>
rpc_service_
;
std
::
shared_ptr
<
std
::
thread
>
server_thread_
;
mutable
std
::
unordered_map
<
std
::
string
,
int
>
grads_counter_
;
};
...
...
paddle/operators/send_op.cc
浏览文件 @
dc488c17
...
...
@@ -19,59 +19,45 @@ limitations under the License. */
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include
"paddle/operators/detail/send_recv_impl.h"
#include "paddle/operators/detail/
simple_block_queue
.h"
#include
<future>
#include "paddle/operators/detail/
grpc_client
.h"
namespace
paddle
{
namespace
operators
{
// TODO(typhoonzero): this is a simple implementation which only send
// one tensor
class
SendOp
:
public
framework
::
OperatorBase
{
public:
SendOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{
// init client when the operator is created at runtime.
std
::
vector
<
std
::
string
>
endpoints
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"endpoints"
);
for
(
auto
ep
:
endpoints
)
{
client_map_
[
ep
].
reset
(
new
detail
::
RPCClient
(
grpc
::
CreateChannel
(
ep
,
grpc
::
InsecureChannelCredentials
())));
}
}
SendOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
Run
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
dev_place
)
const
override
{
void
Run
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
dev_place
)
const
override
{
auto
ins
=
Inputs
(
"X"
);
auto
outs
=
Outputs
(
"Out"
);
std
::
vector
<
std
::
string
>
epmap
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"epmap"
);
// TODO(typhoonzero): use async calls to send multiple variable asyncly.
for
(
size_t
i
=
0
;
i
<
ins
.
size
();
++
i
)
{
bool
ret
=
client_map_
[
epmap
[
i
]]
->
SendVariable
(
scope
,
ins
[
i
]);
if
(
!
ret
)
{
LOG
(
ERROR
)
<<
"send variable error: "
<<
ins
[
i
];
}
// FIXME(gongwb): DeviceContext?
auto
ctx
=
platform
::
CPUDeviceContext
();
for
(
size_t
i
=
0
;
i
<
ins
.
size
();
i
++
)
{
client_
.
AsyncSendVariable
(
epmap
[
i
],
ctx
,
scope
,
ins
[
i
]);
}
// TODO(typhoonzero): support async optimization
client_map_
[
epmap
[
0
]]
->
Wait
();
for
(
size_t
i
=
0
;
i
<
outs
.
size
();
++
i
)
{
bool
ret
=
client_map_
[
epmap
[
i
]]
->
GetVariable
(
scope
,
outs
[
i
]);
if
(
!
ret
)
{
LOG
(
ERROR
)
<<
"GetVariable error: "
<<
outs
[
i
];
}
for
(
size_t
i
=
0
;
i
<
outs
.
size
();
i
++
)
{
client_
.
AsyncGetVariable
(
epmap
[
i
],
ctx
,
scope
,
outs
[
i
]);
}
client_
.
wait
();
}
protected:
mutable
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
detail
::
RPCClient
>>
client_map_
;
private:
mutable
detail
::
RPCClient
client_
;
};
class
SendOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
SendOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
SendOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"(Tensor) Input tensor to be send"
).
AsDuplicable
();
AddOutput
(
"Out"
,
"(Tensor) Output tensor to get from server"
)
...
...
paddle/operators/send_recv_op_test.cc
浏览文件 @
dc488c17
...
...
@@ -140,7 +140,7 @@ void StartServerNet(bool is_sparse) {
TEST
(
SendRecvOp
,
CPUDense
)
{
std
::
thread
server_thread
(
StartServerNet
,
false
);
sleep
(
3
);
// wait server to start
sleep
(
10
);
// wait server to start
// local net
f
::
Scope
scope
;
p
::
CPUPlace
place
;
...
...
paddle/operators/sequence_erase_op.cc
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sequence_erase_op.h"
namespace
paddle
{
namespace
operators
{
class
SequenceEraseOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of SequenceEraseOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of SequenceEraseOp should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE
(
x_dims
.
size
()
==
2
&&
x_dims
[
1
]
==
1
,
"Input(X) of SequenceEraseOp should be a 2-D LoDTensor "
"with the 2nd dimension equal to 1."
);
ctx
->
SetOutputDim
(
"Out"
,
x_dims
);
}
};
class
SequenceEraseOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
SequenceEraseOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"(2-D LoDTensor with the 2nd dim. equal to 1) "
"Input LoDTensor of SequenceEraseOp."
);
AddOutput
(
"Out"
,
"(2-D LoDTensor with the 2nd dim. equal to 1) "
"Output LoDTensor of SequenceEraseOp."
);
AddAttr
<
std
::
vector
<
int
>>
(
"tokens"
,
"(vector<int>) Tokens need to be erased from "
"input sequences."
);
AddComment
(
R"DOC(
Sequence Erase Operator.
Sequence erase operator erases tokens specified by Attr(tokens) from the input
sequences Input(X), and outputs the remaining data and modifies the LoD
information at the same time. For example, given a 2-D LoDTensor
X = [[2, 2, 6, 1, 3, 9, 6, 1, 0, 1]]^T
with lod = [[0, 3, 6, 10]], there are three sequences in the input:
X1 = [[2, 2, 6]]^T, X2 = [[1, 3, 9]]^T and X3 = [[6, 1, 0, 1]]^T.
If the tokens to be erased are Attr(tokens) = [2, 3, 5], after the erasing
operation, the three sequences become
X1' = [[6]]^T, X2' = [[1, 9]]^T and X3' = [[6, 1, 0, 1]]^T.
Hence the LoDTensor Output(Out) should be
Out = [[6, 1, 9, 6, 1, 0, 1]]^T,
with lod = [[0, 1, 3, 7]].
An example usage for this operator is to remove the special tokens when
computing the edit distance between two strings, such as blank, start token,
and end token.
)DOC"
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_WITHOUT_GRADIENT
(
sequence_erase
,
ops
::
SequenceEraseOp
,
ops
::
SequenceEraseOpMaker
);
REGISTER_OP_CPU_KERNEL
(
sequence_erase
,
ops
::
SequenceEraseKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int32_t
>
);
paddle/operators/sequence_erase_op.cu
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include "paddle/operators/sequence_erase_op.h"
#include "paddle/platform/cuda_helper.h"
namespace
paddle
{
namespace
operators
{
using
platform
::
PADDLE_CUDA_NUM_THREADS
;
using
LoDTensor
=
framework
::
LoDTensor
;
template
<
typename
T
>
__global__
void
LabelErasedIdx
(
const
T
*
in_dat
,
const
int
in_len
,
const
T
*
tokens
,
const
int
tokens_len
,
int
*
num_erased
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
<
in_len
)
{
int
erased
=
0
;
for
(
int
i
=
0
;
i
<
tokens_len
;
++
i
)
{
if
(
in_dat
[
index
]
==
tokens
[
i
])
{
erased
=
1
;
}
}
num_erased
[
index
+
1
]
=
erased
;
if
(
index
==
0
)
{
num_erased
[
0
]
=
0
;
}
}
}
template
<
typename
T
>
__global__
void
GetOutLod
(
const
T
*
num_erased
,
const
int
*
in_lod
,
const
int
lod_len
,
int
*
out_lod0
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
<
lod_len
)
{
out_lod0
[
index
]
=
in_lod
[
index
]
-
num_erased
[
in_lod
[
index
]];
}
}
template
<
typename
T
>
__global__
void
SetOutput
(
const
T
*
in_dat
,
const
int
in_len
,
const
int
*
num_erased
,
T
*
out_dat
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
<
in_len
)
{
if
(
in_dat
[
index
]
!=
in_dat
[
index
+
1
])
{
out_dat
[
index
-
num_erased
[
index
]]
=
in_dat
[
index
];
}
}
}
template
<
typename
T
>
class
SequenceEraseOpCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
LoDTensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
LoDTensor
>
(
"Out"
);
auto
lod
=
in
->
lod
();
PADDLE_ENFORCE_EQ
(
lod
.
size
(),
1UL
,
"Only support one level sequence now."
);
PADDLE_ENFORCE_EQ
(
lod
[
0
].
back
(),
(
size_t
)
in
->
numel
(),
"The actual size mismatches with the LoD information."
);
auto
tokens
=
ctx
.
Attr
<
std
::
vector
<
T
>>
(
"tokens"
);
auto
tokens_len
=
tokens
.
size
();
auto
in_len
=
in
->
numel
();
auto
in_dat
=
in
->
data
<
T
>
();
auto
lod0
=
lod
[
0
];
thrust
::
host_vector
<
T
>
host_tokens
(
tokens_len
);
for
(
size_t
i
=
0
;
i
<
tokens
.
size
();
++
i
)
{
host_tokens
[
i
]
=
tokens
[
i
];
}
thrust
::
device_vector
<
T
>
dev_tokens
=
host_tokens
;
thrust
::
device_vector
<
int
>
num_erased
(
in_len
+
1
);
T
*
dev_tokens_ptr
=
thrust
::
raw_pointer_cast
(
dev_tokens
.
data
());
int
*
num_erased_ptr
=
thrust
::
raw_pointer_cast
(
num_erased
.
data
());
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
LabelErasedIdx
<<<
(
in_len
-
1
)
/
PADDLE_CUDA_NUM_THREADS
+
1
,
PADDLE_CUDA_NUM_THREADS
,
0
,
stream
>>>
(
in_dat
,
in_len
,
dev_tokens_ptr
,
tokens_len
,
num_erased_ptr
);
thrust
::
inclusive_scan
(
num_erased
.
begin
()
+
1
,
num_erased
.
end
(),
num_erased
.
begin
()
+
1
);
// Calc LoD
auto
lod_len
=
lod0
.
size
();
thrust
::
host_vector
<
int
>
host_lod
(
lod_len
);
for
(
size_t
i
=
0
;
i
<
lod_len
;
++
i
)
{
host_lod
[
i
]
=
lod0
[
i
];
}
thrust
::
device_vector
<
int
>
dev_in_lod
=
host_lod
;
thrust
::
device_vector
<
int
>
dev_out_lod
(
lod_len
);
int
*
dev_in_lod_ptr
=
thrust
::
raw_pointer_cast
(
dev_in_lod
.
data
());
int
*
dev_out_lod_ptr
=
thrust
::
raw_pointer_cast
(
dev_out_lod
.
data
());
GetOutLod
<<<
(
lod_len
-
1
)
/
PADDLE_CUDA_NUM_THREADS
+
1
,
PADDLE_CUDA_NUM_THREADS
,
0
,
stream
>>>
(
num_erased_ptr
,
dev_in_lod_ptr
,
lod_len
,
dev_out_lod_ptr
);
thrust
::
host_vector
<
int
>
host_out_lod
=
dev_out_lod
;
std
::
vector
<
int
>
out_lod0
(
lod_len
,
0
);
for
(
size_t
i
=
0
;
i
<
lod_len
;
i
++
)
{
out_lod0
[
i
]
=
host_out_lod
[
i
];
}
framework
::
LoD
out_lod
;
out_lod
.
push_back
(
out_lod0
);
out
->
set_lod
(
out_lod
);
// Set output
out
->
Resize
({
out_lod0
.
back
(),
1
});
auto
out_dat
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
SetOutput
<<<
(
in_len
-
1
)
/
PADDLE_CUDA_NUM_THREADS
+
1
,
PADDLE_CUDA_NUM_THREADS
,
0
,
stream
>>>
(
in_dat
,
in_len
,
num_erased_ptr
,
out_dat
);
}
};
}
// namespace operators
}
// namespace paddle
REGISTER_OP_CUDA_KERNEL
(
sequence_erase
,
paddle
::
operators
::
SequenceEraseOpCUDAKernel
<
int32_t
>
);
paddle/operators/sequence_erase_op.h
0 → 100644
浏览文件 @
dc488c17
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
DeviceContext
,
typename
T
>
class
SequenceEraseKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
auto
lod
=
in
->
lod
();
PADDLE_ENFORCE_EQ
(
lod
.
size
(),
1UL
,
"Only support one level sequence now."
);
PADDLE_ENFORCE_EQ
(
lod
[
0
].
back
(),
(
size_t
)
in
->
numel
(),
"The actual size mismatches with the LoD information."
);
auto
tokens
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"tokens"
);
auto
in_len
=
in
->
numel
();
auto
in_dat
=
in
->
data
<
T
>
();
auto
lod0
=
lod
[
0
];
std
::
vector
<
size_t
>
num_erased
(
in_len
+
1
,
0
);
std
::
vector
<
size_t
>
out_lod0
(
1
,
0
);
for
(
size_t
i
=
0
;
i
<
lod0
.
size
()
-
1
;
++
i
)
{
size_t
num_out
=
0
;
for
(
auto
j
=
lod0
[
i
]
+
1
;
j
<=
lod0
[
i
+
1
];
++
j
)
{
num_erased
[
j
]
=
num_erased
[
j
-
1
];
if
(
std
::
find
(
tokens
.
begin
(),
tokens
.
end
(),
in_dat
[
j
-
1
])
!=
tokens
.
end
())
{
num_erased
[
j
]
+=
1
;
}
else
{
num_out
+=
1
;
}
}
out_lod0
.
push_back
(
out_lod0
.
back
()
+
num_out
);
}
auto
out_len
=
in_len
-
num_erased
[
in_len
];
out
->
Resize
({
static_cast
<
int64_t
>
(
out_len
),
1
});
auto
out_dat
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
for
(
int64_t
i
=
0
;
i
<
in_len
;
++
i
)
{
if
(
num_erased
[
i
]
==
num_erased
[
i
+
1
])
{
out_dat
[
i
-
num_erased
[
i
]]
=
in_dat
[
i
];
}
}
framework
::
LoD
out_lod
;
out_lod
.
push_back
(
out_lod0
);
out
->
set_lod
(
out_lod
);
}
};
}
// namespace operators
}
// namespace paddle
paddle/operators/shrink_rnn_memory_op.cc
浏览文件 @
dc488c17
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/operators/array_operator.h"
#include "paddle/operators/math/math_function.h"
...
...
@@ -46,8 +47,21 @@ class ShrinkRNNMemoryOp : public ArrayOp {
auto
*
out_var
=
scope
.
FindVar
(
Output
(
"Out"
));
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
"Output Out must be set"
);
auto
&
out_tensor
=
*
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
size_t
height
=
dst_num_rows
;
// do shrink for the top level LoD
if
(
x_tensor
.
lod
().
size
()
>
0
&&
x_tensor
.
lod
()[
0
].
size
()
>
static_cast
<
size_t
>
(
dst_num_rows
))
{
auto
lod_offset
=
framework
::
GetSubLoDAndAbsoluteOffset
(
x_tensor
.
lod
(),
0
,
dst_num_rows
,
0
);
height
=
lod_offset
.
second
.
second
;
auto
out_lod
=
out_tensor
.
mutable_lod
();
framework
::
AppendLoD
(
out_lod
,
lod_offset
.
first
);
}
if
(
dst_num_rows
!=
0
)
{
out_tensor
.
ShareDataWith
(
x_tensor
.
Slice
(
0
,
dst_num_rows
));
out_tensor
.
ShareDataWith
(
x_tensor
.
Slice
(
0
,
height
));
}
}
};
...
...
@@ -64,11 +78,11 @@ class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"Out"
,
"(LoDTensor) The shrinked RNN step memory."
);
AddComment
(
R"DOC(
In dynamic RNN, we are able to handle sequences of different lengths.
Because of the multiple lengths, the size of each step input can be
In dynamic RNN, we are able to handle sequences of different lengths.
Because of the multiple lengths, the size of each step input can be
different, which may lead to a mismatching between the input of
the current step and the memory generated by the previous one. This
operator shrinks memory according to the size of the next step input,
the current step and the memory generated by the previous one. This
operator shrinks memory according to the size of the next step input,
to make sure that they can match each other.
)DOC"
);
}
...
...
@@ -132,6 +146,7 @@ class ShrinkRNNMemoryGradInferShape : public framework::InferShapeBase {
PADDLE_ENFORCE
(
context
->
HasOutput
(
framework
::
GradVarName
(
"X"
)));
context
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
context
->
GetInputDim
(
"X"
));
context
->
ShareLoD
(
"X"
,
framework
::
GradVarName
(
"X"
));
}
};
...
...
paddle/scripts/submit_local.sh.in
浏览文件 @
dc488c17
...
...
@@ -92,6 +92,9 @@ function threads_config() {
if
[
-z
"
$OPENBLAS_NUM_THREADS
"
]
;
then
export
OPENBLAS_NUM_THREADS
=
$threads
fi
if
[
$threads
-gt
1
]
&&
[
-z
"
$OPENBLAS_MAIN_FREE
"
]
;
then
export
OPENBLAS_MAIN_FREE
=
1
fi
fi
}
...
...
python/paddle/v2/__init__.py
浏览文件 @
dc488c17
...
...
@@ -62,12 +62,15 @@ __all__ = [
cp
.
begin_parse
()
def
set_
omp_mkl_
env_vars
(
trainer_count
):
def
set_env_vars
(
trainer_count
):
'''Auto set CPU environment if have not set before.
export KMP_AFFINITY, OMP_DYNAMIC according to the Hyper Threading status.
export OMP_NUM_THREADS, MKL_NUM_THREADS according to trainer_count.
For MKL:
export KMP_AFFINITY, OMP_DYNAMIC according to the Hyper Threading status.
export OMP_NUM_THREADS, MKL_NUM_THREADS according to trainer_count.
For OpenBLAS:
export OPENBLAS_NUM_THREADS, OPENBLAS_MAIN_FREE according to trainer_count.
'''
import
platform
import
platform
,
paddle
if
not
platform
.
system
()
in
[
'Linux'
,
'Darwin'
]:
return
...
...
@@ -103,16 +106,22 @@ def set_omp_mkl_env_vars(trainer_count):
num_cores
=
num_physical_cores
()
num_processors
=
num_logical_processors
()
if
num_processors
>
num_cores
:
# Hyper Threading is enabled
set_env
(
"OMP_DYNAMIC"
,
"true"
)
set_env
(
"KMP_AFFINITY"
,
"granularity=fine,compact,1,0"
)
else
:
set_env
(
"OMP_DYNAMIC"
,
"false"
)
set_env
(
"KMP_AFFINITY"
,
"granularity=fine,compact,0,0"
)
if
paddle
.
version
.
mkl
()
==
'ON'
:
if
num_processors
>
num_cores
:
# Hyper Threading is enabled
set_env
(
"OMP_DYNAMIC"
,
"true"
)
set_env
(
"KMP_AFFINITY"
,
"granularity=fine,compact,1,0"
)
else
:
set_env
(
"OMP_DYNAMIC"
,
"false"
)
set_env
(
"KMP_AFFINITY"
,
"granularity=fine,compact,0,0"
)
threads
=
num_processors
/
trainer_count
threads
=
'1'
if
threads
<
1
else
str
(
threads
)
set_env
(
"OMP_NUM_THREADS"
,
threads
)
set_env
(
"MKL_NUM_THREADS"
,
threads
)
if
paddle
.
version
.
mkl
()
==
'ON'
:
set_env
(
"OMP_NUM_THREADS"
,
threads
)
set_env
(
"MKL_NUM_THREADS"
,
threads
)
else
:
set_env
(
"OPENBLAS_NUM_THREADS"
,
threads
)
if
threads
>
1
:
set_env
(
"OPENBLAS_MAIN_FREE"
,
'1'
)
def
init
(
**
kwargs
):
...
...
@@ -129,7 +138,7 @@ def init(**kwargs):
for
key
in
args_dict
.
keys
():
args
.
append
(
'--%s=%s'
%
(
key
,
str
(
args_dict
[
key
])))
set_
omp_mkl_
env_vars
(
kwargs
.
get
(
'trainer_count'
,
1
))
set_env_vars
(
kwargs
.
get
(
'trainer_count'
,
1
))
if
'use_gpu'
in
kwargs
:
cp
.
g_command_config_args
[
'use_gpu'
]
=
kwargs
[
'use_gpu'
]
...
...
python/paddle/v2/fluid/layers/nn.py
浏览文件 @
dc488c17
...
...
@@ -14,7 +14,7 @@ __all__ = [
'chunk_eval'
,
'sequence_conv'
,
'conv2d'
,
'sequence_pool'
,
'pool2d'
,
'batch_norm'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'sequence_expand'
,
'lstm_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'sequence_first_step'
,
'sequence_last_step'
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
]
...
...
@@ -386,6 +386,21 @@ def cos_sim(X, Y, **kwargs):
return
out
def
dropout
(
x
,
dropout_prob
,
is_test
=
False
,
seed
=
0
,
**
kwargs
):
helper
=
LayerHelper
(
'dropout'
,
**
kwargs
)
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
mask
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
,
stop_gradient
=
True
)
helper
.
append_op
(
type
=
'dropout'
,
inputs
=
{
'X'
:
[
x
]},
outputs
=
{
'Out'
:
[
out
],
'Mask'
:
[
mask
]},
attrs
=
{
'dropout_prob'
:
dropout_prob
,
'is_test'
:
is_test
,
'seed'
:
seed
})
return
out
def
cross_entropy
(
input
,
label
,
**
kwargs
):
"""
**Cross Entropy Layer**
...
...
python/paddle/v2/fluid/layers/ops.py
浏览文件 @
dc488c17
from
..registry
import
register_layer
__activations__
=
[
'abs'
,
'ceil'
,
'exp'
,
'floor'
,
'log'
,
'relu'
,
'round'
,
'sigmoid'
,
'sqrt'
,
'square'
,
'tanh'
,
'abs'
,
'tanh'
,
'sigmoid'
,
'relu'
,
'sqrt'
,
'ceil'
,
'floor'
,
'log'
,
'round'
]
__all__
=
[
'mean'
,
'mul'
,
'dropout'
,
'reshape'
,
'scale'
,
'transpose'
,
...
...
python/paddle/v2/fluid/tests/test_edit_distance_op.py
0 → 100644
浏览文件 @
dc488c17
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
def
Levenshtein
(
hyp
,
ref
):
""" Compute the Levenshtein distance between two strings.
:param hyp: hypothesis string in index
:type hyp: list
:param ref: reference string in index
:type ref: list
"""
m
=
len
(
hyp
)
n
=
len
(
ref
)
if
m
==
0
:
return
n
if
n
==
0
:
return
m
dist
=
np
.
zeros
((
m
+
1
,
n
+
1
)).
astype
(
"float32"
)
for
i
in
range
(
0
,
m
+
1
):
dist
[
i
][
0
]
=
i
for
j
in
range
(
0
,
n
+
1
):
dist
[
0
][
j
]
=
j
for
i
in
range
(
1
,
m
+
1
):
for
j
in
range
(
1
,
n
+
1
):
cost
=
0
if
hyp
[
i
-
1
]
==
ref
[
j
-
1
]
else
1
deletion
=
dist
[
i
-
1
][
j
]
+
1
insertion
=
dist
[
i
][
j
-
1
]
+
1
substitution
=
dist
[
i
-
1
][
j
-
1
]
+
cost
dist
[
i
][
j
]
=
min
(
deletion
,
insertion
,
substitution
)
return
dist
[
m
][
n
]
class
TestEditDistanceOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"edit_distance"
normalized
=
False
x1
=
np
.
array
([[
0
,
12
,
3
,
5
,
8
,
2
]]).
astype
(
"int32"
)
x2
=
np
.
array
([[
0
,
12
,
4
,
7
,
8
]]).
astype
(
"int32"
)
x1
=
np
.
transpose
(
x1
)
x2
=
np
.
transpose
(
x2
)
x1_lod
=
[
0
,
1
,
5
]
x2_lod
=
[
0
,
3
,
4
]
num_strs
=
len
(
x1_lod
)
-
1
distance
=
np
.
zeros
((
num_strs
,
1
)).
astype
(
"float32"
)
for
i
in
range
(
0
,
num_strs
):
distance
[
i
]
=
Levenshtein
(
hyp
=
x1
[
x1_lod
[
i
]:
x1_lod
[
i
+
1
]],
ref
=
x2
[
x2_lod
[
i
]:
x2_lod
[
i
+
1
]])
if
normalized
is
True
:
len_ref
=
x2_lod
[
i
+
1
]
-
x2_lod
[
i
]
distance
[
i
]
=
distance
[
i
]
/
len_ref
self
.
attrs
=
{
'normalized'
:
normalized
}
self
.
inputs
=
{
'Hyps'
:
(
x1
,
[
x1_lod
]),
'Refs'
:
(
x2
,
[
x2_lod
])}
self
.
outputs
=
{
'Out'
:
distance
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestEditDistanceOpNormalized
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"edit_distance"
normalized
=
True
x1
=
np
.
array
([[
0
,
10
,
3
,
6
,
5
,
8
,
2
]]).
astype
(
"int32"
)
x2
=
np
.
array
([[
0
,
10
,
4
,
6
,
7
,
8
]]).
astype
(
"int32"
)
x1
=
np
.
transpose
(
x1
)
x2
=
np
.
transpose
(
x2
)
x1_lod
=
[
0
,
1
,
3
,
6
]
x2_lod
=
[
0
,
2
,
3
,
5
]
num_strs
=
len
(
x1_lod
)
-
1
distance
=
np
.
zeros
((
num_strs
,
1
)).
astype
(
"float32"
)
for
i
in
range
(
0
,
num_strs
):
distance
[
i
]
=
Levenshtein
(
hyp
=
x1
[
x1_lod
[
i
]:
x1_lod
[
i
+
1
]],
ref
=
x2
[
x2_lod
[
i
]:
x2_lod
[
i
+
1
]])
if
normalized
is
True
:
len_ref
=
x2_lod
[
i
+
1
]
-
x2_lod
[
i
]
distance
[
i
]
=
distance
[
i
]
/
len_ref
self
.
attrs
=
{
'normalized'
:
normalized
}
self
.
inputs
=
{
'Hyps'
:
(
x1
,
[
x1_lod
]),
'Refs'
:
(
x2
,
[
x2_lod
])}
self
.
outputs
=
{
'Out'
:
distance
}
def
test_check_output
(
self
):
self
.
check_output
()
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_sequence_erase_op.py
0 → 100644
浏览文件 @
dc488c17
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
def
sequence_erase
(
in_seq
,
lod0
,
tokens
):
new_lod0
=
[
0
]
out_seq
=
[]
for
i
in
range
(
0
,
len
(
lod0
)
-
1
):
num_out
=
0
for
dat
in
in_seq
[
lod0
[
i
]:
lod0
[
i
+
1
]]:
if
dat
not
in
tokens
:
out_seq
.
append
(
dat
)
num_out
+=
1
new_lod0
.
append
(
new_lod0
[
-
1
]
+
num_out
)
return
np
.
array
(
out_seq
).
astype
(
"int32"
),
new_lod0
class
TestSequenceEraseOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"sequence_erase"
in_seq
=
np
.
random
.
randint
(
0
,
10
,
(
30
,
1
)).
astype
(
"int32"
)
lod
=
[[
0
,
9
,
13
,
24
,
30
]]
tokens
=
[
2
,
3
,
5
]
out_seq
,
new_lod0
=
sequence_erase
(
in_seq
,
lod
[
0
],
tokens
)
self
.
attrs
=
{
'tokens'
:
tokens
}
self
.
inputs
=
{
'X'
:
(
in_seq
,
lod
)}
self
.
outputs
=
{
'Out'
:
(
out_seq
,
[
new_lod0
])}
def
test_check_output
(
self
):
self
.
check_output
()
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py
浏览文件 @
dc488c17
...
...
@@ -3,43 +3,86 @@ import paddle.v2.fluid.core as core
from
paddle.v2.fluid.executor
import
Executor
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.backward
import
append_backward
from
paddle.v2.fluid.framework
import
default_main_program
import
numpy
from
paddle.v2.fluid.framework
import
default_main_program
,
switch_main_program
from
paddle.v2.fluid.framework
import
Program
import
numpy
as
np
main_program
=
default_main_program
()
class
TestShrinkRNNMemory
(
unittest
.
TestCase
):
def
test_shrink_rnn_memory
(
self
):
class
TestShrinkRNNMemoryBase
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
main_program
=
Program
()
switch_main_program
(
self
.
main_program
)
x
=
layers
.
data
(
'x'
,
shape
=
[
100
],
dtype
=
'float32'
)
x
.
stop_gradient
=
False
table
=
layers
.
lod_rank_table
(
x
=
x
)
rank_table_tensor
=
layers
.
data
(
'rank_table_tensor'
,
shape
=
[
1
],
dtype
=
'float32'
,
lod_level
=
1
)
table
=
layers
.
lod_rank_table
(
x
=
rank_table_tensor
)
i
=
layers
.
zeros
(
dtype
=
'int64'
,
shape
=
[
1
])
mem1
=
layers
.
shrink_memory
(
x
=
x
,
i
=
i
,
table
=
table
)
self
.
mem1
=
layers
.
shrink_memory
(
x
=
x
,
i
=
i
,
table
=
table
)
i
=
layers
.
increment
(
x
=
i
)
i
.
stop_gradient
=
True
mem2
=
layers
.
shrink_memory
(
x
=
mem1
,
i
=
i
,
table
=
table
)
self
.
mem2
=
layers
.
shrink_memory
(
x
=
self
.
mem1
,
i
=
i
,
table
=
table
)
i
=
layers
.
increment
(
x
=
i
)
i
.
stop_gradient
=
True
mem3
=
layers
.
shrink_memory
(
x
=
mem2
,
i
=
i
,
table
=
table
)
self
.
mem3
=
layers
.
shrink_memory
(
x
=
self
.
mem2
,
i
=
i
,
table
=
table
)
mem3_mean
=
layers
.
mean
(
x
=
self
.
mem3
)
append_backward
(
loss
=
mem3_mean
)
self
.
x_grad
=
self
.
main_program
.
global_block
().
var
(
'x@GRAD'
)
def
sum_lodtensor
(
self
,
tensor
):
sum_res
=
0.0
for
i
in
xrange
(
np
.
product
(
tensor
.
get_dims
())):
sum_res
+=
tensor
.
get_float_element
(
i
)
return
sum_res
class
TestShrinkRNNMemoryReferLoD
(
TestShrinkRNNMemoryBase
):
def
test_refer_lod
(
self
):
cpu
=
core
.
CPUPlace
()
tensor
=
core
.
LoDTensor
()
tensor
.
set_lod
([[
0
,
2
,
5
,
6
]])
tensor_np
=
numpy
.
random
.
random
(
size
=
(
3
,
100
)).
astype
(
'float32'
)
tensor
.
set
(
tensor_np
,
cpu
)
x_tensor
=
core
.
LoDTensor
()
x_tensor
.
set_lod
([[
0
,
2
,
5
,
6
]])
tensor_np
=
np
.
random
.
random
(
size
=
(
6
,
100
)).
astype
(
'float32'
)
x_tensor
.
set
(
tensor_np
,
cpu
)
rank_table_tensor
=
core
.
LoDTensor
()
rank_table_tensor
.
set_lod
([[
0
,
1
,
3
,
6
]])
rank_table_tensor
.
set
(
np
.
random
.
random
(
size
=
(
6
,
1
)).
astype
(
'float32'
),
cpu
)
exe
=
Executor
(
cpu
)
outs
=
exe
.
run
(
feed
=
{
'x'
:
tensor
},
fetch_list
=
[
mem1
,
mem2
,
mem3
])
self
.
assertTrue
(
numpy
.
allclose
(
tensor_np
[
0
:
3
],
outs
[
0
]))
self
.
assertTrue
(
numpy
.
allclose
(
tensor_np
[
0
:
2
],
outs
[
1
]))
self
.
assertTrue
(
numpy
.
allclose
(
tensor_np
[
0
:
1
],
outs
[
2
]))
outs
=
exe
.
run
(
feed
=
{
'x'
:
x_tensor
,
'rank_table_tensor'
:
rank_table_tensor
},
fetch_list
=
[
self
.
mem1
,
self
.
mem2
,
self
.
mem3
,
self
.
x_grad
],
return_numpy
=
False
)
self
.
assertTrue
(
np
.
allclose
(
tensor_np
[
0
:
6
],
outs
[
0
]))
self
.
assertTrue
(
np
.
allclose
(
tensor_np
[
0
:
5
],
outs
[
1
]))
self
.
assertTrue
(
np
.
allclose
(
tensor_np
[
0
:
2
],
outs
[
2
]))
self
.
assertAlmostEqual
(
1.0
,
self
.
sum_lodtensor
(
outs
[
3
]),
delta
=
0.01
)
mem3_mean
=
layers
.
mean
(
x
=
mem3
)
append_backward
(
loss
=
mem3_mean
)
x_grad
=
exe
.
run
(
feed
=
{
'x'
:
tensor
},
fetch_list
=
[
main_program
.
global_block
().
var
(
'x@GRAD'
)])[
0
]
self
.
assertAlmostEqual
(
1.0
,
x_grad
.
sum
(),
delta
=
0.1
)
class
TestShrinkRNNMemoryNoLoD
(
TestShrinkRNNMemoryBase
):
def
test_no_lod
(
self
):
cpu
=
core
.
CPUPlace
()
x_tensor
=
core
.
LoDTensor
()
tensor_np
=
np
.
random
.
random
(
size
=
(
3
,
100
)).
astype
(
'float32'
)
x_tensor
.
set
(
tensor_np
,
cpu
)
rank_table_tensor
=
core
.
LoDTensor
()
rank_table_tensor
.
set_lod
([[
0
,
1
,
3
,
6
]])
rank_table_tensor
.
set
(
np
.
random
.
random
(
size
=
(
6
,
1
)).
astype
(
'float32'
),
cpu
)
exe
=
Executor
(
cpu
)
outs
=
exe
.
run
(
feed
=
{
'x'
:
x_tensor
,
'rank_table_tensor'
:
rank_table_tensor
},
fetch_list
=
[
self
.
mem1
,
self
.
mem2
,
self
.
mem3
,
self
.
x_grad
],
return_numpy
=
False
)
self
.
assertTrue
(
np
.
allclose
(
tensor_np
[
0
:
3
],
outs
[
0
]))
self
.
assertTrue
(
np
.
allclose
(
tensor_np
[
0
:
2
],
outs
[
1
]))
self
.
assertTrue
(
np
.
allclose
(
tensor_np
[
0
:
1
],
outs
[
2
]))
self
.
assertAlmostEqual
(
1.0
,
self
.
sum_lodtensor
(
outs
[
3
]),
delta
=
0.01
)
if
__name__
==
'__main__'
:
...
...
python/setup.py.in
浏览文件 @
dc488c17
...
...
@@ -31,6 +31,7 @@ patch = '%(patch)d'
rc = '%(rc)d'
istaged = %(istaged)s
commit = '%(commit)s'
with_mkl = '%(with_mkl)s'
def show():
if istaged:
...
...
@@ -41,6 +42,9 @@ def show():
print 'rc:', rc
else:
print 'commit:', commit
def mkl():
return with_mkl
'''
commit = git_commit()
with open(filename, 'w') as f:
...
...
@@ -51,7 +55,8 @@ def show():
'rc': RC,
'version': '${PADDLE_VERSION}',
'commit': commit,
'istaged': ISTAGED})
'istaged': ISTAGED,
'with_mkl': '@WITH_MKL@'})
write_version_py(filename='@PADDLE_SOURCE_DIR@/python/paddle/version.py')
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录