Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
aaf818f5
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
aaf818f5
编写于
2月 23, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into feature/add_fwd_block_id
上级
65058cfb
bf9ed4a9
变更
20
展开全部
隐藏空白更改
内联
并排
Showing
20 changed file
with
759 addition
and
308 deletion
+759
-308
doc/design/parallel_do.md
doc/design/parallel_do.md
+2
-2
paddle/fluid/framework/block_desc.cc
paddle/fluid/framework/block_desc.cc
+19
-0
paddle/fluid/framework/block_desc.h
paddle/fluid/framework/block_desc.h
+2
-0
paddle/fluid/framework/channel.h
paddle/fluid/framework/channel.h
+73
-0
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+4
-1
paddle/fluid/framework/var_desc.cc
paddle/fluid/framework/var_desc.cc
+52
-2
paddle/fluid/framework/var_desc.h
paddle/fluid/framework/var_desc.h
+4
-0
paddle/fluid/framework/var_type.h
paddle/fluid/framework/var_type.h
+6
-0
paddle/fluid/operators/elementwise_add_op.h
paddle/fluid/operators/elementwise_add_op.h
+5
-57
paddle/fluid/operators/elementwise_op_function.h
paddle/fluid/operators/elementwise_op_function.h
+256
-2
paddle/fluid/operators/listen_and_serv_op.cc
paddle/fluid/operators/listen_and_serv_op.cc
+12
-48
paddle/fluid/platform/float16.h
paddle/fluid/platform/float16.h
+6
-11
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+11
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-0
python/paddle/trainer_config_helpers/layer_math.py
python/paddle/trainer_config_helpers/layer_math.py
+1
-1
python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr
...r_config_helpers/tests/configs/protostr/math_ops.protostr
+1
-2
python/paddle/v2/dataset/common.py
python/paddle/v2/dataset/common.py
+2
-0
python/paddle/v2/fluid/distribute_transpiler.py
python/paddle/v2/fluid/distribute_transpiler.py
+221
-177
python/paddle/v2/fluid/framework.py
python/paddle/v2/fluid/framework.py
+72
-0
python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py
...ests/book_distribute/notest_recognize_digits_conv_dist.py
+9
-4
未找到文件。
doc/design/parallel_do.md
浏览文件 @
aaf818f5
...
...
@@ -24,7 +24,7 @@ A vanilla implementation of parallel_do can be shown as the following (`|` means
```
In the forward pass
| Split input onto different devices
| Copy parameter
to
onto different devices
| Copy parameter onto different devices
|||| Compute forward pass in parallel
| Merge output from different devices
...
...
@@ -87,7 +87,7 @@ block2 {
}
```
## P
ro
formance Imporvement
## P
er
formance Imporvement
There are serial places we can make this parallel_do faster.
...
...
paddle/fluid/framework/block_desc.cc
浏览文件 @
aaf818f5
...
...
@@ -44,6 +44,25 @@ bool BlockDesc::HasVar(const std::string &name) const {
return
vars_
.
find
(
name
)
!=
vars_
.
end
();
}
VarDesc
*
BlockDesc
::
RenameVar
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
)
{
if
(
!
this
->
HasVar
(
old_name
))
{
return
nullptr
;
}
need_update_
=
true
;
auto
*
var
=
this
->
Var
(
old_name
);
VarDesc
*
new_var
=
new
VarDesc
(
*
(
var
->
Proto
()));
new_var
->
SetName
(
new_name
);
vars_
[
new_name
].
reset
(
new_var
);
// rename inputs and outputs
for
(
const
auto
&
op
:
ops_
)
{
auto
*
it
=
op
.
get
();
it
->
Rename
(
old_name
,
new_name
);
}
vars_
.
erase
(
old_name
);
return
new_var
;
}
VarDesc
*
BlockDesc
::
FindVarRecursive
(
const
std
::
string
&
name
)
const
{
if
(
name
==
kEmptyVarName
)
return
nullptr
;
...
...
paddle/fluid/framework/block_desc.h
浏览文件 @
aaf818f5
...
...
@@ -57,6 +57,8 @@ class BlockDesc {
bool
HasVar
(
const
std
::
string
&
var_name
)
const
;
VarDesc
*
RenameVar
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
);
VarDesc
*
FindVarRecursive
(
const
std
::
string
&
name_bytes
)
const
;
VarDesc
&
FindRecursiveOrCreateVar
(
const
std
::
string
&
name_bytes
);
...
...
paddle/fluid/framework/channel.h
浏览文件 @
aaf818f5
...
...
@@ -15,6 +15,8 @@ limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <typeindex>
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -51,6 +53,77 @@ void CloseChannel(Channel<T>* ch) {
ch
->
Close
();
}
/*
* The ChannelHolder class serves two main purposes:
* 1. It acts as a unified wrapper for the different kinds of
* channels, i.e. Buffered and Unbuffered channels. This is
* similar to the ReaderHolder class.
* 2. It also helps us in TypeHiding. This is similar to the
* PlaceHolder implementations in variable.h and tensor.h.
*/
class
ChannelHolder
{
public:
template
<
typename
T
>
void
Reset
(
size_t
buffer_size
)
{
holder_
.
reset
(
new
PlaceholderImpl
<
T
>
(
buffer_size
));
}
template
<
typename
T
>
bool
Send
(
T
*
data
)
{
if
(
!
IsInitialized
())
return
false
;
PADDLE_ENFORCE_EQ
(
holder_
->
Type
(),
std
::
type_index
(
typeid
(
T
)));
// Static cast should be safe because we have ensured that types are same
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
return
channel
!=
nullptr
?
channel
->
Send
(
data
)
:
false
;
}
template
<
typename
T
>
bool
Receive
(
T
*
data
)
{
if
(
!
IsInitialized
())
return
false
;
PADDLE_ENFORCE_EQ
(
holder_
->
Type
(),
std
::
type_index
(
typeid
(
T
)));
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
return
channel
!=
nullptr
?
channel
->
Receive
(
data
)
:
false
;
}
void
close
()
{
if
(
IsInitialized
())
holder_
->
Close
();
}
inline
bool
IsInitialized
()
const
{
return
holder_
!=
nullptr
;
}
private:
/**
* @note Placeholder hides type T, so it doesn't appear as a template
* parameter of ChannelHolder.
*/
struct
Placeholder
{
virtual
~
Placeholder
()
{}
virtual
const
std
::
type_index
Type
()
const
=
0
;
virtual
void
*
Ptr
()
const
=
0
;
virtual
void
Close
()
const
=
0
;
std
::
type_info
type_
;
};
template
<
typename
T
>
struct
PlaceholderImpl
:
public
Placeholder
{
PlaceholderImpl
(
size_t
buffer_size
)
:
type_
(
std
::
type_index
(
typeid
(
T
)))
{
channel_
.
reset
(
MakeChannel
<
T
>
(
buffer_size
));
}
virtual
const
std
::
type_index
Type
()
const
{
return
type_
;
}
virtual
void
*
Ptr
()
const
{
return
static_cast
<
void
*>
(
channel_
.
get
());
}
virtual
void
Close
()
{
if
(
channel_
)
channel_
->
Close
();
}
std
::
unique_ptr
<
Channel
<
T
>*>
channel_
;
const
std
::
type_index
type_
;
};
// Pointer to a PlaceholderImpl object
std
::
unique_ptr
<
Placeholder
>
holder_
;
};
}
// namespace framework
}
// namespace paddle
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
aaf818f5
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <set>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
...
...
@@ -55,13 +56,15 @@ static void CreateTensor(Variable* var, proto::VarType::Type var_type) {
var
->
GetMutable
<
platform
::
PlaceList
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
READER
)
{
var
->
GetMutable
<
ReaderHolder
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
CHANNEL
)
{
var
->
GetMutable
<
ChannelHolder
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
NCCL_COM
)
{
// GetMutable will be called in ncclInit
}
else
{
PADDLE_THROW
(
"Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, NCCL_COM]"
,
"LOD_RANK_TABLE, PLACE_LIST, READER,
CHANNEL,
NCCL_COM]"
,
var_type
);
}
}
...
...
paddle/fluid/framework/var_desc.cc
浏览文件 @
aaf818f5
...
...
@@ -88,7 +88,13 @@ std::vector<std::vector<int64_t>> VarDesc::GetShapes() const {
}
void
VarDesc
::
SetDataType
(
proto
::
VarType
::
Type
data_type
)
{
mutable_tensor_desc
()
->
set_data_type
(
data_type
);
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
CHANNEL
:
mutable_channel_desc
()
->
set_data_type
(
data_type
);
break
;
default:
mutable_tensor_desc
()
->
set_data_type
(
data_type
);
}
}
void
VarDesc
::
SetDataTypes
(
...
...
@@ -109,7 +115,13 @@ void VarDesc::SetDataTypes(
}
proto
::
VarType
::
Type
VarDesc
::
GetDataType
()
const
{
return
tensor_desc
().
data_type
();
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
CHANNEL
:
return
channel_desc
().
data_type
();
break
;
default:
return
tensor_desc
().
data_type
();
}
}
std
::
vector
<
proto
::
VarType
::
Type
>
VarDesc
::
GetDataTypes
()
const
{
...
...
@@ -122,6 +134,17 @@ std::vector<proto::VarType::Type> VarDesc::GetDataTypes() const {
return
res
;
}
void
VarDesc
::
SetCapacity
(
int64_t
capacity
)
{
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
CHANNEL
:
desc_
.
mutable_type
()
->
mutable_channel
()
->
set_capacity
(
capacity
);
break
;
default:
PADDLE_THROW
(
"Setting 'capacity' is not supported by the type of var %s."
,
this
->
Name
());
}
}
void
VarDesc
::
SetLoDLevel
(
int32_t
lod_level
)
{
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
LOD_TENSOR
:
...
...
@@ -191,6 +214,19 @@ std::vector<int32_t> VarDesc::GetLoDLevels() const {
}
}
const
proto
::
VarType
::
ChannelDesc
&
VarDesc
::
channel_desc
()
const
{
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var's type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
CHANNEL
:
return
desc_
.
type
().
channel
();
default:
PADDLE_THROW
(
"Getting 'channel_desc' is not supported by the type of var %s."
,
this
->
Name
());
}
}
const
proto
::
VarType
::
TensorDesc
&
VarDesc
::
tensor_desc
()
const
{
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var's type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
...
...
@@ -226,6 +262,20 @@ std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const {
}
}
proto
::
VarType
::
ChannelDesc
*
VarDesc
::
mutable_channel_desc
()
{
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
CHANNEL
:
return
desc_
.
mutable_type
()
->
mutable_channel
();
default:
PADDLE_THROW
(
"Getting 'mutable_channel_desc' is not supported by the type of var "
"%s."
,
this
->
Name
());
}
}
proto
::
VarType
::
TensorDesc
*
VarDesc
::
mutable_tensor_desc
()
{
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
...
...
paddle/fluid/framework/var_desc.h
浏览文件 @
aaf818f5
...
...
@@ -85,6 +85,8 @@ class VarDesc {
void
SetDataTypes
(
const
std
::
vector
<
proto
::
VarType
::
Type
>
&
multiple_data_type
);
void
SetCapacity
(
int64_t
capacity
);
proto
::
VarType
::
Type
GetDataType
()
const
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetDataTypes
()
const
;
...
...
@@ -106,8 +108,10 @@ class VarDesc {
void
SetPersistable
(
bool
persistable
)
{
desc_
.
set_persistable
(
persistable
);
}
private:
const
proto
::
VarType
::
ChannelDesc
&
channel_desc
()
const
;
const
proto
::
VarType
::
TensorDesc
&
tensor_desc
()
const
;
std
::
vector
<
proto
::
VarType
::
TensorDesc
>
tensor_descs
()
const
;
proto
::
VarType
::
ChannelDesc
*
mutable_channel_desc
();
proto
::
VarType
::
TensorDesc
*
mutable_tensor_desc
();
std
::
vector
<
proto
::
VarType
::
TensorDesc
*>
mutable_tensor_descs
();
...
...
paddle/fluid/framework/var_type.h
浏览文件 @
aaf818f5
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
...
...
@@ -34,6 +35,8 @@ inline proto::VarType::Type ToVarType(std::type_index type) {
return
proto
::
VarType_Type_SELECTED_ROWS
;
}
else
if
(
type
.
hash_code
()
==
typeid
(
ReaderHolder
).
hash_code
())
{
return
proto
::
VarType_Type_READER
;
}
else
if
(
type
.
hash_code
()
==
typeid
(
ChannelHolder
).
hash_code
())
{
return
proto
::
VarType_Type_CHANNEL
;
}
else
{
PADDLE_THROW
(
"ToVarType:Unsupported type %s"
,
type
.
name
());
}
...
...
@@ -57,6 +60,9 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
case
proto
::
VarType_Type_READER
:
visitor
(
var
.
Get
<
ReaderHolder
>
());
return
;
case
proto
::
VarType_Type_CHANNEL
:
visitor
(
var
.
Get
<
ChannelHolder
>
());
return
;
default:
PADDLE_THROW
(
"Not supported visit type, %d"
,
ToVarType
(
var
.
Type
()));
}
...
...
paddle/fluid/operators/elementwise_add_op.h
浏览文件 @
aaf818f5
...
...
@@ -41,59 +41,8 @@ class ElementwiseAddKernel : public framework::OpKernel<T> {
};
template
<
typename
T
>
struct
ElementwiseAddGradFunctor
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
Z
,
typename
dX
,
typename
dY
,
typename
dZ
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
Z
z
,
dX
dx
,
dY
dy
,
dZ
dz
)
{
auto
dz_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dz
);
if
(
dx
)
{
auto
dx_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dx
);
dx_e
.
device
(
d
)
=
dz_e
;
}
if
(
dy
)
{
auto
dy_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dy
);
dy_e
.
device
(
d
)
=
dz_e
;
}
}
};
template
<
typename
T
>
struct
ElementwiseAddBroadCastGradFunctor
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
Z
,
typename
dX
,
typename
dY
,
typename
dZ
,
typename
Pre
,
typename
N
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
Z
z
,
dX
dx
,
dY
dy
,
dZ
dz
,
Pre
pre
,
N
n
)
{
auto
dz_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dz
);
if
(
dx
)
{
auto
dx_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dx
);
dx_e
.
device
(
d
)
=
dz_e
;
}
if
(
dy
)
{
auto
dy_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dy
);
dy_e
.
device
(
d
)
=
dz_e
.
reshape
(
Eigen
::
DSizes
<
int
,
2
>
(
pre
,
n
))
.
sum
(
Eigen
::
array
<
int
,
1
>
{{
0
}});
}
}
};
template
<
typename
T
>
struct
ElementwiseAddBroadCast2GradFunctor
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
Z
,
typename
dX
,
typename
dY
,
typename
dZ
,
typename
Pre
,
typename
N
,
typename
Post
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
Z
z
,
dX
dx
,
dY
dy
,
dZ
dz
,
Pre
pre
,
N
n
,
Post
post
)
{
auto
dz_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dz
);
if
(
dx
)
{
auto
dx_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dx
);
dx_e
.
device
(
d
)
=
dz_e
;
}
if
(
dy
)
{
auto
dy_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dy
);
dy_e
.
device
(
d
)
=
dz_e
.
reshape
(
Eigen
::
DSizes
<
int
,
3
>
(
pre
,
n
,
post
))
.
sum
(
Eigen
::
array
<
int
,
2
>
{{
0
,
2
}});
}
}
struct
IdentityGrad
{
HOSTDEVICE
T
operator
()(
T
x
,
T
y
,
T
out
,
T
dout
)
const
{
return
dout
;
}
};
template
<
typename
DeviceContext
,
typename
T
>
...
...
@@ -109,10 +58,9 @@ class ElementwiseAddGradKernel : public framework::OpKernel<T> {
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
ElementwiseGradCompute
<
DeviceContext
,
T
,
ElementwiseAddGradFunctor
<
T
>
,
ElementwiseAddBroadCastGradFunctor
<
T
>
,
ElementwiseAddBroadCast2GradFunctor
<
T
>>
(
ctx
,
x
,
y
,
out
,
dout
,
axis
,
dx
,
dy
);
ElemwiseGradCompute
<
DeviceContext
,
T
,
IdentityGrad
<
T
>
,
IdentityGrad
<
T
>>
(
ctx
,
*
x
,
*
y
,
*
out
,
*
dout
,
axis
,
dx
,
dy
,
IdentityGrad
<
T
>
(),
IdentityGrad
<
T
>
());
}
};
...
...
paddle/fluid/operators/elementwise_op_function.h
浏览文件 @
aaf818f5
...
...
@@ -20,9 +20,11 @@ limitations under the License. */
#ifdef __NVCC__
#include <thrust/iterator/iterator_adaptor.h>
constexpr
int
ELEMWISE_MAX_BLOCK_DIM
=
1024
;
#endif
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/for_range.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -33,10 +35,10 @@ namespace operators {
* For example:
* 1. shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
* pre=2, n=3*4, post=5
* x.shape(2, 12, 5) * y.shape(1,
12,1).broadcast(2,12,
5)
* x.shape(2, 12, 5) * y.shape(1,
12, 1).broadcast(2, 12,
5)
* 2. shape(X) = (2, 3, 4, 5), shape(Y) = (4,5)
* pre=2*3, n=4*5, post=1
* x.shape(
2, 3, 20) * y.shape(1,1,20).broadcast(2,3,20
)
* x.shape(
6, 20, 1) * y.shape(1, 20, 1).broadcast(6, 20, 1
)
*/
inline
void
get_mid_dims
(
const
framework
::
DDim
&
x_dims
,
const
framework
::
DDim
&
y_dims
,
const
int
axis
,
...
...
@@ -311,6 +313,258 @@ EIGEN_FUNCTOR(Mul, EIGEN_MUL);
#define EIGEN_DIV(x, y) ((x) / (y))
EIGEN_FUNCTOR
(
Div
,
EIGEN_DIV
);
template
<
typename
T
,
typename
DX_OP
,
typename
DY_OP
>
struct
ElemwiseGradNoBroadcast
{
const
T
*
x_
;
const
T
*
y_
;
const
T
*
out_
;
const
T
*
dout_
;
HOSTDEVICE
void
operator
()(
size_t
i
)
{
if
(
dx_
!=
nullptr
)
{
dx_
[
i
]
=
dx_op_
(
x_
[
i
],
y_
[
i
],
out_
[
i
],
dout_
[
i
]);
}
if
(
dy_
!=
nullptr
)
{
dy_
[
i
]
=
dx_op_
(
x_
[
i
],
y_
[
i
],
out_
[
i
],
dout_
[
i
]);
}
}
DX_OP
dx_op_
;
DY_OP
dy_op_
;
T
*
dx_
;
T
*
dy_
;
};
template
<
typename
T
,
typename
DX_OP
,
typename
DY_OP
>
static
void
ElemwiseGradBroadcast1CPU
(
const
T
*
x
,
const
T
*
y
,
const
T
*
out
,
const
T
*
dout
,
int
h
,
int
w
,
DX_OP
dx_op
,
DY_OP
dy_op
,
T
*
dx
,
T
*
dy
)
{
for
(
int
i
=
0
;
i
<
h
;
++
i
)
{
for
(
int
j
=
0
;
j
<
w
;
++
j
)
{
int
x_offset
=
i
*
w
+
j
;
if
(
dx
!=
nullptr
)
{
dx
[
x_offset
]
=
dx_op
(
x
[
x_offset
],
y
[
j
],
out
[
x_offset
],
dout
[
x_offset
]);
}
if
(
dy
!=
nullptr
)
{
T
tmp
=
dy_op
(
x
[
x_offset
],
y
[
j
],
out
[
x_offset
],
dout
[
x_offset
]);
if
(
i
==
0
)
{
dy
[
j
]
=
tmp
;
}
else
{
dy
[
j
]
+=
tmp
;
}
}
}
}
}
#ifdef __NVCC__
template
<
typename
T
,
typename
DX_OP
,
typename
DY_OP
>
static
__global__
void
ElemwiseGradBroadcast1CUDAKernel
(
const
T
*
x
,
const
T
*
y
,
const
T
*
out
,
const
T
*
dout
,
int
h
,
int
w
,
DX_OP
dx_op
,
DY_OP
dy_op
,
T
*
dx
,
T
*
dy
)
{
extern
__shared__
char
shm_buffer
[];
T
*
shm
=
reinterpret_cast
<
T
*>
(
shm_buffer
);
int
j
=
blockIdx
.
x
;
int
i
=
threadIdx
.
x
;
int
tid
=
threadIdx
.
x
;
shm
[
tid
]
=
0
;
do
{
int
x_offset
=
i
*
w
+
j
;
if
(
dx
)
{
dx
[
x_offset
]
=
dx_op
(
x
[
x_offset
],
y
[
j
],
out
[
x_offset
],
dout
[
x_offset
]);
}
if
(
dy
)
{
shm
[
tid
]
+=
dy_op
(
x
[
x_offset
],
y
[
j
],
out
[
x_offset
],
dout
[
x_offset
]);
}
i
+=
ELEMWISE_MAX_BLOCK_DIM
;
}
while
(
i
<
h
);
if
(
dy
)
{
__syncthreads
();
h
=
h
>
ELEMWISE_MAX_BLOCK_DIM
?
ELEMWISE_MAX_BLOCK_DIM
:
h
;
// Sum, could be optimized
if
(
threadIdx
.
x
==
0
)
{
for
(
int
k
=
1
;
k
<
h
;
++
k
)
{
shm
[
0
]
+=
shm
[
k
];
}
dy
[
j
]
=
shm
[
0
];
}
}
}
template
<
typename
T
,
typename
DX_OP
,
typename
DY_OP
>
static
void
ElemwiseGradBroadcast1CUDA
(
cudaStream_t
stream
,
const
T
*
x
,
const
T
*
y
,
const
T
*
out
,
const
T
*
dout
,
int
h
,
int
w
,
DX_OP
dx_op
,
DY_OP
dy_op
,
T
*
dx
,
T
*
dy
)
{
int
block_size
=
std
::
min
(
ELEMWISE_MAX_BLOCK_DIM
,
h
);
int
gird_size
=
w
;
int
shared_mem_size
=
block_size
*
sizeof
(
T
);
ElemwiseGradBroadcast1CUDAKernel
<<<
gird_size
,
block_size
,
shared_mem_size
,
stream
>>>
(
x
,
y
,
out
,
dout
,
h
,
w
,
dx_op
,
dy_op
,
dx
,
dy
);
}
#endif
template
<
typename
T
,
typename
DX_OP
,
typename
DY_OP
>
static
void
ElemwiseGradBroadcast2CPU
(
const
T
*
x
,
const
T
*
y
,
const
T
*
out
,
const
T
*
dout
,
int
pre
,
int
n
,
int
post
,
DX_OP
dx_op
,
DY_OP
dy_op
,
T
*
dx
,
T
*
dy
)
{
for
(
int
i
=
0
;
i
<
pre
;
++
i
)
{
for
(
int
j
=
0
;
j
<
n
;
++
j
)
{
for
(
int
k
=
0
;
k
<
post
;
++
k
)
{
int
x_offset
=
i
*
n
*
post
+
j
*
post
+
k
;
if
(
dx
!=
nullptr
)
{
dx
[
x_offset
]
=
dx_op
(
x
[
x_offset
],
y
[
j
],
out
[
x_offset
],
dout
[
x_offset
]);
}
if
(
dy
!=
nullptr
)
{
T
tmp
=
dy_op
(
x
[
x_offset
],
y
[
j
],
out
[
x_offset
],
dout
[
x_offset
]);
if
(
i
==
0
&&
k
==
0
)
{
dy
[
j
]
=
tmp
;
}
else
{
dy
[
j
]
+=
tmp
;
}
}
}
}
}
}
#ifdef __NVCC__
template
<
typename
T
,
typename
DX_OP
,
typename
DY_OP
>
static
__global__
void
ElemwiseGradBroadcast2CUDAKernel
(
const
T
*
x
,
const
T
*
y
,
const
T
*
out
,
const
T
*
dout
,
int
pre
,
int
n
,
int
post
,
DX_OP
dx_op
,
DY_OP
dy_op
,
T
*
dx
,
T
*
dy
)
{
int
tid
=
threadIdx
.
x
;
int
j
=
blockIdx
.
x
;
extern
__shared__
char
shm_buffer
[];
T
*
shm
=
reinterpret_cast
<
T
*>
(
shm_buffer
);
shm
[
tid
]
=
0
;
int
ttid
=
tid
;
while
(
true
)
{
int
i
=
ttid
/
post
;
int
k
=
ttid
%
post
;
if
(
i
>=
pre
)
break
;
int
x_offset
=
i
*
n
*
post
+
j
*
post
+
k
;
if
(
dx
!=
nullptr
)
{
dx
[
x_offset
]
=
dx_op
(
x
[
x_offset
],
y
[
j
],
out
[
x_offset
],
dout
[
x_offset
]);
}
if
(
dy
!=
nullptr
)
{
shm
[
tid
]
+=
dy_op
(
x
[
x_offset
],
y
[
j
],
out
[
x_offset
],
dout
[
x_offset
]);
}
ttid
+=
ELEMWISE_MAX_BLOCK_DIM
;
}
if
(
dy
)
{
__syncthreads
();
int
h
=
pre
*
post
;
h
=
h
>
ELEMWISE_MAX_BLOCK_DIM
?
ELEMWISE_MAX_BLOCK_DIM
:
h
;
// Sum, could be optimized
if
(
tid
==
0
)
{
for
(
int
i
=
1
;
i
<
h
;
++
i
)
{
shm
[
0
]
+=
shm
[
i
];
}
dy
[
j
]
=
shm
[
0
];
}
}
}
template
<
typename
T
,
typename
DX_OP
,
typename
DY_OP
>
static
void
ElemwiseGradBroadcast2CUDA
(
cudaStream_t
stream
,
const
T
*
x
,
const
T
*
y
,
const
T
*
out
,
const
T
*
dout
,
int
pre
,
int
n
,
int
post
,
DX_OP
dx_op
,
DY_OP
dy_op
,
T
*
dx
,
T
*
dy
)
{
int
block_size
=
std
::
min
(
ELEMWISE_MAX_BLOCK_DIM
,
pre
*
post
);
int
gird_size
=
n
;
int
shared_mem_size
=
block_size
*
sizeof
(
T
);
ElemwiseGradBroadcast2CUDAKernel
<<<
gird_size
,
block_size
,
shared_mem_size
,
stream
>>>
(
x
,
y
,
out
,
dout
,
pre
,
n
,
post
,
dx_op
,
dy_op
,
dx
,
dy
);
}
#endif
template
<
typename
DeviceContext
,
typename
T
,
typename
DX_OP
,
typename
DY_OP
>
void
ElemwiseGradCompute
(
const
framework
::
ExecutionContext
&
ctx
,
const
framework
::
Tensor
&
x
,
const
framework
::
Tensor
&
y
,
const
framework
::
Tensor
&
out
,
const
framework
::
Tensor
&
dout
,
int
axis
,
framework
::
Tensor
*
dx
,
framework
::
Tensor
*
dy
,
DX_OP
dx_op
,
DY_OP
dy_op
)
{
if
(
x
.
dims
()
==
y
.
dims
())
{
size_t
N
=
static_cast
<
size_t
>
(
framework
::
product
(
x
.
dims
()));
platform
::
ForRange
<
DeviceContext
>
for_range
(
ctx
.
template
device_context
<
DeviceContext
>(),
N
);
for_range
(
ElemwiseGradNoBroadcast
<
T
,
DX_OP
,
DY_OP
>
{
x
.
data
<
T
>
(),
y
.
data
<
T
>
(),
out
.
data
<
T
>
(),
dout
.
data
<
T
>
(),
dx_op
,
dy_op
,
dx
==
nullptr
?
nullptr
:
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
dy
==
nullptr
?
nullptr
:
dy
->
mutable_data
<
T
>
(
ctx
.
GetPlace
())});
}
else
{
// Y is a scalar
auto
x_dim
=
x
.
dims
();
auto
y_dim
=
y
.
dims
();
if
(
y_dim
.
size
()
==
1
&&
y_dim
[
0
]
==
1
)
{
// y is a scalar
auto
extended_dims
=
framework
::
vectorize
(
x_dim
);
extended_dims
.
push_back
(
1
);
x_dim
=
framework
::
make_ddim
(
extended_dims
);
}
axis
=
(
axis
==
-
1
?
x_dim
.
size
()
-
y_dim
.
size
()
:
axis
);
int
pre
,
n
,
post
;
get_mid_dims
(
x_dim
,
y_dim
,
axis
,
pre
,
n
,
post
);
if
(
post
==
1
)
{
int
h
=
pre
;
int
w
=
n
;
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
#ifdef __NVCC__
ElemwiseGradBroadcast1CUDA
(
ctx
.
template
device_context
<
DeviceContext
>().
stream
(),
x
.
data
<
T
>
(),
y
.
data
<
T
>
(),
out
.
data
<
T
>
(),
dout
.
data
<
T
>
(),
h
,
w
,
dx_op
,
dy_op
,
dx
==
nullptr
?
nullptr
:
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
dy
==
nullptr
?
nullptr
:
dy
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
#endif
}
else
{
ElemwiseGradBroadcast1CPU
(
x
.
data
<
T
>
(),
y
.
data
<
T
>
(),
out
.
data
<
T
>
(),
dout
.
data
<
T
>
(),
h
,
w
,
dx_op
,
dy_op
,
dx
==
nullptr
?
nullptr
:
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
dy
==
nullptr
?
nullptr
:
dy
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
}
}
else
{
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
#ifdef __NVCC__
ElemwiseGradBroadcast2CUDA
(
ctx
.
template
device_context
<
DeviceContext
>().
stream
(),
x
.
data
<
T
>
(),
y
.
data
<
T
>
(),
out
.
data
<
T
>
(),
dout
.
data
<
T
>
(),
pre
,
n
,
post
,
dx_op
,
dy_op
,
dx
==
nullptr
?
nullptr
:
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
dy
==
nullptr
?
nullptr
:
dy
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
#endif
}
else
{
ElemwiseGradBroadcast2CPU
(
x
.
data
<
T
>
(),
y
.
data
<
T
>
(),
out
.
data
<
T
>
(),
dout
.
data
<
T
>
(),
pre
,
n
,
post
,
dx_op
,
dy_op
,
dx
==
nullptr
?
nullptr
:
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
dy
==
nullptr
?
nullptr
:
dy
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
}
}
}
};
template
<
typename
DeviceContext
,
typename
T
,
typename
functor
,
typename
broadcastfunctor
,
typename
broadcast2functor
>
void
ElementwiseGradCompute
(
const
framework
::
ExecutionContext
&
ctx
,
...
...
paddle/fluid/operators/listen_and_serv_op.cc
浏览文件 @
aaf818f5
...
...
@@ -75,13 +75,6 @@ class ListenAndServOp : public framework::OperatorBase {
server_thread_
->
join
();
}
std
::
string
GetGradVarNameForTrainer
(
const
std
::
string
&
varname
)
const
{
if
(
grads_counter_
.
find
(
varname
)
==
grads_counter_
.
end
())
{
grads_counter_
[
varname
]
=
0
;
}
return
string
::
Sprintf
(
"%s.trainer_%d"
,
varname
,
grads_counter_
[
varname
]
++
);
}
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
dev_place
)
const
override
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
...
...
@@ -91,8 +84,7 @@ class ListenAndServOp : public framework::OperatorBase {
// FIXME(Yancey1989): initialize rpc server with lazy mode.
rpc_service_
->
SetScope
(
&
recv_scope
);
rpc_service_
->
SetDevCtx
(
&
dev_ctx
);
auto
param_list
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"ParamList"
);
auto
grad_list
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"GradList"
);
auto
ins
=
Inputs
(
"X"
);
auto
fan_in
=
Attr
<
int
>
(
"Fanin"
);
auto
*
block
=
Attr
<
framework
::
BlockDesc
*>
(
kOptimizeBlock
);
...
...
@@ -109,40 +101,24 @@ class ListenAndServOp : public framework::OperatorBase {
// the gradients arrives, just add suffix 0~n and merge the gradient.
rpc_service_
->
SetCond
(
0
);
size_t
recv_var_cnt
=
0
;
size_t
update_param_cnt
=
0
;
int
batch_barrier
=
0
;
while
(
batch_barrier
!=
fan_in
)
{
const
detail
::
MessageWithName
&
v
=
rpc_service_
->
Get
();
auto
grad
_var_name
=
v
.
first
;
if
(
grad
_var_name
==
LISTEN_TERMINATE_MESSAGE
)
{
auto
recv
_var_name
=
v
.
first
;
if
(
recv
_var_name
==
LISTEN_TERMINATE_MESSAGE
)
{
LOG
(
INFO
)
<<
"received terminate message and exit"
;
exit_flag
=
true
;
break
;
}
else
if
(
grad
_var_name
==
BATCH_BARRIER_MESSAGE
)
{
}
else
if
(
recv
_var_name
==
BATCH_BARRIER_MESSAGE
)
{
VLOG
(
3
)
<<
"recv batch barrier message"
;
batch_barrier
++
;
continue
;
}
else
{
// receive a variable
VLOG
(
3
)
<<
"received grad: "
<<
recv_var_name
;
recv_var_cnt
++
;
auto
it
=
std
::
find
(
grad_list
.
begin
(),
grad_list
.
end
(),
grad_var_name
);
std
::
string
param_var_name
;
if
(
it
!=
grad_list
.
end
())
{
param_var_name
=
param_list
[
it
-
grad_list
.
begin
()];
update_param_cnt
++
;
VLOG
(
3
)
<<
"received grad: "
<<
grad_var_name
<<
" updating param: "
<<
param_var_name
;
}
else
{
VLOG
(
3
)
<<
"received variable: "
<<
grad_var_name
<<
" no need to update param"
;
}
if
(
fan_in
>
1
&&
!
param_var_name
.
empty
())
{
grad_var_name
=
this
->
GetGradVarNameForTrainer
(
grad_var_name
);
}
auto
*
var
=
recv_scope
.
FindVar
(
grad_var_name
);
auto
*
var
=
recv_scope
.
FindVar
(
recv_var_name
);
if
(
var
==
nullptr
)
{
LOG
(
ERROR
)
<<
"Can not find server side var: "
<<
grad
_var_name
;
LOG
(
ERROR
)
<<
"Can not find server side var: "
<<
recv
_var_name
;
PADDLE_THROW
(
"Can not find server side var"
);
}
detail
::
DeserializeFromMessage
(
v
.
second
,
dev_ctx
,
var
);
...
...
@@ -151,29 +127,26 @@ class ListenAndServOp : public framework::OperatorBase {
}
}
}
VLOG
(
3
)
<<
"recv "
<<
recv_var_cnt
<<
" parmeters for one barrier."
;
if
(
exit_flag
)
{
rpc_service_
->
ShutDown
();
}
VLOG
(
3
)
<<
"run optimize graph..."
;
try
{
executor
.
Run
(
*
program
,
&
recv_scope
,
block
->
ID
(),
/*global_block*/
false
/*create_local_scope*/
,
false
/*create_vars*/
);
}
catch
(
std
::
exception
&
e
)
{
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
}
// Reset the received sparse variables, the sum operator would not
// sum the input sparse variables which rows is empty at the next
// mini-batch.
// TO
OD
(Yancey1989): move the reset action into an operator, we couldn't
// TO
DO
(Yancey1989): move the reset action into an operator, we couldn't
// have any hide logic in the operator.
for
(
auto
&
var
:
sparse_vars
)
{
var
->
GetMutable
<
framework
::
SelectedRows
>
()
->
mutable_rows
()
->
clear
();
}
rpc_service_
->
SetCond
(
1
);
rpc_service_
->
WaitClientGet
(
update_param_cnt
);
grads_counter_
.
clear
(
);
// FIXME(typhoonzero): use another condition to sync wait clients get.
rpc_service_
->
WaitClientGet
(
ins
.
size
()
);
sparse_vars
.
clear
();
}
// while(true)
}
...
...
@@ -181,13 +154,13 @@ class ListenAndServOp : public framework::OperatorBase {
protected:
std
::
shared_ptr
<
detail
::
AsyncGRPCServer
>
rpc_service_
;
std
::
shared_ptr
<
std
::
thread
>
server_thread_
;
mutable
std
::
unordered_map
<
std
::
string
,
int
>
grads_counter_
;
};
class
ListenAndServOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
ListenAndServOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"(Tensor) Variables that server recv."
).
AsDuplicable
();
AddComment
(
R"DOC(
ListenAndServ operator
...
...
@@ -201,16 +174,7 @@ from send_op and send back variables to recv_op.
.
AddCustomChecker
([](
const
std
::
string
&
ip
)
{
return
!
ip
.
empty
();
});
AddAttr
<
framework
::
BlockDesc
*>
(
kOptimizeBlock
,
"BlockID to run on server side."
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
"ParamList"
,
"type list of string"
,
"grad->param name mapping to find which parameters to optimize."
)
.
SetDefault
({});
AddAttr
<
std
::
vector
<
std
::
string
>>
(
"GradList"
,
"type list of string"
,
"grad->param name mapping to find which parameters to optimize."
)
.
SetDefault
({});
AddAttr
<
int
>
(
"Fanin"
,
"type int"
,
"Number of trainers in the current cluster job"
)
AddAttr
<
int
>
(
"Fanin"
,
"How many clients send to this server."
)
.
SetDefault
(
1
);
}
};
...
...
paddle/fluid/platform/float16.h
浏览文件 @
aaf818f5
...
...
@@ -74,17 +74,12 @@ struct PADDLE_ALIGN(2) float16 {
// The following defaulted special class member functions
// are added to make float16 pass the std::is_trivial test
HOSTDEVICE
inline
float16
()
=
default
;
HOSTDEVICE
inline
float16
(
const
float16
&
)
=
default
;
HOSTDEVICE
inline
float16
&
operator
=
(
const
float16
&
)
=
default
;
HOSTDEVICE
inline
float16
(
float16
&&
)
=
default
;
HOSTDEVICE
inline
float16
&
operator
=
(
float16
&&
)
=
default
;
HOSTDEVICE
inline
~
float16
()
=
default
;
float16
()
=
default
;
float16
(
const
float16
&
o
)
=
default
;
float16
&
operator
=
(
const
float16
&
o
)
=
default
;
float16
(
float16
&&
o
)
=
default
;
float16
&
operator
=
(
float16
&&
o
)
=
default
;
~
float16
()
=
default
;
// Constructors
#ifdef PADDLE_CUDA_FP16
...
...
paddle/fluid/pybind/protobuf.cc
浏览文件 @
aaf818f5
...
...
@@ -172,6 +172,14 @@ void BindBlockDesc(py::module &m) {
[](
BlockDesc
&
self
,
py
::
bytes
byte_name
)
{
std
::
string
name
=
byte_name
;
return
self
.
HasVar
(
name
);
},
py
::
return_value_policy
::
reference
)
.
def
(
"rename_var"
,
[](
BlockDesc
&
self
,
const
py
::
bytes
&
byte_name
,
const
py
::
bytes
&
byte_name_new
)
{
std
::
string
name
=
byte_name
;
std
::
string
new_name
=
byte_name_new
;
self
.
RenameVar
(
name
,
new_name
);
})
.
def
(
"has_var_recursive"
,
[](
BlockDesc
&
self
,
py
::
bytes
byte_name
)
{
...
...
@@ -200,7 +208,7 @@ void BindVarDsec(py::module &m) {
py
::
class_
<
VarDesc
>
var_desc
(
m
,
"VarDesc"
,
""
);
var_desc
.
def
(
"name"
,
[](
const
VarDesc
&
self
)
{
[](
VarDesc
&
self
)
{
py
::
bytes
name
=
self
.
Name
();
return
name
;
},
...
...
@@ -210,6 +218,7 @@ void BindVarDsec(py::module &m) {
.
def
(
"set_shapes"
,
&
VarDesc
::
SetShapes
)
.
def
(
"set_dtype"
,
&
VarDesc
::
SetDataType
)
.
def
(
"set_dtypes"
,
&
VarDesc
::
SetDataTypes
)
.
def
(
"set_capacity"
,
&
VarDesc
::
SetCapacity
)
.
def
(
"shape"
,
&
VarDesc
::
GetShape
,
py
::
return_value_policy
::
reference
)
.
def
(
"shapes"
,
&
VarDesc
::
GetShapes
,
py
::
return_value_policy
::
reference
)
.
def
(
"dtype"
,
&
VarDesc
::
GetDataType
,
py
::
return_value_policy
::
reference
)
...
...
@@ -240,6 +249,7 @@ void BindVarDsec(py::module &m) {
.
value
(
"STEP_SCOPES"
,
proto
::
VarType
::
STEP_SCOPES
)
.
value
(
"LOD_RANK_TABLE"
,
proto
::
VarType
::
LOD_RANK_TABLE
)
.
value
(
"LOD_TENSOR_ARRAY"
,
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
.
value
(
"CHANNEL"
,
proto
::
VarType
::
CHANNEL
)
.
value
(
"PLACE_LIST"
,
proto
::
VarType
::
PLACE_LIST
)
.
value
(
"READER"
,
proto
::
VarType
::
READER
)
.
value
(
"NCCL_COM"
,
proto
::
VarType
::
NCCL_COM
);
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
aaf818f5
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <mutex> // for call_once
#include <unordered_map>
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/framework.pb.h"
...
...
python/paddle/trainer_config_helpers/layer_math.py
浏览文件 @
aaf818f5
...
...
@@ -75,7 +75,7 @@ LayerOutput.__add__ = add
def
sub
(
layeroutput
,
other
):
if
is_compatible_with
(
other
,
float
):
return
slope_intercept_layer
(
input
=
layeroutput
,
intercept
=
other
)
return
slope_intercept_layer
(
input
=
layeroutput
,
intercept
=
-
other
)
if
not
isinstance
(
other
,
LayerOutput
):
logger
.
fatal
(
"LayerOutput can only be subtracted with"
" another Layeroutput or a number"
)
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr
浏览文件 @
aaf818f5
...
...
@@ -230,7 +230,7 @@ layers {
input_layer_name: "__mixed_1__"
}
slope: 1.0
intercept: 2
intercept:
-
2
}
layers {
name: "__slope_intercept_layer_4__"
...
...
@@ -411,4 +411,3 @@ sub_models {
output_layer_names: "__mixed_3__"
is_recurrent_layer_group: false
}
python/paddle/v2/dataset/common.py
浏览文件 @
aaf818f5
...
...
@@ -74,6 +74,8 @@ def download(url, module_name, md5sum, save_name=None):
retry
=
0
retry_limit
=
3
while
not
(
os
.
path
.
exists
(
filename
)
and
md5file
(
filename
)
==
md5sum
):
if
os
.
path
.
exists
(
filename
):
print
"file md5"
,
md5file
(
filename
),
md5sum
if
retry
<
retry_limit
:
retry
+=
1
else
:
...
...
python/paddle/v2/fluid/distribute_transpiler.py
浏览文件 @
aaf818f5
此差异已折叠。
点击以展开。
python/paddle/v2/fluid/framework.py
浏览文件 @
aaf818f5
...
...
@@ -286,6 +286,10 @@ class Variable(object):
def
name
(
self
):
return
self
.
desc
.
name
()
@
name
.
setter
def
name
(
self
,
new_name
):
self
.
desc
.
set_name
(
new_name
)
@
property
def
shape
(
self
):
# convert to tuple, make it as same as numpy API.
...
...
@@ -531,6 +535,12 @@ class Operator(object):
"""
return
self
.
desc
.
input
(
name
)
def
rename_input
(
self
,
old_name
,
new_name
):
self
.
desc
.
rename_input
(
old_name
,
new_name
)
def
rename_output
(
self
,
old_name
,
new_name
):
self
.
desc
.
rename_output
(
old_name
,
new_name
)
@
property
def
input_names
(
self
):
"""
...
...
@@ -540,6 +550,14 @@ class Operator(object):
"""
return
self
.
desc
.
input_names
()
@
property
def
input_arg_names
(
self
):
return
self
.
desc
.
input_arg_names
()
@
property
def
output_arg_names
(
self
):
return
self
.
desc
.
output_arg_names
()
def
output
(
self
,
name
):
"""
Get output arguments by the output parameter name
...
...
@@ -741,6 +759,60 @@ class Block(object):
def
has_var
(
self
,
name
):
return
name
in
self
.
vars
def
rename_var
(
self
,
name
,
new_name
):
"""
Rename variable in vars and ops' inputs and outputs
"""
if
not
self
.
has_var
(
name
):
raise
ValueError
(
"var %s is not in current"
%
name
)
v
=
self
.
var
(
name
)
stop_gradient
=
None
trainable
=
None
optimize_attr
=
None
regularizer
=
None
gradient_clip_attr
=
None
error_clip
=
None
if
type
(
v
)
==
Parameter
:
stop_gradient
=
v
.
stop_gradient
trainable
=
v
.
trainable
optimize_attr
=
v
.
optimize_attr
regularizer
=
v
.
regularizer
gradient_clip_attr
=
v
.
gradient_clip_attr
error_clip
=
v
.
error_clip
elif
type
(
v
)
==
Variable
:
error_clip
=
v
.
error_clip
stop_gradient
=
v
.
stop_gradient
else
:
raise
ValueError
(
"unsupported var type: %s"
,
type
(
v
))
self
.
desc
.
rename_var
(
name
,
new_name
)
d
=
self
.
desc
.
find_var
(
new_name
)
var
=
None
if
type
(
v
)
==
Parameter
:
var
=
Parameter
(
self
,
d
.
shape
(),
d
.
dtype
(),
name
=
new_name
,
stop_gradient
=
stop_gradient
,
trainable
=
trainable
,
optimize_attr
=
optimize_attr
,
regularizer
=
regularizer
,
gradient_clip_attr
=
gradient_clip_attr
,
error_clip
=
error_clip
)
elif
type
(
v
)
==
Variable
:
var
=
Variable
(
self
,
name
=
new_name
,
error_clip
=
error_clip
,
stop_gradient
=
stop_gradient
)
# rename the python side, sync_with_cpp will only add
# new vars/ops to python side.
self
.
vars
[
new_name
]
=
var
del
self
.
vars
[
name
]
self
.
sync_with_cpp
()
def
create_parameter
(
self
,
*
args
,
**
kwargs
):
global_block
=
self
.
program
.
global_block
()
param
=
Parameter
(
global_block
,
*
args
,
**
kwargs
)
...
...
python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py
浏览文件 @
aaf818f5
...
...
@@ -58,14 +58,19 @@ trainers = int(os.getenv("TRAINERS")) # total trainer count
current_endpoint
=
os
.
getenv
(
"SERVER_ENDPOINT"
)
# current pserver endpoint
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
# get the training role: trainer/pserver
if
not
current_endpoint
:
print
(
"need env SERVER_ENDPOINT"
)
exit
(
1
)
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
optimize_ops
,
params_grads
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
optimize_ops
,
params_grads
,
0
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
if
not
current_endpoint
:
print
(
"need env SERVER_ENDPOINT"
)
exit
(
1
)
pserver_prog
=
t
.
get_pserver_program
(
current_endpoint
)
pserver_startup
=
t
.
get_startup_program
(
current_endpoint
,
pserver_prog
)
exe
.
run
(
pserver_startup
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录