Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
d7e5e1f1
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d7e5e1f1
编写于
3月 15, 2018
作者:
W
wanghaoshuang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into average_model
上级
8a645685
e26f1123
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
522 addition
and
171 deletion
+522
-171
CMakeLists.txt
CMakeLists.txt
+1
-1
paddle/CMakeLists.txt
paddle/CMakeLists.txt
+23
-21
paddle/fluid/framework/channel.h
paddle/fluid/framework/channel.h
+109
-0
paddle/fluid/framework/channel_impl.h
paddle/fluid/framework/channel_impl.h
+146
-7
paddle/fluid/operators/assign_op.cc
paddle/fluid/operators/assign_op.cc
+1
-0
paddle/fluid/operators/mul_op.cc
paddle/fluid/operators/mul_op.cc
+7
-7
paddle/fluid/operators/mul_op.cu.cc
paddle/fluid/operators/mul_op.cu.cc
+6
-4
paddle/fluid/operators/mul_op.h
paddle/fluid/operators/mul_op.h
+1
-1
paddle/fluid/operators/nccl_op.cu.cc
paddle/fluid/operators/nccl_op.cu.cc
+2
-0
paddle/fluid/operators/scatter_op.cc
paddle/fluid/operators/scatter_op.cc
+17
-18
paddle/fluid/operators/scatter_op.cu
paddle/fluid/operators/scatter_op.cu
+10
-10
paddle/fluid/operators/scatter_op.h
paddle/fluid/operators/scatter_op.h
+11
-11
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+9
-1
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+56
-6
python/CMakeLists.txt
python/CMakeLists.txt
+34
-27
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+6
-3
python/paddle/fluid/layers/ops.py
python/paddle/fluid/layers/ops.py
+7
-25
python/paddle/fluid/tests/unittests/test_mul_op.py
python/paddle/fluid/tests/unittests/test_mul_op.py
+38
-0
python/paddle/fluid/tests/unittests/test_scatter_op.py
python/paddle/fluid/tests/unittests/test_scatter_op.py
+1
-1
python/setup.py.in
python/setup.py.in
+37
-28
未找到文件。
CMakeLists.txt
浏览文件 @
d7e5e1f1
...
...
@@ -53,7 +53,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option
(
ON_TRAVIS
"Exclude special unit test on Travis CI"
OFF
)
option
(
WITH_C_API
"Compile PaddlePaddle with C-API(Prediction)"
OFF
)
# TODO: Only compile PaddlePaddle fluid version by WITH_FLUID option.
option
(
WITH_FLUID
"Compile PaddlePaddle fluid only(TODO)"
O
N
)
option
(
WITH_FLUID
"Compile PaddlePaddle fluid only(TODO)"
O
FF
)
option
(
WITH_GOLANG
"Compile PaddlePaddle with GOLANG"
OFF
)
option
(
GLIDE_INSTALL
"Download and install go dependencies "
ON
)
option
(
USE_NNPACK
"Compile PaddlePaddle with NNPACK library"
OFF
)
...
...
paddle/CMakeLists.txt
浏览文件 @
d7e5e1f1
add_subdirectory
(
cuda
)
add_subdirectory
(
function
)
add_subdirectory
(
utils
)
add_subdirectory
(
math
)
add_subdirectory
(
gserver
)
add_subdirectory
(
parameter
)
add_subdirectory
(
testing
)
if
(
MOBILE_INFERENCE
)
add_subdirectory
(
capi
)
else
()
add_subdirectory
(
pserver
)
add_subdirectory
(
trainer
)
add_subdirectory
(
scripts
)
if
(
NOT WITH_FLUID
)
add_subdirectory
(
cuda
)
add_subdirectory
(
function
)
add_subdirectory
(
utils
)
add_subdirectory
(
math
)
add_subdirectory
(
gserver
)
add_subdirectory
(
parameter
)
if
(
WITH_C_API
)
if
(
MOBILE_INFERENCE
)
add_subdirectory
(
capi
)
endif
()
else
()
add_subdirectory
(
pserver
)
add_subdirectory
(
trainer
)
add_subdirectory
(
scripts
)
if
(
NOT ANDROID AND NOT IOS
)
add_subdirectory
(
fluid
)
endif
()
if
(
WITH_C_API
)
add_subdirectory
(
capi
)
endif
()
if
(
WITH_SWIG_PY
)
add_subdirectory
(
api
)
if
(
WITH_SWIG_PY
)
add_subdirectory
(
api
)
endif
()
endif
()
endif
()
add_subdirectory
(
testing
)
if
(
NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS
)
add_subdirectory
(
fluid
)
endif
()
paddle/fluid/framework/channel.h
浏览文件 @
d7e5e1f1
...
...
@@ -15,23 +15,43 @@ limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <condition_variable>
#include <typeindex>
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
enum
class
ChannelAction
{
SEND
=
0
,
RECEIVE
=
1
,
CLOSE
=
2
,
};
// Channel is the abstract class of buffered and un-buffered channels.
template
<
typename
T
>
class
Channel
{
public:
virtual
bool
CanSend
()
=
0
;
virtual
bool
CanReceive
()
=
0
;
virtual
bool
Send
(
T
*
)
=
0
;
virtual
bool
Receive
(
T
*
)
=
0
;
virtual
size_t
Cap
()
=
0
;
virtual
void
Lock
()
=
0
;
virtual
void
Unlock
()
=
0
;
virtual
bool
IsClosed
()
=
0
;
virtual
void
Close
()
=
0
;
virtual
~
Channel
()
{}
virtual
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
=
0
;
virtual
void
AddToReceiveQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
=
0
;
virtual
void
RemoveFromSendQ
(
const
void
*
referrer
)
=
0
;
virtual
void
RemoveFromReceiveQ
(
const
void
*
referrer
)
=
0
;
};
// Forward declaration of channel implementations.
...
...
@@ -80,6 +100,27 @@ class ChannelHolder {
return
channel
!=
nullptr
?
channel
->
Receive
(
data
)
:
false
;
}
bool
IsClosed
()
{
if
(
IsInitialized
())
{
return
holder_
->
IsClosed
();
}
return
false
;
}
bool
CanSend
()
{
if
(
IsInitialized
())
{
return
holder_
->
CanSend
();
}
return
false
;
}
bool
CanReceive
()
{
if
(
IsInitialized
())
{
return
holder_
->
CanReceive
();
}
return
false
;
}
void
close
()
{
if
(
IsInitialized
())
holder_
->
Close
();
}
...
...
@@ -97,6 +138,50 @@ class ChannelHolder {
if
(
IsInitialized
())
holder_
->
Unlock
();
}
template
<
typename
T
>
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
if
(
IsInitialized
())
{
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
AddToSendQ
(
referrer
,
data
,
cond
,
cb
);
}
}
}
template
<
typename
T
>
void
AddToReceiveQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
if
(
IsInitialized
())
{
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
AddToReceiveQ
(
referrer
,
data
,
cond
,
cb
);
}
}
}
template
<
typename
T
>
void
RemoveFromSendQ
(
const
void
*
referrer
)
{
if
(
IsInitialized
())
{
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
RemoveFromSendQ
(
referrer
);
}
}
}
template
<
typename
T
>
void
RemoveFromReceiveQ
(
const
void
*
referrer
)
{
if
(
IsInitialized
())
{
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
RemoveFromReceiveQ
(
referrer
);
}
}
}
inline
bool
IsInitialized
()
const
{
return
holder_
!=
nullptr
;
}
inline
const
std
::
type_index
Type
()
{
...
...
@@ -113,6 +198,9 @@ class ChannelHolder {
virtual
~
Placeholder
()
{}
virtual
const
std
::
type_index
Type
()
const
=
0
;
virtual
void
*
Ptr
()
const
=
0
;
virtual
bool
IsClosed
()
=
0
;
virtual
bool
CanSend
()
=
0
;
virtual
bool
CanReceive
()
=
0
;
virtual
void
Close
()
=
0
;
virtual
void
Lock
()
=
0
;
virtual
void
Unlock
()
=
0
;
...
...
@@ -129,6 +217,27 @@ class ChannelHolder {
virtual
void
*
Ptr
()
const
{
return
static_cast
<
void
*>
(
channel_
.
get
());
}
virtual
bool
IsClosed
()
{
if
(
channel_
)
{
return
channel_
->
IsClosed
();
}
return
false
;
}
virtual
bool
CanSend
()
{
if
(
channel_
)
{
return
channel_
->
CanSend
();
}
return
false
;
}
virtual
bool
CanReceive
()
{
if
(
channel_
)
{
return
channel_
->
CanReceive
();
}
return
false
;
}
virtual
void
Close
()
{
if
(
channel_
)
channel_
->
Close
();
}
...
...
paddle/fluid/framework/channel_impl.h
浏览文件 @
d7e5e1f1
...
...
@@ -29,32 +29,50 @@ class ChannelImpl : public paddle::framework::Channel<T> {
friend
void
paddle
::
framework
::
CloseChannel
<
T
>
(
Channel
<
T
>
*
);
public:
virtual
bool
CanSend
();
virtual
bool
CanReceive
();
virtual
bool
Send
(
T
*
);
virtual
bool
Receive
(
T
*
);
virtual
size_t
Cap
()
{
return
cap_
;
}
virtual
void
Lock
();
virtual
void
Unlock
();
virtual
bool
IsClosed
();
virtual
void
Close
();
ChannelImpl
(
size_t
);
virtual
~
ChannelImpl
();
virtual
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
);
virtual
void
AddToReceiveQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
);
virtual
void
RemoveFromSendQ
(
const
void
*
referrer
);
virtual
void
RemoveFromReceiveQ
(
const
void
*
referrer
);
private:
struct
QueueMessage
{
T
*
data
;
std
::
condition_variable_any
cond
;
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
;
bool
chan_closed
=
false
;
bool
completed
=
false
;
const
void
*
referrer
;
// TODO(thuan): figure out better way to do this
std
::
function
<
bool
(
ChannelAction
)
>
callback
;
QueueMessage
(
T
*
item
)
:
data
(
item
)
{}
QueueMessage
(
T
*
item
)
:
data
(
item
),
cond
(
std
::
make_shared
<
std
::
condition_variable_any
>
())
{}
QueueMessage
(
T
*
item
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
)
:
data
(
item
),
cond
(
cond
)
{}
void
Wait
(
std
::
unique_lock
<
std
::
recursive_mutex
>
&
lock
)
{
cond
.
wait
(
lock
,
[
this
]()
{
return
completed
;
});
cond
->
wait
(
lock
,
[
this
]()
{
return
completed
;
});
}
void
Notify
()
{
completed
=
true
;
cond
.
notify_all
();
cond
->
notify_all
();
}
};
...
...
@@ -87,6 +105,18 @@ ChannelImpl<T>::ChannelImpl(size_t capacity)
PADDLE_ENFORCE_GE
(
capacity
,
0
);
}
template
<
typename
T
>
bool
ChannelImpl
<
T
>::
CanSend
()
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
return
!
closed_
&&
(
!
recvq
.
empty
()
||
buf_
.
size
()
<
cap_
);
}
template
<
typename
T
>
bool
ChannelImpl
<
T
>::
CanReceive
()
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
return
!
(
closed_
&&
buf_
.
empty
())
&&
(
!
sendq
.
empty
()
||
buf_
.
size
()
>
0
);
}
template
<
typename
T
>
bool
ChannelImpl
<
T
>::
Send
(
T
*
item
)
{
send_ctr
++
;
...
...
@@ -105,7 +135,24 @@ bool ChannelImpl<T>::Send(T *item) {
std
::
shared_ptr
<
QueueMessage
>
m
=
recvq
.
front
();
recvq
.
pop_front
();
// Do the data transfer
*
(
m
->
data
)
=
std
::
move
(
*
item
);
// We will do this data transfer if either of the following
// cases are true
// 1. callback == nullptr // This means it was a regular channel send
// 2. callback returns true
bool
do_send
=
true
;
if
(
m
->
callback
!=
nullptr
)
do_send
=
m
->
callback
(
ChannelAction
::
SEND
);
if
(
do_send
)
*
(
m
->
data
)
=
std
::
move
(
*
item
);
else
// We cannot do the data transfer because
// this QueueMessage was added by Select
// and some other case was executed.
// So call the Send function again.
// We do not care about notifying other
// because they would have been notified
// by the executed select case.
return
Send
(
item
);
// Wake up the blocked process and unlock
m
->
Notify
();
lock
.
unlock
();
...
...
@@ -150,7 +197,25 @@ bool ChannelImpl<T>::Receive(T *item) {
std
::
shared_ptr
<
QueueMessage
>
m
=
sendq
.
front
();
sendq
.
pop_front
();
// Do the data transfer
*
item
=
std
::
move
(
*
(
m
->
data
));
// We will do this data transfer if either of the following
// cases are true
// 1. callback == nullptr // This means it was a regular channel send
// 2. callback returns true
bool
do_receive
=
true
;
if
(
m
->
callback
!=
nullptr
)
do_receive
=
m
->
callback
(
ChannelAction
::
RECEIVE
);
if
(
do_receive
)
*
item
=
std
::
move
(
*
(
m
->
data
));
else
// We cannot do the data transfer because
// this QueueMessage was added by Select
// and some other case was executed.
// So call the Receive function again.
// We do not care about notifying other
// because they would have been notified
// by the executed select case.
return
Receive
(
item
);
// Wake up the blocked process and unlock
m
->
Notify
();
lock
.
unlock
();
...
...
@@ -186,6 +251,12 @@ void ChannelImpl<T>::Unlock() {
mu_
.
unlock
();
}
template
<
typename
T
>
bool
ChannelImpl
<
T
>::
IsClosed
()
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
return
closed_
;
}
template
<
typename
T
>
void
ChannelImpl
<
T
>::
Close
()
{
std
::
unique_lock
<
std
::
recursive_mutex
>
lock
{
mu_
};
...
...
@@ -203,6 +274,12 @@ void ChannelImpl<T>::Close() {
std
::
shared_ptr
<
QueueMessage
>
m
=
recvq
.
front
();
recvq
.
pop_front
();
m
->
chan_closed
=
true
;
// Execute callback function (if any)
if
(
m
->
callback
!=
nullptr
)
{
m
->
callback
(
ChannelAction
::
CLOSE
);
}
m
->
Notify
();
}
...
...
@@ -211,10 +288,72 @@ void ChannelImpl<T>::Close() {
std
::
shared_ptr
<
QueueMessage
>
m
=
sendq
.
front
();
sendq
.
pop_front
();
m
->
chan_closed
=
true
;
// Execute callback function (if any)
if
(
m
->
callback
!=
nullptr
)
{
m
->
callback
(
ChannelAction
::
CLOSE
);
}
m
->
Notify
();
}
}
template
<
typename
T
>
void
ChannelImpl
<
T
>::
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
auto
m
=
std
::
make_shared
<
QueueMessage
>
(
data
,
cond
);
m
->
referrer
=
referrer
;
m
->
callback
=
cb
;
sendq
.
push_back
(
m
);
}
template
<
typename
T
>
void
ChannelImpl
<
T
>::
AddToReceiveQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
auto
m
=
std
::
make_shared
<
QueueMessage
>
(
data
,
cond
);
m
->
referrer
=
referrer
;
m
->
callback
=
cb
;
recvq
.
push_back
(
m
);
}
template
<
typename
T
>
void
ChannelImpl
<
T
>::
RemoveFromSendQ
(
const
void
*
referrer
)
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
for
(
auto
it
=
sendq
.
begin
();
it
!=
sendq
.
end
();)
{
std
::
shared_ptr
<
QueueMessage
>
sendMsg
=
(
std
::
shared_ptr
<
QueueMessage
>
)
*
it
;
if
(
sendMsg
->
referrer
==
referrer
)
{
it
=
sendq
.
erase
(
it
);
send_ctr
--
;
}
else
{
++
it
;
}
}
}
template
<
typename
T
>
void
ChannelImpl
<
T
>::
RemoveFromReceiveQ
(
const
void
*
referrer
)
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
for
(
auto
it
=
recvq
.
begin
();
it
!=
recvq
.
end
();)
{
std
::
shared_ptr
<
QueueMessage
>
recvMsg
=
(
std
::
shared_ptr
<
QueueMessage
>
)
*
it
;
if
(
recvMsg
->
referrer
==
referrer
)
{
it
=
recvq
.
erase
(
it
);
recv_ctr
--
;
}
else
{
++
it
;
}
}
}
template
<
typename
T
>
ChannelImpl
<
T
>::~
ChannelImpl
()
{
Close
();
...
...
paddle/fluid/operators/assign_op.cc
浏览文件 @
d7e5e1f1
...
...
@@ -56,6 +56,7 @@ class AssignFunctor {
private:
void
copy_tensor
(
const
framework
::
LoDTensor
&
lod_tensor
,
framework
::
LoDTensor
*
out
)
const
{
if
(
lod_tensor
.
numel
()
==
0
)
return
;
auto
&
out_tensor
=
*
out
;
TensorCopy
(
lod_tensor
,
lod_tensor
.
place
(),
dev_ctx_
,
&
out_tensor
);
out_tensor
.
set_lod
(
lod_tensor
.
lod
());
...
...
paddle/fluid/operators/mul_op.cc
浏览文件 @
d7e5e1f1
...
...
@@ -17,11 +17,14 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
framework
::
OpKernelType
;
using
framework
::
Tensor
;
class
MulOp
ShapeInference
:
public
framework
::
InferShapeBase
{
class
MulOp
:
public
framework
::
OperatorWithKernel
{
public:
void
operator
()(
framework
::
InferShapeContext
*
ctx
)
const
override
{
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of MulOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
),
"Input(Y) of MulOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
...
...
@@ -122,7 +125,7 @@ or not. But the output only shares the LoD information with input $X$.
}
};
class
Mul
OpGrad
:
public
framework
::
OperatorWithKernel
{
class
Mul
GradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
...
@@ -156,10 +159,7 @@ class MulOpGrad : public framework::OperatorWithKernel {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
mul
,
paddle
::
framework
::
OperatorWithKernel
,
ops
::
MulOpMaker
,
ops
::
MulOpShapeInference
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
mul_grad
,
ops
::
MulOpGrad
);
REGISTER_OP
(
mul
,
ops
::
MulOp
,
ops
::
MulOpMaker
,
mul_grad
,
ops
::
MulGradOp
);
REGISTER_OP_CPU_KERNEL
(
mul
,
ops
::
MulKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
...
...
paddle/fluid/operators/mul_op.cu.cc
浏览文件 @
d7e5e1f1
...
...
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/mul_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
mul
,
ops
::
MulKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
mul_grad
,
ops
::
MulGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
mul
,
ops
::
MulKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
MulKernel
<
plat
::
CUDADeviceContext
,
plat
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
mul_grad
,
ops
::
MulGradKernel
<
plat
::
CUDADeviceContext
,
float
>
);
paddle/fluid/operators/mul_op.h
浏览文件 @
d7e5e1f1
...
...
@@ -48,7 +48,7 @@ class MulKernel : public framework::OpKernel<T> {
}
math
::
matmul
<
DeviceContext
,
T
>
(
context
.
template
device_context
<
DeviceContext
>(),
x_matrix
,
false
,
y_matrix
,
false
,
1
,
z
,
0
);
y_matrix
,
false
,
static_cast
<
T
>
(
1
),
z
,
static_cast
<
T
>
(
0
)
);
if
(
z_dim
.
size
()
!=
2
)
{
z
->
Resize
(
z_dim
);
}
...
...
paddle/fluid/operators/nccl_op.cu.cc
浏览文件 @
d7e5e1f1
...
...
@@ -106,6 +106,8 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
T
*
recvbuffer
=
nullptr
;
if
(
root
==
gpu_id
)
{
recvbuffer
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
}
else
{
out
->
Resize
(
framework
::
make_ddim
({
0
}));
}
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" invoke reduce. send "
<<
x
->
numel
()
<<
" recv "
<<
out
->
numel
();
...
...
paddle/fluid/operators/scatter_op.cc
浏览文件 @
d7e5e1f1
...
...
@@ -23,24 +23,24 @@ class ScatterOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Ref
"
),
"Input(
Ref
) of ScatterOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"I
ndex
"
),
"Input(I
ndex
) of ScatterOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
X
"
),
"Input(
X
) of ScatterOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"I
ds
"
),
"Input(I
ds
) of ScatterOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Updates"
),
"Input(Updates) of ScatterOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of ScatterOp should not be null."
);
auto
updates_dims
=
ctx
->
GetInputDim
(
"Updates"
);
auto
ref_dims
=
ctx
->
GetInputDim
(
"
Ref
"
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"I
ndex
"
).
size
(),
1
,
"Update I
ndex
should be 1-D."
);
auto
ref_dims
=
ctx
->
GetInputDim
(
"
X
"
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"I
ds
"
).
size
(),
1
,
"Update I
ds
should be 1-D."
);
PADDLE_ENFORCE_EQ
(
ref_dims
.
size
(),
updates_dims
.
size
(),
"
Ref
erence and Updates should have the same shape size"
);
"
X
erence and Updates should have the same shape size"
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"Updates"
)[
0
],
ctx
->
GetInputDim
(
"I
ndex
"
)[
0
],
"Updates and I
ndex
should have same batch-size."
);
ctx
->
GetInputDim
(
"I
ds
"
)[
0
],
"Updates and I
ds
should have same batch-size."
);
framework
::
DDim
data_dim
(
updates_dims
);
for
(
int
i
=
1
;
i
<
data_dim
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
data_dim
[
i
],
updates_dims
[
i
]);
...
...
@@ -52,7 +52,7 @@ class ScatterOp : public framework::OperatorWithKernel {
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
Ref
"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
X
"
)
->
type
()),
ctx
.
device_context
());
}
};
...
...
@@ -64,14 +64,14 @@ class ScatterGradOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Updates"
),
ctx
->
GetInputDim
(
"Updates"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"
Ref"
),
ctx
->
GetInputDim
(
"Ref
"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"
X"
),
ctx
->
GetInputDim
(
"X
"
));
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
Ref
"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
X
"
)
->
type
()),
ctx
.
device_context
());
}
};
...
...
@@ -80,9 +80,8 @@ class ScatterOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ScatterOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"Ref"
,
"The source input of scatter op"
);
AddInput
(
"Index"
,
"The index input of scatter op where Ref will be updated"
);
AddInput
(
"X"
,
"The source input of scatter op"
);
AddInput
(
"Ids"
,
"The index input of scatter op where X will be updated"
);
AddInput
(
"Updates"
,
"The updated value of updates op"
);
AddOutput
(
"Out"
,
"The output of add op"
);
AddComment
(
R"DOC(
...
...
@@ -91,8 +90,8 @@ Scatter Operator.
This operator obtains output by updating the input on selected indices on the first axis:
$$
Out =
Ref
\\
Out[I
ndex] = Ref[Index
] + Updates
Out =
X
\\
Out[I
ds] = X[Ids
] + Updates
$$
)DOC"
);
...
...
paddle/fluid/operators/scatter_op.cu
浏览文件 @
d7e5e1f1
...
...
@@ -25,14 +25,14 @@ class ScatterOpCUDAKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on GPU device."
);
auto
*
Ref
=
ctx
.
Input
<
Tensor
>
(
"Ref
"
);
auto
*
I
ndex
=
ctx
.
Input
<
Tensor
>
(
"Index
"
);
auto
*
X
=
ctx
.
Input
<
Tensor
>
(
"X
"
);
auto
*
I
ds
=
ctx
.
Input
<
Tensor
>
(
"Ids
"
);
auto
*
Updates
=
ctx
.
Input
<
Tensor
>
(
"Updates"
);
auto
*
Out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
Out
->
ShareDataWith
(
*
Ref
);
Out
->
ShareDataWith
(
*
X
);
GPUScatterAssign
<
T
>
(
ctx
.
device_context
(),
*
Updates
,
*
I
ndex
,
Out
);
GPUScatterAssign
<
T
>
(
ctx
.
device_context
(),
*
Updates
,
*
I
ds
,
Out
);
}
};
...
...
@@ -42,16 +42,16 @@ class ScatterGradOpCUDAKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on GPU device."
);
auto
*
d
Ref
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Ref
"
));
auto
*
d
X
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X
"
));
auto
*
dUpdates
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Updates"
));
auto
*
I
ndex
=
ctx
.
Input
<
Tensor
>
(
"Index
"
);
auto
*
I
ds
=
ctx
.
Input
<
Tensor
>
(
"Ids
"
);
auto
*
dOut
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
// In place gradient: d
Ref
= dO
d
Ref
->
ShareDataWith
(
*
dOut
);
// In place gradient: d
X
= dO
d
X
->
ShareDataWith
(
*
dOut
);
dUpdates
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// Gradient by Gather: dUpdates = dO[I
ndex
]
GPUGather
<
T
>
(
ctx
.
device_context
(),
*
dOut
,
*
I
ndex
,
dUpdates
);
// Gradient by Gather: dUpdates = dO[I
ds
]
GPUGather
<
T
>
(
ctx
.
device_context
(),
*
dOut
,
*
I
ds
,
dUpdates
);
}
};
...
...
paddle/fluid/operators/scatter_op.h
浏览文件 @
d7e5e1f1
...
...
@@ -29,15 +29,15 @@ class ScatterOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on CPU."
);
auto
*
Ref
=
ctx
.
Input
<
Tensor
>
(
"Ref
"
);
auto
*
I
ndex
=
ctx
.
Input
<
Tensor
>
(
"Index
"
);
auto
*
X
=
ctx
.
Input
<
Tensor
>
(
"X
"
);
auto
*
I
ds
=
ctx
.
Input
<
Tensor
>
(
"Ids
"
);
auto
*
Updates
=
ctx
.
Input
<
Tensor
>
(
"Updates"
);
auto
*
Out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
// In place output: Out =
Ref, Out[Index
] += Updates
Out
->
ShareDataWith
(
*
Ref
);
// In place output: Out =
X, Out[Ids
] += Updates
Out
->
ShareDataWith
(
*
X
);
// Apply ScatterUpdate: Out[index] += Updates[:]
ScatterAssign
<
T
>
(
ctx
.
device_context
(),
*
Updates
,
*
I
ndex
,
Out
);
ScatterAssign
<
T
>
(
ctx
.
device_context
(),
*
Updates
,
*
I
ds
,
Out
);
}
};
...
...
@@ -47,16 +47,16 @@ class ScatterGradientOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on CPU."
);
auto
*
d
Ref
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Ref
"
));
auto
*
d
X
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X
"
));
auto
*
dUpdates
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Updates"
));
auto
*
I
ndex
=
ctx
.
Input
<
Tensor
>
(
"Index
"
);
auto
*
I
ds
=
ctx
.
Input
<
Tensor
>
(
"Ids
"
);
auto
*
dOut
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
// In place gradient: d
Ref
= dO
d
Ref
->
ShareDataWith
(
*
dOut
);
// In place gradient: d
X
= dO
d
X
->
ShareDataWith
(
*
dOut
);
dUpdates
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// Gradient by Gather: dUpdates += dO[I
ndex
]
CPUGather
<
T
>
(
ctx
.
device_context
(),
*
dOut
,
*
I
ndex
,
dUpdates
);
// Gradient by Gather: dUpdates += dO[I
ds
]
CPUGather
<
T
>
(
ctx
.
device_context
(),
*
dOut
,
*
I
ds
,
dUpdates
);
}
};
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
d7e5e1f1
...
...
@@ -31,6 +31,7 @@ limitations under the License. */
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/net_op.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/pybind/const_value.h"
...
...
@@ -103,12 +104,14 @@ PYBIND11_PLUGIN(core) {
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int64_t
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
bool
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
uint16_t
>
)
#ifdef PADDLE_WITH_CUDA
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int64_t
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
bool
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
uint16_t
>
)
#endif
.
def
(
"shape"
,
[](
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
.
def
(
"set_float_element"
,
TensorSetElement
<
float
>
)
...
...
@@ -315,7 +318,6 @@ All parameter, weight, gradient are variables in Paddle.
#endif
});
// clang-format on
#ifdef PADDLE_WITH_CUDA
py
::
class_
<
platform
::
Communicator
>
(
m
,
"Communicator"
).
def
(
py
::
init
<>
());
#endif
...
...
@@ -423,6 +425,12 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"init_devices"
,
&
framework
::
InitDevices
);
m
.
def
(
"is_compiled_with_cuda"
,
IsCompiledWithCUDA
);
#ifdef PADDLE_WITH_CUDA
m
.
def
(
"is_float16_supported"
,
[](
const
platform
::
CUDAPlace
&
place
)
->
bool
{
// Only GPUs with Compute Capability >= 53 support float16
return
platform
::
GetCUDAComputeCapability
(
place
.
device
)
>=
53
;
});
#endif
m
.
def
(
"set_feed_variable"
,
framework
::
SetFeedVariable
);
m
.
def
(
"get_fetch_variable"
,
framework
::
GetFetchVariable
);
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
d7e5e1f1
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/float16.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
...
...
@@ -77,21 +78,32 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
}
else
if
(
paddle
::
platform
::
is_cpu_place
(
tensor
.
place
()))
{
dst_tensor
=
tensor
;
}
return
py
::
buffer_info
(
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
py
::
format_descriptor
<
CUR_TYPE
>::
format
(),
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
if
(
std
::
type_index
(
typeid
(
CUR_TYPE
))
==
std
::
type_index
(
typeid
(
platform
::
float16
)))
{
return
py
::
buffer_info
(
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
"e"
,
/* np.dtype('e') == np.float16 */
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
}
else
{
return
py
::
buffer_info
(
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
py
::
format_descriptor
<
CUR_TYPE
>::
format
(),
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
}
}
else
{
constexpr
bool
less
=
I
+
1
<
std
::
tuple_size
<
std
::
tuple
<
ARGS
...
>>::
value
;
return
CastToPyBufferImpl
<
less
,
I
+
1
,
ARGS
...
>
()(
tensor
);
}
}
};
}
// namespace details
inline
py
::
buffer_info
CastToPyBuffer
(
framework
::
Tensor
&
tensor
)
{
auto
buffer_info
=
details
::
CastToPyBufferImpl
<
true
,
0
,
float
,
int
,
double
,
int64_t
,
bool
>
()(
tensor
);
details
::
CastToPyBufferImpl
<
true
,
0
,
float
,
int
,
double
,
int64_t
,
bool
,
platform
::
float16
>
()(
tensor
);
return
buffer_info
;
}
...
...
@@ -136,6 +148,22 @@ void PyCPUTensorSetFromArray(
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
}
template
<
>
void
PyCPUTensorSetFromArray
(
framework
::
Tensor
&
self
,
py
::
array_t
<
uint16_t
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
,
paddle
::
platform
::
CPUPlace
&
place
)
{
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
((
int
)
array
.
shape
()[
i
]);
}
self
.
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
platform
::
float16
>
(
place
);
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
uint16_t
)
*
array
.
size
());
}
#ifdef PADDLE_WITH_CUDA
template
<
typename
T
>
void
PyCUDATensorSetFromArray
(
...
...
@@ -157,6 +185,28 @@ void PyCUDATensorSetFromArray(
paddle
::
platform
::
GpuMemcpyAsync
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
(),
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
}
template
<
>
void
PyCUDATensorSetFromArray
(
framework
::
Tensor
&
self
,
py
::
array_t
<
uint16_t
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
,
paddle
::
platform
::
CUDAPlace
&
place
)
{
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
((
int
)
array
.
shape
()[
i
]);
}
self
.
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
platform
::
float16
>
(
place
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
dev_ctx
=
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
place
));
paddle
::
platform
::
GpuMemcpyAsync
(
dst
,
array
.
data
(),
sizeof
(
uint16_t
)
*
array
.
size
(),
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
}
#endif
}
// namespace pybind
...
...
python/CMakeLists.txt
浏览文件 @
d7e5e1f1
file
(
GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py
)
file
(
GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py
)
file
(
GLOB UTILS_PY_FILES . ./paddle/utils/*.py
)
file
(
GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py
)
file
(
GLOB_RECURSE FLUID_PY_FILES ./paddle/fluid/ *.py
)
set
(
PY_FILES paddle/__init__.py
${
TRAINER_PY_FILES
}
${
HELPERS_PY_FILES
}
${
UTILS_PY_FILES
}
${
V2_PY_FILES
}
${
FLUID_PY_FILES
}
)
add_custom_target
(
copy_paddle_master
)
if
(
NOT WITH_FLUID
)
file
(
GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py
)
file
(
GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py
)
file
(
GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py
)
set
(
PY_FILES
${
PY_FILES
}
${
TRAINER_PY_FILES
}
${
HELPERS_PY_FILES
}
${
V2_PY_FILES
}
)
SET
(
COPY_PADDLE_MASTER
""
)
if
(
WITH_GOLANG
)
SET
(
COPY_PADDLE_MASTER
"copy_paddle_master"
)
add_custom_command
(
TARGET
${
COPY_PADDLE_MASTER
}
COMMAND cp
${
paddle_master_LIB_PATH
}
${
PADDLE_SOURCE_DIR
}
/python/paddle/v2/master/
)
add_dependencies
(
copy_paddle_master paddle_master
)
endif
(
WITH_GOLANG
)
add_custom_target
(
copy_paddle_master
)
SET
(
COPY_PADDLE_MASTER
""
)
if
(
WITH_GOLANG
)
SET
(
COPY_PADDLE_MASTER
"copy_paddle_master"
)
add_custom_command
(
TARGET
${
COPY_PADDLE_MASTER
}
COMMAND cp
${
paddle_master_LIB_PATH
}
${
PADDLE_SOURCE_DIR
}
/python/paddle/v2/master/
)
add_dependencies
(
copy_paddle_master paddle_master
)
endif
(
WITH_GOLANG
)
endif
()
set
(
MKL_SHARED_LIBS
""
)
set
(
MKL_DEPENDS
""
)
...
...
@@ -59,23 +61,28 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND
${
CMAKE_COMMAND
}
-E copy_directory
${
PADDLE_PYTHON_BUILD_DIR
}
/lib*
${
PADDLE_PYTHON_BUILD_DIR
}
/lib-python
DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto profiler_py_proto
${
PY_FILES
}
${
external_project_dependencies
}
${
COPY_PADDLE_MASTER
}
)
set
(
paddle_python_deps
${
PADDLE_PYTHON_BUILD_DIR
}
/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model
${
MKL_DEPENDS
}
)
if
(
WITH_SWIG_PY
)
list
(
APPEND paddle_python_deps python_api_wheel
)
set
(
paddle_python_deps
${
PADDLE_PYTHON_BUILD_DIR
}
/.timestamp
${
MKL_DEPENDS
}
)
if
(
NOT WITH_FLUID
)
set
(
paddle_python_deps
${
paddle_python_deps
}
paddle_pserver_main paddle_trainer paddle_merge_model
)
if
(
WITH_SWIG_PY
)
list
(
APPEND paddle_python_deps python_api_wheel
)
endif
()
endif
()
add_custom_target
(
paddle_python ALL DEPENDS
${
paddle_python_deps
}
)
set
(
PADDLE_PYTHON_PACKAGE_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
/dist/
)
if
(
WITH_TESTING
)
add_subdirectory
(
paddle/trainer_config_helpers/tests
)
if
(
WITH_SWIG_PY
)
# enable v2 API unittest only when paddle swig api is compiled
add_subdirectory
(
paddle/v2/tests
)
add_subdirectory
(
paddle/v2/reader/tests
)
add_subdirectory
(
paddle/v2/plot/tests
)
add_subdirectory
(
paddle/fluid/tests
)
if
(
NOT WITH_FLUID
)
add_subdirectory
(
paddle/trainer_config_helpers/tests
)
if
(
WITH_SWIG_PY
)
# enable v2 API unittest only when paddle swig api is compiled
add_subdirectory
(
paddle/v2/tests
)
add_subdirectory
(
paddle/v2/reader/tests
)
add_subdirectory
(
paddle/v2/plot/tests
)
endif
()
endif
()
add_subdirectory
(
paddle/fluid/tests
)
endif
()
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
DESTINATION opt/paddle/share/wheels
...
...
python/paddle/fluid/backward.py
浏览文件 @
d7e5e1f1
...
...
@@ -248,12 +248,15 @@ def _callback_lookup_(op):
if
o_argu
in
self
.
param_grad_names
:
allreduce_out_name
=
o_argu
+
"__nccl_all_reduce__"
op_desc
=
_create_op_desc_
(
"ncclAllReduce"
,
{
"ncclReduce"
,
{
"X"
:
[
o_argu
],
"Communicator"
:
[
'nccl_com__do_not_change_'
]
},
{
"Out"
:
[
allreduce_out_name
]},
{
"reduction"
:
"ncclSum"
})
},
{
"Out"
:
[
allreduce_out_name
]},
{
"reduction"
:
"ncclSum"
,
"root"
:
0
},
)
block
.
desc
.
append_op
().
copy_from
(
op_desc
)
op_desc
=
_create_op_desc_
(
...
...
python/paddle/fluid/layers/ops.py
浏览文件 @
d7e5e1f1
...
...
@@ -45,31 +45,13 @@ __activations__ = [
]
__all__
=
[
'mean'
,
'mul'
,
'reshape'
,
'scale'
,
'sigmoid_cross_entropy_with_logits'
,
'elementwise_add'
,
'elementwise_div'
,
'elementwise_sub'
,
'elementwise_mul'
,
'elementwise_max'
,
'elementwise_min'
,
'elementwise_pow'
,
'clip'
,
'clip_by_norm'
,
'softmax'
,
'sequence_softmax'
,
'logical_and'
,
'logical_or'
,
'logical_xor'
,
'logical_not'
,
'uniform_random'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'gaussian_random_batch_size_like'
,
'cumsum'
,
'mean'
,
'mul'
,
'reshape'
,
'scale'
,
'sigmoid_cross_entropy_with_logits'
,
'elementwise_add'
,
'elementwise_div'
,
'elementwise_sub'
,
'elementwise_mul'
,
'elementwise_max'
,
'elementwise_min'
,
'elementwise_pow'
,
'clip'
,
'clip_by_norm'
,
'softmax'
,
'sequence_softmax'
,
'logical_and'
,
'logical_or'
,
'logical_xor'
,
'logical_not'
,
'uniform_random'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'gaussian_random_batch_size_like'
,
'cumsum'
,
'scatter'
]
+
__activations__
for
_OP
in
set
(
__all__
):
...
...
python/paddle/fluid/tests/unittests/test_mul_op.py
浏览文件 @
d7e5e1f1
...
...
@@ -14,6 +14,7 @@
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
...
...
@@ -69,5 +70,42 @@ class TestMulOp2(OpTest):
[
'X'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
'Y'
))
class TestFP16MulOp1(OpTest):
    """Output check for the "mul" op on 2-D float16 operands.

    Two random float16 matrices of shape (32, 84) and (84, 100) are generated;
    the op receives them as uint16 views of the same buffers, and the expected
    output is ``np.dot`` of the float16 arrays.
    """

    def setUp(self):
        self.op_type = "mul"
        lhs = np.random.random((32, 84)).astype("float16")
        rhs = np.random.random((84, 100)).astype("float16")
        # Inputs are handed over as uint16 views of the float16 data.
        self.inputs = {'X': lhs.view(np.uint16), 'Y': rhs.view(np.uint16)}
        self.outputs = {'Out': np.dot(lhs, rhs)}

    def test_check_output(self):
        # Nothing is checked unless the build has CUDA support.
        if not core.is_compiled_with_cuda():
            return
        gpu = core.CUDAPlace(0)
        # The check also requires float16 support on that place.
        if core.is_float16_supported(gpu):
            self.check_output_with_place(gpu, atol=1e-1)
class TestFP16MulOp2(OpTest):
    """Output check for the "mul" op on multi-dimensional float16 operands.

    X has shape (15, 4, 12, 10) and Y has shape (4, 30, 8, 2, 9), with both
    ``x_num_col_dims`` and ``y_num_col_dims`` set to 2. The reference result is
    computed by reshaping the operands to (15 * 4, 12 * 10) and
    (4 * 30, 8 * 2 * 9), taking ``np.dot``, and reshaping the product to
    (15, 4, 8, 2, 9).
    """

    def setUp(self):
        self.op_type = "mul"
        lhs = np.random.random((15, 4, 12, 10)).astype("float16")
        rhs = np.random.random((4, 30, 8, 2, 9)).astype("float16")
        # Inputs are handed over as uint16 views of the float16 data.
        self.inputs = {'X': lhs.view(np.uint16), 'Y': rhs.view(np.uint16)}
        self.attrs = {'x_num_col_dims': 2, 'y_num_col_dims': 2}

        lhs_2d = lhs.reshape(15 * 4, 12 * 10)
        rhs_2d = rhs.reshape(4 * 30, 8 * 2 * 9)
        expected = np.dot(lhs_2d, rhs_2d).reshape(15, 4, 8, 2, 9)
        self.outputs = {'Out': expected}

    def test_check_output(self):
        # Nothing is checked unless the build has CUDA support.
        if not core.is_compiled_with_cuda():
            return
        gpu = core.CUDAPlace(0)
        # The check also requires float16 support on that place.
        if core.is_float16_supported(gpu):
            self.check_output_with_place(gpu, atol=2e-1)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_scatter_op.py
浏览文件 @
d7e5e1f1
...
...
@@ -25,7 +25,7 @@ class TestScatterOp(OpTest):
updates_np
=
np
.
random
.
random
((
2
,
3
)).
astype
(
"float32"
)
output_np
=
np
.
copy
(
ref_np
)
output_np
[
index_np
]
=
updates_np
self
.
inputs
=
{
'
Ref'
:
ref_np
,
'Index
'
:
index_np
,
'Updates'
:
updates_np
}
self
.
inputs
=
{
'
X'
:
ref_np
,
'Ids
'
:
index_np
,
'Updates'
:
updates_np
}
self
.
outputs
=
{
'Out'
:
output_np
}
def
test_check_output
(
self
):
...
...
python/setup.py.in
浏览文件 @
d7e5e1f1
...
...
@@ -62,20 +62,22 @@ write_version_py(filename='@PADDLE_SOURCE_DIR@/python/paddle/version.py')
packages=['paddle',
'paddle.proto',
'paddle.trainer',
'paddle.trainer_config_helpers',
'paddle.utils',
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader',
'paddle.v2.master',
'paddle.v2.plot',
'paddle.fluid',
'paddle.fluid.proto',
'paddle.fluid.proto.profiler',
'paddle.fluid.layers',
'py_paddle']
'paddle.fluid.layers']
if '${WITH_FLUID}'== 'OFF':
packages+=['paddle.proto',
'paddle.trainer',
'paddle.trainer_config_helpers',
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader',
'paddle.v2.master',
'paddle.v2.plot',
'py_paddle']
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
setup_requires = f.read().splitlines()
...
...
@@ -84,11 +86,29 @@ if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
setup_requires+=['opencv-python']
# the prefix is sys.prefix which should always be usr
paddle_bin_dir = 'opt/paddle/bin'
paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer',
'${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model',
'${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main',
'${PADDLE_BINARY_DIR}/paddle/scripts/paddle']
paddle_bins = ''
if '${WITH_FLUID}'== 'OFF':
paddle_bin_dir = 'opt/paddle/bin'
paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer',
'${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model',
'${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main',
'${PADDLE_BINARY_DIR}/paddle/scripts/paddle']
package_data={'paddle.fluid': ['core.so']}
if '${WITH_FLUID}'== 'OFF':
package_data['paddle.v2.master']=['libpaddle_master.so']
package_data['py_paddle']=['*.py','_swig_paddle.so']
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.fluid.proto will be generated while compiling.
# So that package points to other directory.
'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform',
'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
}
if '${WITH_FLUID}'== 'OFF':
package_dir['py_paddle']='${PADDLE_SOURCE_DIR}/paddle/py_paddle'
paddle_rt_lib_dir = 'lib'
paddle_rt_libs = ['${WARPCTC_LIBRARIES}']
...
...
@@ -101,19 +121,8 @@ setup(name='${PACKAGE_NAME}',
install_requires=setup_requires,
packages=packages,
ext_modules=[Extension('_foo', ['stub.cc'])],
package_data={
'paddle.v2.master': ['libpaddle_master.so'],
'paddle.fluid': ['core.so'],
'py_paddle':['*.py','_swig_paddle.so']
},
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.fluid.proto will be generated while compiling.
# So that package points to other directory.
'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform',
'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
},
package_data=package_data,
package_dir=package_dir,
scripts=paddle_bins,
data_files=[(paddle_rt_lib_dir, paddle_rt_libs)]
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录