Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
d7e5e1f1
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d7e5e1f1
编写于
3月 15, 2018
作者:
W
wanghaoshuang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into average_model
上级
8a645685
e26f1123
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
522 addition
and
171 deletion
+522
-171
CMakeLists.txt
CMakeLists.txt
+1
-1
paddle/CMakeLists.txt
paddle/CMakeLists.txt
+23
-21
paddle/fluid/framework/channel.h
paddle/fluid/framework/channel.h
+109
-0
paddle/fluid/framework/channel_impl.h
paddle/fluid/framework/channel_impl.h
+146
-7
paddle/fluid/operators/assign_op.cc
paddle/fluid/operators/assign_op.cc
+1
-0
paddle/fluid/operators/mul_op.cc
paddle/fluid/operators/mul_op.cc
+7
-7
paddle/fluid/operators/mul_op.cu.cc
paddle/fluid/operators/mul_op.cu.cc
+6
-4
paddle/fluid/operators/mul_op.h
paddle/fluid/operators/mul_op.h
+1
-1
paddle/fluid/operators/nccl_op.cu.cc
paddle/fluid/operators/nccl_op.cu.cc
+2
-0
paddle/fluid/operators/scatter_op.cc
paddle/fluid/operators/scatter_op.cc
+17
-18
paddle/fluid/operators/scatter_op.cu
paddle/fluid/operators/scatter_op.cu
+10
-10
paddle/fluid/operators/scatter_op.h
paddle/fluid/operators/scatter_op.h
+11
-11
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+9
-1
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+56
-6
python/CMakeLists.txt
python/CMakeLists.txt
+34
-27
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+6
-3
python/paddle/fluid/layers/ops.py
python/paddle/fluid/layers/ops.py
+7
-25
python/paddle/fluid/tests/unittests/test_mul_op.py
python/paddle/fluid/tests/unittests/test_mul_op.py
+38
-0
python/paddle/fluid/tests/unittests/test_scatter_op.py
python/paddle/fluid/tests/unittests/test_scatter_op.py
+1
-1
python/setup.py.in
python/setup.py.in
+37
-28
未找到文件。
CMakeLists.txt
浏览文件 @
d7e5e1f1
...
@@ -53,7 +53,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
...
@@ -53,7 +53,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option
(
ON_TRAVIS
"Exclude special unit test on Travis CI"
OFF
)
option
(
ON_TRAVIS
"Exclude special unit test on Travis CI"
OFF
)
option
(
WITH_C_API
"Compile PaddlePaddle with C-API(Prediction)"
OFF
)
option
(
WITH_C_API
"Compile PaddlePaddle with C-API(Prediction)"
OFF
)
# TODO: Only compile PaddlePaddle fluid version by WITH_FLUID option.
# TODO: Only compile PaddlePaddle fluid version by WITH_FLUID option.
option
(
WITH_FLUID
"Compile PaddlePaddle fluid only(TODO)"
O
N
)
option
(
WITH_FLUID
"Compile PaddlePaddle fluid only(TODO)"
O
FF
)
option
(
WITH_GOLANG
"Compile PaddlePaddle with GOLANG"
OFF
)
option
(
WITH_GOLANG
"Compile PaddlePaddle with GOLANG"
OFF
)
option
(
GLIDE_INSTALL
"Download and install go dependencies "
ON
)
option
(
GLIDE_INSTALL
"Download and install go dependencies "
ON
)
option
(
USE_NNPACK
"Compile PaddlePaddle with NNPACK library"
OFF
)
option
(
USE_NNPACK
"Compile PaddlePaddle with NNPACK library"
OFF
)
...
...
paddle/CMakeLists.txt
浏览文件 @
d7e5e1f1
add_subdirectory
(
cuda
)
if
(
NOT WITH_FLUID
)
add_subdirectory
(
function
)
add_subdirectory
(
cuda
)
add_subdirectory
(
utils
)
add_subdirectory
(
function
)
add_subdirectory
(
math
)
add_subdirectory
(
utils
)
add_subdirectory
(
gserver
)
add_subdirectory
(
math
)
add_subdirectory
(
parameter
)
add_subdirectory
(
gserver
)
add_subdirectory
(
testing
)
add_subdirectory
(
parameter
)
if
(
MOBILE_INFERENCE
)
add_subdirectory
(
capi
)
else
()
add_subdirectory
(
pserver
)
add_subdirectory
(
trainer
)
add_subdirectory
(
scripts
)
if
(
WITH_C_API
)
if
(
MOBILE_INFERENCE
)
add_subdirectory
(
capi
)
add_subdirectory
(
capi
)
endif
()
else
()
add_subdirectory
(
pserver
)
add_subdirectory
(
trainer
)
add_subdirectory
(
scripts
)
if
(
NOT ANDROID AND NOT IOS
)
if
(
WITH_C_API
)
add_subdirectory
(
fluid
)
add_subdirectory
(
capi
)
endif
()
endif
()
if
(
WITH_SWIG_PY
)
if
(
WITH_SWIG_PY
)
add_subdirectory
(
api
)
add_subdirectory
(
api
)
endif
()
endif
()
endif
()
endif
()
endif
()
add_subdirectory
(
testing
)
if
(
NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS
)
add_subdirectory
(
fluid
)
endif
()
paddle/fluid/framework/channel.h
浏览文件 @
d7e5e1f1
...
@@ -15,23 +15,43 @@ limitations under the License. */
...
@@ -15,23 +15,43 @@ limitations under the License. */
#pragma once
#pragma once
#include <stddef.h> // for size_t
#include <stddef.h> // for size_t
#include <condition_variable>
#include <typeindex>
#include <typeindex>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
enum
class
ChannelAction
{
SEND
=
0
,
RECEIVE
=
1
,
CLOSE
=
2
,
};
// Channel is the abstract class of buffered and un-buffered channels.
// Channel is the abstract class of buffered and un-buffered channels.
template
<
typename
T
>
template
<
typename
T
>
class
Channel
{
class
Channel
{
public:
public:
virtual
bool
CanSend
()
=
0
;
virtual
bool
CanReceive
()
=
0
;
virtual
bool
Send
(
T
*
)
=
0
;
virtual
bool
Send
(
T
*
)
=
0
;
virtual
bool
Receive
(
T
*
)
=
0
;
virtual
bool
Receive
(
T
*
)
=
0
;
virtual
size_t
Cap
()
=
0
;
virtual
size_t
Cap
()
=
0
;
virtual
void
Lock
()
=
0
;
virtual
void
Lock
()
=
0
;
virtual
void
Unlock
()
=
0
;
virtual
void
Unlock
()
=
0
;
virtual
bool
IsClosed
()
=
0
;
virtual
void
Close
()
=
0
;
virtual
void
Close
()
=
0
;
virtual
~
Channel
()
{}
virtual
~
Channel
()
{}
virtual
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
=
0
;
virtual
void
AddToReceiveQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
=
0
;
virtual
void
RemoveFromSendQ
(
const
void
*
referrer
)
=
0
;
virtual
void
RemoveFromReceiveQ
(
const
void
*
referrer
)
=
0
;
};
};
// Forward declaration of channel implementations.
// Forward declaration of channel implementations.
...
@@ -80,6 +100,27 @@ class ChannelHolder {
...
@@ -80,6 +100,27 @@ class ChannelHolder {
return
channel
!=
nullptr
?
channel
->
Receive
(
data
)
:
false
;
return
channel
!=
nullptr
?
channel
->
Receive
(
data
)
:
false
;
}
}
bool
IsClosed
()
{
if
(
IsInitialized
())
{
return
holder_
->
IsClosed
();
}
return
false
;
}
bool
CanSend
()
{
if
(
IsInitialized
())
{
return
holder_
->
CanSend
();
}
return
false
;
}
bool
CanReceive
()
{
if
(
IsInitialized
())
{
return
holder_
->
CanReceive
();
}
return
false
;
}
void
close
()
{
void
close
()
{
if
(
IsInitialized
())
holder_
->
Close
();
if
(
IsInitialized
())
holder_
->
Close
();
}
}
...
@@ -97,6 +138,50 @@ class ChannelHolder {
...
@@ -97,6 +138,50 @@ class ChannelHolder {
if
(
IsInitialized
())
holder_
->
Unlock
();
if
(
IsInitialized
())
holder_
->
Unlock
();
}
}
template
<
typename
T
>
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
if
(
IsInitialized
())
{
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
AddToSendQ
(
referrer
,
data
,
cond
,
cb
);
}
}
}
template
<
typename
T
>
void
AddToReceiveQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
if
(
IsInitialized
())
{
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
AddToReceiveQ
(
referrer
,
data
,
cond
,
cb
);
}
}
}
template
<
typename
T
>
void
RemoveFromSendQ
(
const
void
*
referrer
)
{
if
(
IsInitialized
())
{
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
RemoveFromSendQ
(
referrer
);
}
}
}
template
<
typename
T
>
void
RemoveFromReceiveQ
(
const
void
*
referrer
)
{
if
(
IsInitialized
())
{
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
RemoveFromReceiveQ
(
referrer
);
}
}
}
inline
bool
IsInitialized
()
const
{
return
holder_
!=
nullptr
;
}
inline
bool
IsInitialized
()
const
{
return
holder_
!=
nullptr
;
}
inline
const
std
::
type_index
Type
()
{
inline
const
std
::
type_index
Type
()
{
...
@@ -113,6 +198,9 @@ class ChannelHolder {
...
@@ -113,6 +198,9 @@ class ChannelHolder {
virtual
~
Placeholder
()
{}
virtual
~
Placeholder
()
{}
virtual
const
std
::
type_index
Type
()
const
=
0
;
virtual
const
std
::
type_index
Type
()
const
=
0
;
virtual
void
*
Ptr
()
const
=
0
;
virtual
void
*
Ptr
()
const
=
0
;
virtual
bool
IsClosed
()
=
0
;
virtual
bool
CanSend
()
=
0
;
virtual
bool
CanReceive
()
=
0
;
virtual
void
Close
()
=
0
;
virtual
void
Close
()
=
0
;
virtual
void
Lock
()
=
0
;
virtual
void
Lock
()
=
0
;
virtual
void
Unlock
()
=
0
;
virtual
void
Unlock
()
=
0
;
...
@@ -129,6 +217,27 @@ class ChannelHolder {
...
@@ -129,6 +217,27 @@ class ChannelHolder {
virtual
void
*
Ptr
()
const
{
return
static_cast
<
void
*>
(
channel_
.
get
());
}
virtual
void
*
Ptr
()
const
{
return
static_cast
<
void
*>
(
channel_
.
get
());
}
virtual
bool
IsClosed
()
{
if
(
channel_
)
{
return
channel_
->
IsClosed
();
}
return
false
;
}
virtual
bool
CanSend
()
{
if
(
channel_
)
{
return
channel_
->
CanSend
();
}
return
false
;
}
virtual
bool
CanReceive
()
{
if
(
channel_
)
{
return
channel_
->
CanReceive
();
}
return
false
;
}
virtual
void
Close
()
{
virtual
void
Close
()
{
if
(
channel_
)
channel_
->
Close
();
if
(
channel_
)
channel_
->
Close
();
}
}
...
...
paddle/fluid/framework/channel_impl.h
浏览文件 @
d7e5e1f1
...
@@ -29,32 +29,50 @@ class ChannelImpl : public paddle::framework::Channel<T> {
...
@@ -29,32 +29,50 @@ class ChannelImpl : public paddle::framework::Channel<T> {
friend
void
paddle
::
framework
::
CloseChannel
<
T
>
(
Channel
<
T
>
*
);
friend
void
paddle
::
framework
::
CloseChannel
<
T
>
(
Channel
<
T
>
*
);
public:
public:
virtual
bool
CanSend
();
virtual
bool
CanReceive
();
virtual
bool
Send
(
T
*
);
virtual
bool
Send
(
T
*
);
virtual
bool
Receive
(
T
*
);
virtual
bool
Receive
(
T
*
);
virtual
size_t
Cap
()
{
return
cap_
;
}
virtual
size_t
Cap
()
{
return
cap_
;
}
virtual
void
Lock
();
virtual
void
Lock
();
virtual
void
Unlock
();
virtual
void
Unlock
();
virtual
bool
IsClosed
();
virtual
void
Close
();
virtual
void
Close
();
ChannelImpl
(
size_t
);
ChannelImpl
(
size_t
);
virtual
~
ChannelImpl
();
virtual
~
ChannelImpl
();
virtual
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
);
virtual
void
AddToReceiveQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
);
virtual
void
RemoveFromSendQ
(
const
void
*
referrer
);
virtual
void
RemoveFromReceiveQ
(
const
void
*
referrer
);
private:
private:
struct
QueueMessage
{
struct
QueueMessage
{
T
*
data
;
T
*
data
;
std
::
condition_variable_any
cond
;
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
;
bool
chan_closed
=
false
;
bool
chan_closed
=
false
;
bool
completed
=
false
;
bool
completed
=
false
;
const
void
*
referrer
;
// TODO(thuan): figure out better way to do this
std
::
function
<
bool
(
ChannelAction
)
>
callback
;
QueueMessage
(
T
*
item
)
:
data
(
item
)
{}
QueueMessage
(
T
*
item
)
:
data
(
item
),
cond
(
std
::
make_shared
<
std
::
condition_variable_any
>
())
{}
QueueMessage
(
T
*
item
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
)
:
data
(
item
),
cond
(
cond
)
{}
void
Wait
(
std
::
unique_lock
<
std
::
recursive_mutex
>
&
lock
)
{
void
Wait
(
std
::
unique_lock
<
std
::
recursive_mutex
>
&
lock
)
{
cond
.
wait
(
lock
,
[
this
]()
{
return
completed
;
});
cond
->
wait
(
lock
,
[
this
]()
{
return
completed
;
});
}
}
void
Notify
()
{
void
Notify
()
{
completed
=
true
;
completed
=
true
;
cond
.
notify_all
();
cond
->
notify_all
();
}
}
};
};
...
@@ -87,6 +105,18 @@ ChannelImpl<T>::ChannelImpl(size_t capacity)
...
@@ -87,6 +105,18 @@ ChannelImpl<T>::ChannelImpl(size_t capacity)
PADDLE_ENFORCE_GE
(
capacity
,
0
);
PADDLE_ENFORCE_GE
(
capacity
,
0
);
}
}
template
<
typename
T
>
bool
ChannelImpl
<
T
>::
CanSend
()
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
return
!
closed_
&&
(
!
recvq
.
empty
()
||
buf_
.
size
()
<
cap_
);
}
template
<
typename
T
>
bool
ChannelImpl
<
T
>::
CanReceive
()
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
return
!
(
closed_
&&
buf_
.
empty
())
&&
(
!
sendq
.
empty
()
||
buf_
.
size
()
>
0
);
}
template
<
typename
T
>
template
<
typename
T
>
bool
ChannelImpl
<
T
>::
Send
(
T
*
item
)
{
bool
ChannelImpl
<
T
>::
Send
(
T
*
item
)
{
send_ctr
++
;
send_ctr
++
;
...
@@ -105,7 +135,24 @@ bool ChannelImpl<T>::Send(T *item) {
...
@@ -105,7 +135,24 @@ bool ChannelImpl<T>::Send(T *item) {
std
::
shared_ptr
<
QueueMessage
>
m
=
recvq
.
front
();
std
::
shared_ptr
<
QueueMessage
>
m
=
recvq
.
front
();
recvq
.
pop_front
();
recvq
.
pop_front
();
// Do the data transfer
// Do the data transfer
*
(
m
->
data
)
=
std
::
move
(
*
item
);
// We will do this data transfer if either of the following
// cases are true
// 1. callback == nullptr // This means it was a regular channel send
// 2. callback returns true
bool
do_send
=
true
;
if
(
m
->
callback
!=
nullptr
)
do_send
=
m
->
callback
(
ChannelAction
::
SEND
);
if
(
do_send
)
*
(
m
->
data
)
=
std
::
move
(
*
item
);
else
// We cannot do the data transfer because
// this QueueMessage was added by Select
// and some other case was executed.
// So call the Send function again.
// We do not care about notifying other
// because they would have been notified
// by the executed select case.
return
Send
(
item
);
// Wake up the blocked process and unlock
// Wake up the blocked process and unlock
m
->
Notify
();
m
->
Notify
();
lock
.
unlock
();
lock
.
unlock
();
...
@@ -150,7 +197,25 @@ bool ChannelImpl<T>::Receive(T *item) {
...
@@ -150,7 +197,25 @@ bool ChannelImpl<T>::Receive(T *item) {
std
::
shared_ptr
<
QueueMessage
>
m
=
sendq
.
front
();
std
::
shared_ptr
<
QueueMessage
>
m
=
sendq
.
front
();
sendq
.
pop_front
();
sendq
.
pop_front
();
// Do the data transfer
// Do the data transfer
*
item
=
std
::
move
(
*
(
m
->
data
));
// We will do this data transfer if either of the following
// cases are true
// 1. callback == nullptr // This means it was a regular channel send
// 2. callback returns true
bool
do_receive
=
true
;
if
(
m
->
callback
!=
nullptr
)
do_receive
=
m
->
callback
(
ChannelAction
::
RECEIVE
);
if
(
do_receive
)
*
item
=
std
::
move
(
*
(
m
->
data
));
else
// We cannot do the data transfer because
// this QueueMessage was added by Select
// and some other case was executed.
// So call the Receive function again.
// We do not care about notifying other
// because they would have been notified
// by the executed select case.
return
Receive
(
item
);
// Wake up the blocked process and unlock
// Wake up the blocked process and unlock
m
->
Notify
();
m
->
Notify
();
lock
.
unlock
();
lock
.
unlock
();
...
@@ -186,6 +251,12 @@ void ChannelImpl<T>::Unlock() {
...
@@ -186,6 +251,12 @@ void ChannelImpl<T>::Unlock() {
mu_
.
unlock
();
mu_
.
unlock
();
}
}
template
<
typename
T
>
bool
ChannelImpl
<
T
>::
IsClosed
()
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
return
closed_
;
}
template
<
typename
T
>
template
<
typename
T
>
void
ChannelImpl
<
T
>::
Close
()
{
void
ChannelImpl
<
T
>::
Close
()
{
std
::
unique_lock
<
std
::
recursive_mutex
>
lock
{
mu_
};
std
::
unique_lock
<
std
::
recursive_mutex
>
lock
{
mu_
};
...
@@ -203,6 +274,12 @@ void ChannelImpl<T>::Close() {
...
@@ -203,6 +274,12 @@ void ChannelImpl<T>::Close() {
std
::
shared_ptr
<
QueueMessage
>
m
=
recvq
.
front
();
std
::
shared_ptr
<
QueueMessage
>
m
=
recvq
.
front
();
recvq
.
pop_front
();
recvq
.
pop_front
();
m
->
chan_closed
=
true
;
m
->
chan_closed
=
true
;
// Execute callback function (if any)
if
(
m
->
callback
!=
nullptr
)
{
m
->
callback
(
ChannelAction
::
CLOSE
);
}
m
->
Notify
();
m
->
Notify
();
}
}
...
@@ -211,10 +288,72 @@ void ChannelImpl<T>::Close() {
...
@@ -211,10 +288,72 @@ void ChannelImpl<T>::Close() {
std
::
shared_ptr
<
QueueMessage
>
m
=
sendq
.
front
();
std
::
shared_ptr
<
QueueMessage
>
m
=
sendq
.
front
();
sendq
.
pop_front
();
sendq
.
pop_front
();
m
->
chan_closed
=
true
;
m
->
chan_closed
=
true
;
// Execute callback function (if any)
if
(
m
->
callback
!=
nullptr
)
{
m
->
callback
(
ChannelAction
::
CLOSE
);
}
m
->
Notify
();
m
->
Notify
();
}
}
}
}
template
<
typename
T
>
void
ChannelImpl
<
T
>::
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
auto
m
=
std
::
make_shared
<
QueueMessage
>
(
data
,
cond
);
m
->
referrer
=
referrer
;
m
->
callback
=
cb
;
sendq
.
push_back
(
m
);
}
template
<
typename
T
>
void
ChannelImpl
<
T
>::
AddToReceiveQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
auto
m
=
std
::
make_shared
<
QueueMessage
>
(
data
,
cond
);
m
->
referrer
=
referrer
;
m
->
callback
=
cb
;
recvq
.
push_back
(
m
);
}
template
<
typename
T
>
void
ChannelImpl
<
T
>::
RemoveFromSendQ
(
const
void
*
referrer
)
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
for
(
auto
it
=
sendq
.
begin
();
it
!=
sendq
.
end
();)
{
std
::
shared_ptr
<
QueueMessage
>
sendMsg
=
(
std
::
shared_ptr
<
QueueMessage
>
)
*
it
;
if
(
sendMsg
->
referrer
==
referrer
)
{
it
=
sendq
.
erase
(
it
);
send_ctr
--
;
}
else
{
++
it
;
}
}
}
template
<
typename
T
>
void
ChannelImpl
<
T
>::
RemoveFromReceiveQ
(
const
void
*
referrer
)
{
std
::
lock_guard
<
std
::
recursive_mutex
>
lock
{
mu_
};
for
(
auto
it
=
recvq
.
begin
();
it
!=
recvq
.
end
();)
{
std
::
shared_ptr
<
QueueMessage
>
recvMsg
=
(
std
::
shared_ptr
<
QueueMessage
>
)
*
it
;
if
(
recvMsg
->
referrer
==
referrer
)
{
it
=
recvq
.
erase
(
it
);
recv_ctr
--
;
}
else
{
++
it
;
}
}
}
template
<
typename
T
>
template
<
typename
T
>
ChannelImpl
<
T
>::~
ChannelImpl
()
{
ChannelImpl
<
T
>::~
ChannelImpl
()
{
Close
();
Close
();
...
...
paddle/fluid/operators/assign_op.cc
浏览文件 @
d7e5e1f1
...
@@ -56,6 +56,7 @@ class AssignFunctor {
...
@@ -56,6 +56,7 @@ class AssignFunctor {
private:
private:
void
copy_tensor
(
const
framework
::
LoDTensor
&
lod_tensor
,
void
copy_tensor
(
const
framework
::
LoDTensor
&
lod_tensor
,
framework
::
LoDTensor
*
out
)
const
{
framework
::
LoDTensor
*
out
)
const
{
if
(
lod_tensor
.
numel
()
==
0
)
return
;
auto
&
out_tensor
=
*
out
;
auto
&
out_tensor
=
*
out
;
TensorCopy
(
lod_tensor
,
lod_tensor
.
place
(),
dev_ctx_
,
&
out_tensor
);
TensorCopy
(
lod_tensor
,
lod_tensor
.
place
(),
dev_ctx_
,
&
out_tensor
);
out_tensor
.
set_lod
(
lod_tensor
.
lod
());
out_tensor
.
set_lod
(
lod_tensor
.
lod
());
...
...
paddle/fluid/operators/mul_op.cc
浏览文件 @
d7e5e1f1
...
@@ -17,11 +17,14 @@ limitations under the License. */
...
@@ -17,11 +17,14 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
framework
::
OpKernelType
;
using
framework
::
Tensor
;
using
framework
::
Tensor
;
class
MulOp
ShapeInference
:
public
framework
::
InferShapeBase
{
class
MulOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
void
operator
()(
framework
::
InferShapeContext
*
ctx
)
const
override
{
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of MulOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of MulOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
),
"Input(Y) of MulOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
),
"Input(Y) of MulOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
...
@@ -122,7 +125,7 @@ or not. But the output only shares the LoD information with input $X$.
...
@@ -122,7 +125,7 @@ or not. But the output only shares the LoD information with input $X$.
}
}
};
};
class
Mul
OpGrad
:
public
framework
::
OperatorWithKernel
{
class
Mul
GradOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
@@ -156,10 +159,7 @@ class MulOpGrad : public framework::OperatorWithKernel {
...
@@ -156,10 +159,7 @@ class MulOpGrad : public framework::OperatorWithKernel {
}
// namespace paddle
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
mul
,
paddle
::
framework
::
OperatorWithKernel
,
ops
::
MulOpMaker
,
REGISTER_OP
(
mul
,
ops
::
MulOp
,
ops
::
MulOpMaker
,
mul_grad
,
ops
::
MulGradOp
);
ops
::
MulOpShapeInference
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
mul_grad
,
ops
::
MulOpGrad
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
mul
,
ops
::
MulKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
mul
,
ops
::
MulKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
...
...
paddle/fluid/operators/mul_op.cu.cc
浏览文件 @
d7e5e1f1
...
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
...
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/mul_op.h"
#include "paddle/fluid/operators/mul_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
namespace
plat
=
paddle
::
platform
;
mul
,
ops
::
MulKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
mul
,
ops
::
MulKernel
<
plat
::
CUDADeviceContext
,
float
>
,
REGISTER_OP_CUDA_KERNEL
(
ops
::
MulKernel
<
plat
::
CUDADeviceContext
,
plat
::
float16
>
);
mul_grad
,
ops
::
MulGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
mul_grad
,
ops
::
MulGradKernel
<
plat
::
CUDADeviceContext
,
float
>
);
paddle/fluid/operators/mul_op.h
浏览文件 @
d7e5e1f1
...
@@ -48,7 +48,7 @@ class MulKernel : public framework::OpKernel<T> {
...
@@ -48,7 +48,7 @@ class MulKernel : public framework::OpKernel<T> {
}
}
math
::
matmul
<
DeviceContext
,
T
>
(
math
::
matmul
<
DeviceContext
,
T
>
(
context
.
template
device_context
<
DeviceContext
>(),
x_matrix
,
false
,
context
.
template
device_context
<
DeviceContext
>(),
x_matrix
,
false
,
y_matrix
,
false
,
1
,
z
,
0
);
y_matrix
,
false
,
static_cast
<
T
>
(
1
),
z
,
static_cast
<
T
>
(
0
)
);
if
(
z_dim
.
size
()
!=
2
)
{
if
(
z_dim
.
size
()
!=
2
)
{
z
->
Resize
(
z_dim
);
z
->
Resize
(
z_dim
);
}
}
...
...
paddle/fluid/operators/nccl_op.cu.cc
浏览文件 @
d7e5e1f1
...
@@ -106,6 +106,8 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
...
@@ -106,6 +106,8 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
T
*
recvbuffer
=
nullptr
;
T
*
recvbuffer
=
nullptr
;
if
(
root
==
gpu_id
)
{
if
(
root
==
gpu_id
)
{
recvbuffer
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
recvbuffer
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
}
else
{
out
->
Resize
(
framework
::
make_ddim
({
0
}));
}
}
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" invoke reduce. send "
<<
x
->
numel
()
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" invoke reduce. send "
<<
x
->
numel
()
<<
" recv "
<<
out
->
numel
();
<<
" recv "
<<
out
->
numel
();
...
...
paddle/fluid/operators/scatter_op.cc
浏览文件 @
d7e5e1f1
...
@@ -23,24 +23,24 @@ class ScatterOp : public framework::OperatorWithKernel {
...
@@ -23,24 +23,24 @@ class ScatterOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Ref
"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
X
"
),
"Input(
Ref
) of ScatterOp should not be null."
);
"Input(
X
) of ScatterOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"I
ndex
"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"I
ds
"
),
"Input(I
ndex
) of ScatterOp should not be null."
);
"Input(I
ds
) of ScatterOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Updates"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Updates"
),
"Input(Updates) of ScatterOp should not be null."
);
"Input(Updates) of ScatterOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of ScatterOp should not be null."
);
"Output(Out) of ScatterOp should not be null."
);
auto
updates_dims
=
ctx
->
GetInputDim
(
"Updates"
);
auto
updates_dims
=
ctx
->
GetInputDim
(
"Updates"
);
auto
ref_dims
=
ctx
->
GetInputDim
(
"
Ref
"
);
auto
ref_dims
=
ctx
->
GetInputDim
(
"
X
"
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"I
ndex
"
).
size
(),
1
,
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"I
ds
"
).
size
(),
1
,
"Update I
ndex
should be 1-D."
);
"Update I
ds
should be 1-D."
);
PADDLE_ENFORCE_EQ
(
ref_dims
.
size
(),
updates_dims
.
size
(),
PADDLE_ENFORCE_EQ
(
ref_dims
.
size
(),
updates_dims
.
size
(),
"
Ref
erence and Updates should have the same shape size"
);
"
X
erence and Updates should have the same shape size"
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"Updates"
)[
0
],
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"Updates"
)[
0
],
ctx
->
GetInputDim
(
"I
ndex
"
)[
0
],
ctx
->
GetInputDim
(
"I
ds
"
)[
0
],
"Updates and I
ndex
should have same batch-size."
);
"Updates and I
ds
should have same batch-size."
);
framework
::
DDim
data_dim
(
updates_dims
);
framework
::
DDim
data_dim
(
updates_dims
);
for
(
int
i
=
1
;
i
<
data_dim
.
size
();
++
i
)
{
for
(
int
i
=
1
;
i
<
data_dim
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
data_dim
[
i
],
updates_dims
[
i
]);
PADDLE_ENFORCE_EQ
(
data_dim
[
i
],
updates_dims
[
i
]);
...
@@ -52,7 +52,7 @@ class ScatterOp : public framework::OperatorWithKernel {
...
@@ -52,7 +52,7 @@ class ScatterOp : public framework::OperatorWithKernel {
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
Ref
"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
X
"
)
->
type
()),
ctx
.
device_context
());
ctx
.
device_context
());
}
}
};
};
...
@@ -64,14 +64,14 @@ class ScatterGradOp : public framework::OperatorWithKernel {
...
@@ -64,14 +64,14 @@ class ScatterGradOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Updates"
),
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Updates"
),
ctx
->
GetInputDim
(
"Updates"
));
ctx
->
GetInputDim
(
"Updates"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"
Ref"
),
ctx
->
GetInputDim
(
"Ref
"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"
X"
),
ctx
->
GetInputDim
(
"X
"
));
}
}
protected:
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
Ref
"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
X
"
)
->
type
()),
ctx
.
device_context
());
ctx
.
device_context
());
}
}
};
};
...
@@ -80,9 +80,8 @@ class ScatterOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -80,9 +80,8 @@ class ScatterOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
ScatterOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
ScatterOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"Ref"
,
"The source input of scatter op"
);
AddInput
(
"X"
,
"The source input of scatter op"
);
AddInput
(
"Index"
,
AddInput
(
"Ids"
,
"The index input of scatter op where X will be updated"
);
"The index input of scatter op where Ref will be updated"
);
AddInput
(
"Updates"
,
"The updated value of updates op"
);
AddInput
(
"Updates"
,
"The updated value of updates op"
);
AddOutput
(
"Out"
,
"The output of add op"
);
AddOutput
(
"Out"
,
"The output of add op"
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
...
@@ -91,8 +90,8 @@ Scatter Operator.
...
@@ -91,8 +90,8 @@ Scatter Operator.
This operator obtains output by updating the input on selected indices on the first axis:
This operator obtains output by updating the input on selected indices on the first axis:
$$
$$
Out =
Ref
\\
Out =
X
\\
Out[I
ndex] = Ref[Index
] + Updates
Out[I
ds] = X[Ids
] + Updates
$$
$$
)DOC"
);
)DOC"
);
...
...
paddle/fluid/operators/scatter_op.cu
浏览文件 @
d7e5e1f1
...
@@ -25,14 +25,14 @@ class ScatterOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -25,14 +25,14 @@ class ScatterOpCUDAKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on GPU device."
);
"This kernel only runs on GPU device."
);
auto
*
Ref
=
ctx
.
Input
<
Tensor
>
(
"Ref
"
);
auto
*
X
=
ctx
.
Input
<
Tensor
>
(
"X
"
);
auto
*
I
ndex
=
ctx
.
Input
<
Tensor
>
(
"Index
"
);
auto
*
I
ds
=
ctx
.
Input
<
Tensor
>
(
"Ids
"
);
auto
*
Updates
=
ctx
.
Input
<
Tensor
>
(
"Updates"
);
auto
*
Updates
=
ctx
.
Input
<
Tensor
>
(
"Updates"
);
auto
*
Out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
Out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
Out
->
ShareDataWith
(
*
Ref
);
Out
->
ShareDataWith
(
*
X
);
GPUScatterAssign
<
T
>
(
ctx
.
device_context
(),
*
Updates
,
*
I
ndex
,
Out
);
GPUScatterAssign
<
T
>
(
ctx
.
device_context
(),
*
Updates
,
*
I
ds
,
Out
);
}
}
};
};
...
@@ -42,16 +42,16 @@ class ScatterGradOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -42,16 +42,16 @@ class ScatterGradOpCUDAKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on GPU device."
);
"This kernel only runs on GPU device."
);
auto
*
d
Ref
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Ref
"
));
auto
*
d
X
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X
"
));
auto
*
dUpdates
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Updates"
));
auto
*
dUpdates
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Updates"
));
auto
*
I
ndex
=
ctx
.
Input
<
Tensor
>
(
"Index
"
);
auto
*
I
ds
=
ctx
.
Input
<
Tensor
>
(
"Ids
"
);
auto
*
dOut
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dOut
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
// In place gradient: d
Ref
= dO
// In place gradient: d
X
= dO
d
Ref
->
ShareDataWith
(
*
dOut
);
d
X
->
ShareDataWith
(
*
dOut
);
dUpdates
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
dUpdates
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// Gradient by Gather: dUpdates = dO[I
ndex
]
// Gradient by Gather: dUpdates = dO[I
ds
]
GPUGather
<
T
>
(
ctx
.
device_context
(),
*
dOut
,
*
I
ndex
,
dUpdates
);
GPUGather
<
T
>
(
ctx
.
device_context
(),
*
dOut
,
*
I
ds
,
dUpdates
);
}
}
};
};
...
...
paddle/fluid/operators/scatter_op.h
浏览文件 @
d7e5e1f1
...
@@ -29,15 +29,15 @@ class ScatterOpKernel : public framework::OpKernel<T> {
...
@@ -29,15 +29,15 @@ class ScatterOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on CPU."
);
"This kernel only runs on CPU."
);
auto
*
Ref
=
ctx
.
Input
<
Tensor
>
(
"Ref
"
);
auto
*
X
=
ctx
.
Input
<
Tensor
>
(
"X
"
);
auto
*
I
ndex
=
ctx
.
Input
<
Tensor
>
(
"Index
"
);
auto
*
I
ds
=
ctx
.
Input
<
Tensor
>
(
"Ids
"
);
auto
*
Updates
=
ctx
.
Input
<
Tensor
>
(
"Updates"
);
auto
*
Updates
=
ctx
.
Input
<
Tensor
>
(
"Updates"
);
auto
*
Out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
Out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
// In place output: Out =
Ref, Out[Index
] += Updates
// In place output: Out =
X, Out[Ids
] += Updates
Out
->
ShareDataWith
(
*
Ref
);
Out
->
ShareDataWith
(
*
X
);
// Apply ScatterUpdate: Out[index] += Updates[:]
// Apply ScatterUpdate: Out[index] += Updates[:]
ScatterAssign
<
T
>
(
ctx
.
device_context
(),
*
Updates
,
*
I
ndex
,
Out
);
ScatterAssign
<
T
>
(
ctx
.
device_context
(),
*
Updates
,
*
I
ds
,
Out
);
}
}
};
};
...
@@ -47,16 +47,16 @@ class ScatterGradientOpKernel : public framework::OpKernel<T> {
...
@@ -47,16 +47,16 @@ class ScatterGradientOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on CPU."
);
"This kernel only runs on CPU."
);
auto
*
d
Ref
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Ref
"
));
auto
*
d
X
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X
"
));
auto
*
dUpdates
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Updates"
));
auto
*
dUpdates
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Updates"
));
auto
*
I
ndex
=
ctx
.
Input
<
Tensor
>
(
"Index
"
);
auto
*
I
ds
=
ctx
.
Input
<
Tensor
>
(
"Ids
"
);
auto
*
dOut
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dOut
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
// In place gradient: d
Ref
= dO
// In place gradient: d
X
= dO
d
Ref
->
ShareDataWith
(
*
dOut
);
d
X
->
ShareDataWith
(
*
dOut
);
dUpdates
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
dUpdates
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// Gradient by Gather: dUpdates += dO[I
ndex
]
// Gradient by Gather: dUpdates += dO[I
ds
]
CPUGather
<
T
>
(
ctx
.
device_context
(),
*
dOut
,
*
I
ndex
,
dUpdates
);
CPUGather
<
T
>
(
ctx
.
device_context
(),
*
dOut
,
*
I
ds
,
dUpdates
);
}
}
};
};
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
d7e5e1f1
...
@@ -31,6 +31,7 @@ limitations under the License. */
...
@@ -31,6 +31,7 @@ limitations under the License. */
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/net_op.h"
#include "paddle/fluid/operators/net_op.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/pybind/const_value.h"
#include "paddle/fluid/pybind/const_value.h"
...
@@ -103,12 +104,14 @@ PYBIND11_PLUGIN(core) {
...
@@ -103,12 +104,14 @@ PYBIND11_PLUGIN(core) {
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int64_t
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int64_t
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
bool
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
bool
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
uint16_t
>
)
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int64_t
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int64_t
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
bool
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
bool
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
uint16_t
>
)
#endif
#endif
.
def
(
"shape"
,
[](
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
.
def
(
"shape"
,
[](
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
.
def
(
"set_float_element"
,
TensorSetElement
<
float
>
)
.
def
(
"set_float_element"
,
TensorSetElement
<
float
>
)
...
@@ -315,7 +318,6 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -315,7 +318,6 @@ All parameter, weight, gradient are variables in Paddle.
#endif
#endif
});
});
// clang-format on
// clang-format on
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
py
::
class_
<
platform
::
Communicator
>
(
m
,
"Communicator"
).
def
(
py
::
init
<>
());
py
::
class_
<
platform
::
Communicator
>
(
m
,
"Communicator"
).
def
(
py
::
init
<>
());
#endif
#endif
...
@@ -423,6 +425,12 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -423,6 +425,12 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"init_devices"
,
&
framework
::
InitDevices
);
m
.
def
(
"init_devices"
,
&
framework
::
InitDevices
);
m
.
def
(
"is_compiled_with_cuda"
,
IsCompiledWithCUDA
);
m
.
def
(
"is_compiled_with_cuda"
,
IsCompiledWithCUDA
);
#ifdef PADDLE_WITH_CUDA
m
.
def
(
"is_float16_supported"
,
[](
const
platform
::
CUDAPlace
&
place
)
->
bool
{
// Only GPUs with Compute Capability >= 53 support float16
return
platform
::
GetCUDAComputeCapability
(
place
.
device
)
>=
53
;
});
#endif
m
.
def
(
"set_feed_variable"
,
framework
::
SetFeedVariable
);
m
.
def
(
"set_feed_variable"
,
framework
::
SetFeedVariable
);
m
.
def
(
"get_fetch_variable"
,
framework
::
GetFetchVariable
);
m
.
def
(
"get_fetch_variable"
,
framework
::
GetFetchVariable
);
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
d7e5e1f1
...
@@ -17,6 +17,7 @@ limitations under the License. */
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/float16.h"
#include "pybind11/numpy.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "pybind11/pybind11.h"
...
@@ -77,21 +78,32 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
...
@@ -77,21 +78,32 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
}
else
if
(
paddle
::
platform
::
is_cpu_place
(
tensor
.
place
()))
{
}
else
if
(
paddle
::
platform
::
is_cpu_place
(
tensor
.
place
()))
{
dst_tensor
=
tensor
;
dst_tensor
=
tensor
;
}
}
return
py
::
buffer_info
(
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
py
::
format_descriptor
<
CUR_TYPE
>::
format
(),
if
(
std
::
type_index
(
typeid
(
CUR_TYPE
))
==
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
std
::
type_index
(
typeid
(
platform
::
float16
)))
{
dims_outside
,
strides
);
return
py
::
buffer_info
(
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
"e"
,
/* np.dtype('e') == np.float16 */
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
}
else
{
return
py
::
buffer_info
(
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
py
::
format_descriptor
<
CUR_TYPE
>::
format
(),
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
}
}
else
{
}
else
{
constexpr
bool
less
=
I
+
1
<
std
::
tuple_size
<
std
::
tuple
<
ARGS
...
>>::
value
;
constexpr
bool
less
=
I
+
1
<
std
::
tuple_size
<
std
::
tuple
<
ARGS
...
>>::
value
;
return
CastToPyBufferImpl
<
less
,
I
+
1
,
ARGS
...
>
()(
tensor
);
return
CastToPyBufferImpl
<
less
,
I
+
1
,
ARGS
...
>
()(
tensor
);
}
}
}
}
};
};
}
// namespace details
}
// namespace details
inline
py
::
buffer_info
CastToPyBuffer
(
framework
::
Tensor
&
tensor
)
{
inline
py
::
buffer_info
CastToPyBuffer
(
framework
::
Tensor
&
tensor
)
{
auto
buffer_info
=
auto
buffer_info
=
details
::
CastToPyBufferImpl
<
true
,
0
,
float
,
int
,
double
,
int64_t
,
bool
>
()(
details
::
CastToPyBufferImpl
<
true
,
0
,
float
,
int
,
double
,
int64_t
,
bool
,
tensor
);
platform
::
float16
>
()(
tensor
);
return
buffer_info
;
return
buffer_info
;
}
}
...
@@ -136,6 +148,22 @@ void PyCPUTensorSetFromArray(
...
@@ -136,6 +148,22 @@ void PyCPUTensorSetFromArray(
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
}
}
template
<
>
void
PyCPUTensorSetFromArray
(
framework
::
Tensor
&
self
,
py
::
array_t
<
uint16_t
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
,
paddle
::
platform
::
CPUPlace
&
place
)
{
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
((
int
)
array
.
shape
()[
i
]);
}
self
.
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
platform
::
float16
>
(
place
);
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
uint16_t
)
*
array
.
size
());
}
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
template
<
typename
T
>
template
<
typename
T
>
void
PyCUDATensorSetFromArray
(
void
PyCUDATensorSetFromArray
(
...
@@ -157,6 +185,28 @@ void PyCUDATensorSetFromArray(
...
@@ -157,6 +185,28 @@ void PyCUDATensorSetFromArray(
paddle
::
platform
::
GpuMemcpyAsync
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
(),
paddle
::
platform
::
GpuMemcpyAsync
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
(),
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
}
}
template
<
>
void
PyCUDATensorSetFromArray
(
framework
::
Tensor
&
self
,
py
::
array_t
<
uint16_t
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
,
paddle
::
platform
::
CUDAPlace
&
place
)
{
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
((
int
)
array
.
shape
()[
i
]);
}
self
.
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
platform
::
float16
>
(
place
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
dev_ctx
=
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
place
));
paddle
::
platform
::
GpuMemcpyAsync
(
dst
,
array
.
data
(),
sizeof
(
uint16_t
)
*
array
.
size
(),
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
}
#endif
#endif
}
// namespace pybind
}
// namespace pybind
...
...
python/CMakeLists.txt
浏览文件 @
d7e5e1f1
file
(
GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py
)
file
(
GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py
)
file
(
GLOB UTILS_PY_FILES . ./paddle/utils/*.py
)
file
(
GLOB UTILS_PY_FILES . ./paddle/utils/*.py
)
file
(
GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py
)
file
(
GLOB_RECURSE FLUID_PY_FILES ./paddle/fluid/ *.py
)
file
(
GLOB_RECURSE FLUID_PY_FILES ./paddle/fluid/ *.py
)
set
(
PY_FILES paddle/__init__.py
set
(
PY_FILES paddle/__init__.py
${
TRAINER_PY_FILES
}
${
HELPERS_PY_FILES
}
${
UTILS_PY_FILES
}
${
UTILS_PY_FILES
}
${
V2_PY_FILES
}
${
FLUID_PY_FILES
}
)
${
FLUID_PY_FILES
}
)
add_custom_target
(
copy_paddle_master
)
if
(
NOT WITH_FLUID
)
file
(
GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py
)
file
(
GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py
)
file
(
GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py
)
set
(
PY_FILES
${
PY_FILES
}
${
TRAINER_PY_FILES
}
${
HELPERS_PY_FILES
}
${
V2_PY_FILES
}
)
SET
(
COPY_PADDLE_MASTER
""
)
add_custom_target
(
copy_paddle_master
)
if
(
WITH_GOLANG
)
SET
(
COPY_PADDLE_MASTER
"copy_paddle_master"
)
SET
(
COPY_PADDLE_MASTER
""
)
add_custom_command
(
TARGET
${
COPY_PADDLE_MASTER
}
if
(
WITH_GOLANG
)
COMMAND cp
${
paddle_master_LIB_PATH
}
${
PADDLE_SOURCE_DIR
}
/python/paddle/v2/master/
SET
(
COPY_PADDLE_MASTER
"copy_paddle_master"
)
)
add_custom_command
(
TARGET
${
COPY_PADDLE_MASTER
}
add_dependencies
(
copy_paddle_master paddle_master
)
COMMAND cp
${
paddle_master_LIB_PATH
}
${
PADDLE_SOURCE_DIR
}
/python/paddle/v2/master/
endif
(
WITH_GOLANG
)
)
add_dependencies
(
copy_paddle_master paddle_master
)
endif
(
WITH_GOLANG
)
endif
()
set
(
MKL_SHARED_LIBS
""
)
set
(
MKL_SHARED_LIBS
""
)
set
(
MKL_DEPENDS
""
)
set
(
MKL_DEPENDS
""
)
...
@@ -59,23 +61,28 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
...
@@ -59,23 +61,28 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND
${
CMAKE_COMMAND
}
-E copy_directory
${
PADDLE_PYTHON_BUILD_DIR
}
/lib*
${
PADDLE_PYTHON_BUILD_DIR
}
/lib-python
COMMAND
${
CMAKE_COMMAND
}
-E copy_directory
${
PADDLE_PYTHON_BUILD_DIR
}
/lib*
${
PADDLE_PYTHON_BUILD_DIR
}
/lib-python
DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto profiler_py_proto
${
PY_FILES
}
${
external_project_dependencies
}
${
COPY_PADDLE_MASTER
}
)
DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto profiler_py_proto
${
PY_FILES
}
${
external_project_dependencies
}
${
COPY_PADDLE_MASTER
}
)
set
(
paddle_python_deps
${
PADDLE_PYTHON_BUILD_DIR
}
/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model
${
MKL_DEPENDS
}
)
set
(
paddle_python_deps
${
PADDLE_PYTHON_BUILD_DIR
}
/.timestamp
${
MKL_DEPENDS
}
)
if
(
WITH_SWIG_PY
)
if
(
NOT WITH_FLUID
)
list
(
APPEND paddle_python_deps python_api_wheel
)
set
(
paddle_python_deps
${
paddle_python_deps
}
paddle_pserver_main paddle_trainer paddle_merge_model
)
if
(
WITH_SWIG_PY
)
list
(
APPEND paddle_python_deps python_api_wheel
)
endif
()
endif
()
endif
()
add_custom_target
(
paddle_python ALL DEPENDS
${
paddle_python_deps
}
)
add_custom_target
(
paddle_python ALL DEPENDS
${
paddle_python_deps
}
)
set
(
PADDLE_PYTHON_PACKAGE_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
/dist/
)
set
(
PADDLE_PYTHON_PACKAGE_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
/dist/
)
if
(
WITH_TESTING
)
if
(
WITH_TESTING
)
add_subdirectory
(
paddle/trainer_config_helpers/tests
)
if
(
NOT WITH_FLUID
)
if
(
WITH_SWIG_PY
)
add_subdirectory
(
paddle/trainer_config_helpers/tests
)
# enable v2 API unittest only when paddle swig api is compiled
if
(
WITH_SWIG_PY
)
add_subdirectory
(
paddle/v2/tests
)
# enable v2 API unittest only when paddle swig api is compiled
add_subdirectory
(
paddle/v2/reader/tests
)
add_subdirectory
(
paddle/v2/tests
)
add_subdirectory
(
paddle/v2/plot/tests
)
add_subdirectory
(
paddle/v2/reader/tests
)
add_subdirectory
(
paddle/fluid/tests
)
add_subdirectory
(
paddle/v2/plot/tests
)
endif
()
endif
()
endif
()
add_subdirectory
(
paddle/fluid/tests
)
endif
()
endif
()
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
DESTINATION opt/paddle/share/wheels
DESTINATION opt/paddle/share/wheels
...
...
python/paddle/fluid/backward.py
浏览文件 @
d7e5e1f1
...
@@ -248,12 +248,15 @@ def _callback_lookup_(op):
...
@@ -248,12 +248,15 @@ def _callback_lookup_(op):
if
o_argu
in
self
.
param_grad_names
:
if
o_argu
in
self
.
param_grad_names
:
allreduce_out_name
=
o_argu
+
"__nccl_all_reduce__"
allreduce_out_name
=
o_argu
+
"__nccl_all_reduce__"
op_desc
=
_create_op_desc_
(
op_desc
=
_create_op_desc_
(
"ncclAllReduce"
,
{
"ncclReduce"
,
{
"X"
:
[
o_argu
],
"X"
:
[
o_argu
],
"Communicator"
:
"Communicator"
:
[
'nccl_com__do_not_change_'
]
[
'nccl_com__do_not_change_'
]
},
{
"Out"
:
[
allreduce_out_name
]},
},
{
"reduction"
:
"ncclSum"
})
{
"Out"
:
[
allreduce_out_name
]},
{
"reduction"
:
"ncclSum"
,
"root"
:
0
},
)
block
.
desc
.
append_op
().
copy_from
(
op_desc
)
block
.
desc
.
append_op
().
copy_from
(
op_desc
)
op_desc
=
_create_op_desc_
(
op_desc
=
_create_op_desc_
(
...
...
python/paddle/fluid/layers/ops.py
浏览文件 @
d7e5e1f1
...
@@ -45,31 +45,13 @@ __activations__ = [
...
@@ -45,31 +45,13 @@ __activations__ = [
]
]
__all__
=
[
__all__
=
[
'mean'
,
'mean'
,
'mul'
,
'reshape'
,
'scale'
,
'sigmoid_cross_entropy_with_logits'
,
'mul'
,
'elementwise_add'
,
'elementwise_div'
,
'elementwise_sub'
,
'elementwise_mul'
,
'reshape'
,
'elementwise_max'
,
'elementwise_min'
,
'elementwise_pow'
,
'clip'
,
'scale'
,
'clip_by_norm'
,
'softmax'
,
'sequence_softmax'
,
'logical_and'
,
'logical_or'
,
'sigmoid_cross_entropy_with_logits'
,
'logical_xor'
,
'logical_not'
,
'uniform_random'
,
'elementwise_add'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'elementwise_div'
,
'gaussian_random_batch_size_like'
,
'cumsum'
,
'scatter'
'elementwise_sub'
,
'elementwise_mul'
,
'elementwise_max'
,
'elementwise_min'
,
'elementwise_pow'
,
'clip'
,
'clip_by_norm'
,
'softmax'
,
'sequence_softmax'
,
'logical_and'
,
'logical_or'
,
'logical_xor'
,
'logical_not'
,
'uniform_random'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'gaussian_random_batch_size_like'
,
'cumsum'
,
]
+
__activations__
]
+
__activations__
for
_OP
in
set
(
__all__
):
for
_OP
in
set
(
__all__
):
...
...
python/paddle/fluid/tests/unittests/test_mul_op.py
浏览文件 @
d7e5e1f1
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
from
op_test
import
OpTest
...
@@ -69,5 +70,42 @@ class TestMulOp2(OpTest):
...
@@ -69,5 +70,42 @@ class TestMulOp2(OpTest):
[
'X'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
'Y'
))
[
'X'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
'Y'
))
class
TestFP16MulOp1
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"mul"
x
=
np
.
random
.
random
((
32
,
84
)).
astype
(
"float16"
)
y
=
np
.
random
.
random
((
84
,
100
)).
astype
(
"float16"
)
self
.
inputs
=
{
'X'
:
x
.
view
(
np
.
uint16
),
'Y'
:
y
.
view
(
np
.
uint16
)}
self
.
outputs
=
{
'Out'
:
np
.
dot
(
x
,
y
)}
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
if
core
.
is_float16_supported
(
place
):
self
.
check_output_with_place
(
place
,
atol
=
1e-1
)
class
TestFP16MulOp2
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"mul"
x
=
np
.
random
.
random
((
15
,
4
,
12
,
10
)).
astype
(
"float16"
)
y
=
np
.
random
.
random
((
4
,
30
,
8
,
2
,
9
)).
astype
(
"float16"
)
self
.
inputs
=
{
'X'
:
x
.
view
(
np
.
uint16
),
'Y'
:
y
.
view
(
np
.
uint16
)}
self
.
attrs
=
{
'x_num_col_dims'
:
2
,
'y_num_col_dims'
:
2
,
}
result
=
np
.
dot
(
x
.
reshape
(
15
*
4
,
12
*
10
),
y
.
reshape
(
4
*
30
,
8
*
2
*
9
))
result
=
result
.
reshape
(
15
,
4
,
8
,
2
,
9
)
self
.
outputs
=
{
'Out'
:
result
}
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
if
core
.
is_float16_supported
(
place
):
self
.
check_output_with_place
(
place
,
atol
=
2e-1
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_scatter_op.py
浏览文件 @
d7e5e1f1
...
@@ -25,7 +25,7 @@ class TestScatterOp(OpTest):
...
@@ -25,7 +25,7 @@ class TestScatterOp(OpTest):
updates_np
=
np
.
random
.
random
((
2
,
3
)).
astype
(
"float32"
)
updates_np
=
np
.
random
.
random
((
2
,
3
)).
astype
(
"float32"
)
output_np
=
np
.
copy
(
ref_np
)
output_np
=
np
.
copy
(
ref_np
)
output_np
[
index_np
]
=
updates_np
output_np
[
index_np
]
=
updates_np
self
.
inputs
=
{
'
Ref'
:
ref_np
,
'Index
'
:
index_np
,
'Updates'
:
updates_np
}
self
.
inputs
=
{
'
X'
:
ref_np
,
'Ids
'
:
index_np
,
'Updates'
:
updates_np
}
self
.
outputs
=
{
'Out'
:
output_np
}
self
.
outputs
=
{
'Out'
:
output_np
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
...
python/setup.py.in
浏览文件 @
d7e5e1f1
...
@@ -62,20 +62,22 @@ write_version_py(filename='@PADDLE_SOURCE_DIR@/python/paddle/version.py')
...
@@ -62,20 +62,22 @@ write_version_py(filename='@PADDLE_SOURCE_DIR@/python/paddle/version.py')
packages=['paddle',
packages=['paddle',
'paddle.proto',
'paddle.trainer',
'paddle.trainer_config_helpers',
'paddle.utils',
'paddle.utils',
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader',
'paddle.v2.master',
'paddle.v2.plot',
'paddle.fluid',
'paddle.fluid',
'paddle.fluid.proto',
'paddle.fluid.proto',
'paddle.fluid.proto.profiler',
'paddle.fluid.proto.profiler',
'paddle.fluid.layers',
'paddle.fluid.layers']
'py_paddle']
if '${WITH_FLUID}'== 'OFF':
packages+=['paddle.proto',
'paddle.trainer',
'paddle.trainer_config_helpers',
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader',
'paddle.v2.master',
'paddle.v2.plot',
'py_paddle']
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
setup_requires = f.read().splitlines()
setup_requires = f.read().splitlines()
...
@@ -84,11 +86,29 @@ if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
...
@@ -84,11 +86,29 @@ if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
setup_requires+=['opencv-python']
setup_requires+=['opencv-python']
# the prefix is sys.prefix which should always be usr
# the prefix is sys.prefix which should always be usr
paddle_bin_dir = 'opt/paddle/bin'
paddle_bins = ''
paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer',
if '${WITH_FLUID}'== 'OFF':
'${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model',
paddle_bin_dir = 'opt/paddle/bin'
'${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main',
paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer',
'${PADDLE_BINARY_DIR}/paddle/scripts/paddle']
'${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model',
'${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main',
'${PADDLE_BINARY_DIR}/paddle/scripts/paddle']
package_data={'paddle.fluid': ['core.so']}
if '${WITH_FLUID}'== 'OFF':
package_data['paddle.v2.master']=['libpaddle_master.so']
package_data['py_paddle']=['*.py','_swig_paddle.so']
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.fluid.proto will be generated while compiling.
# So that package points to other directory.
'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform',
'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
}
if '${WITH_FLUID}'== 'OFF':
package_dir['py_paddle']='${PADDLE_SOURCE_DIR}/paddle/py_paddle'
paddle_rt_lib_dir = 'lib'
paddle_rt_lib_dir = 'lib'
paddle_rt_libs = ['${WARPCTC_LIBRARIES}']
paddle_rt_libs = ['${WARPCTC_LIBRARIES}']
...
@@ -101,19 +121,8 @@ setup(name='${PACKAGE_NAME}',
...
@@ -101,19 +121,8 @@ setup(name='${PACKAGE_NAME}',
install_requires=setup_requires,
install_requires=setup_requires,
packages=packages,
packages=packages,
ext_modules=[Extension('_foo', ['stub.cc'])],
ext_modules=[Extension('_foo', ['stub.cc'])],
package_data={
package_data=package_data,
'paddle.v2.master': ['libpaddle_master.so'],
package_dir=package_dir,
'paddle.fluid': ['core.so'],
'py_paddle':['*.py','_swig_paddle.so']
},
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.fluid.proto will be generated while compiling.
# So that package points to other directory.
'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform',
'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
},
scripts=paddle_bins,
scripts=paddle_bins,
data_files=[(paddle_rt_lib_dir, paddle_rt_libs)]
data_files=[(paddle_rt_lib_dir, paddle_rt_libs)]
)
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录