机器未来 / Paddle (forked from PaddlePaddle/Paddle)
Commit 9f488783
Authored on Apr 05, 2018 by qiaolongfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle
into change-mklml-download-url

Parents: 72913dc2, 7bf82f82

Showing 15 changed files with 265 additions and 164 deletions (+265 −164)
paddle/fluid/framework/block_desc.h                             +3   −0
paddle/fluid/framework/channel.h                                +4   −3
paddle/fluid/framework/channel_impl.h                           +11  −10
paddle/fluid/framework/channel_test.cc                          +80  −80
paddle/fluid/framework/lod_tensor.h                             +1   −0
paddle/fluid/framework/parallel_executor.cc                     +19  −2
paddle/fluid/framework/parallel_executor.h                      +5   −1
paddle/fluid/operators/conv_cudnn_op.cu.cc                      +22  −0
paddle/fluid/operators/fc_mkldnn_op.cc                          +3   −3
paddle/fluid/platform/cudnn_helper.h                            +3   −1
paddle/fluid/platform/dynload/cudnn.h                           +3   −2
paddle/fluid/pybind/protobuf.cc                                 +13  −5
python/paddle/fluid/parallel_executor.py                        +31  −10
python/paddle/fluid/tests/unittests/test_parallel_executor.py   +46  −26
python/paddle/fluid/tests/unittests/test_protobuf_descs.py      +21  −21
paddle/fluid/framework/block_desc.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <deque>
 #include <memory>
 #include <set>
+#include <string>
 #include <unordered_map>
 #include <vector>
@@ -96,6 +97,8 @@ class BlockDesc {
    */
   void RemoveOp(size_t s, size_t e);
 
+  void RemoveVar(const std::string &name) { vars_.erase(name); }
+
   std::vector<OpDesc *> AllOps() const;
 
   size_t OpSize() const { return ops_.size(); }
paddle/fluid/framework/channel.h
@@ -14,8 +14,8 @@ limitations under the License. */
 #pragma once
 
-#include <stddef.h>  // for size_t
-#include <condition_variable>
+#include <stddef.h>            // for size_t
+#include <condition_variable>  // NOLINT
 #include <typeindex>
 #include "paddle/fluid/platform/enforce.h"
@@ -216,7 +216,8 @@ class ChannelHolder {
   template <typename T>
   struct PlaceholderImpl : public Placeholder {
-    PlaceholderImpl(size_t buffer_size) : type_(std::type_index(typeid(T))) {
+    explicit PlaceholderImpl(size_t buffer_size)
+        : type_(std::type_index(typeid(T))) {
       channel_.reset(MakeChannel<T>(buffer_size));
     }
paddle/fluid/framework/channel_impl.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 #include <stddef.h>  // for size_t
 #include <atomic>
-#include <condition_variable>
+#include <condition_variable>  // NOLINT
 #include <deque>
 #include "paddle/fluid/framework/channel.h"
 #include "paddle/fluid/platform/enforce.h"
@@ -38,7 +38,7 @@ class ChannelImpl : public paddle::framework::Channel<T> {
   virtual void Unlock();
   virtual bool IsClosed();
   virtual void Close();
-  ChannelImpl(size_t);
+  explicit ChannelImpl(size_t);
   virtual ~ChannelImpl();
 
   virtual void AddToSendQ(const void *referrer, T *data,
@@ -60,7 +60,7 @@ class ChannelImpl : public paddle::framework::Channel<T> {
     const void *referrer;  // TODO(thuan): figure out better way to do this
     std::function<bool(ChannelAction)> callback;
 
-    QueueMessage(T *item)
+    explicit QueueMessage(T *item)
        : data(item), cond(std::make_shared<std::condition_variable_any>()) {}
 
    QueueMessage(T *item, std::shared_ptr<std::condition_variable_any> cond)
@@ -88,15 +88,15 @@ class ChannelImpl : public paddle::framework::Channel<T> {
   }
 
   std::shared_ptr<QueueMessage> get_first_message(
-      std::deque<std::shared_ptr<QueueMessage>> &queue, ChannelAction action) {
-    while (!queue.empty()) {
+      std::deque<std::shared_ptr<QueueMessage>> *queue, ChannelAction action) {
+    while (!queue->empty()) {
       // Check whether this message was added by Select
       // If this was added by Select then execute the callback
       // to check if you can execute this message. The callback
       // can return false if some other case was executed in Select.
       // In that case just discard this QueueMessage and process next.
-      std::shared_ptr<QueueMessage> m = queue.front();
-      queue.pop_front();
+      std::shared_ptr<QueueMessage> m = queue->front();
+      queue->pop_front();
       if (m->callback == nullptr || m->callback(action)) return m;
     }
     return nullptr;
@@ -147,7 +147,7 @@ void ChannelImpl<T>::Send(T *item) {
   // to send to the receiver, bypassing the channel buffer if any
   if (!recvq.empty()) {
     std::shared_ptr<QueueMessage> m =
-        get_first_message(recvq, ChannelAction::SEND);
+        get_first_message(&recvq, ChannelAction::SEND);
 
     if (m != nullptr) {
       *(m->data) = std::move(*item);
@@ -198,7 +198,7 @@ bool ChannelImpl<T>::Receive(T *item) {
   // buffer and move front of send queue to the buffer
   if (!sendq.empty()) {
     std::shared_ptr<QueueMessage> m =
-        get_first_message(sendq, ChannelAction::RECEIVE);
+        get_first_message(&sendq, ChannelAction::RECEIVE);
     if (buf_.size() > 0) {
       // Case 1 : Channel is Buffered
       // Do Data transfer from front of buffer
@@ -219,8 +219,9 @@ bool ChannelImpl<T>::Receive(T *item) {
       if (m != nullptr) {
         *item = std::move(*(m->data));
         m->Notify();
-      } else
+      } else {
         return recv_return(Receive(item));
+      }
     }
     return recv_return(true);
   }
paddle/fluid/framework/channel_test.cc
@@ -14,8 +14,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/channel.h"
 
-#include <chrono>
-#include <thread>
+#include <chrono>  // NOLINT
+#include <thread>  // NOLINT
 
 #include "gtest/gtest.h"
 
 using paddle::framework::Channel;
@@ -166,9 +166,9 @@ TEST(Channel, ConcurrentSendNonConcurrentReceiveWithSufficientBufferSize) {
   std::thread t([&]() {
     // Try to write more than buffer size.
     for (size_t i = 0; i < 2 * buffer_size; ++i) {
-      if (i < buffer_size)
+      if (i < buffer_size) {
         ch->Send(&i);  // should block after 10 iterations
-      else {
+      } else {
         bool is_exception = false;
         try {
           ch->Send(&i);
@@ -212,12 +212,12 @@ TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel3) {
 }
 
 void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
-  size_t num_threads = 5;
-  std::thread t[num_threads];
-  bool thread_ended[num_threads];
+  const size_t kNumThreads = 5;
+  std::thread t[kNumThreads];
+  bool thread_ended[kNumThreads];
 
   // Launches threads that try to read and are blocked because of no writers
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     thread_ended[i] = false;
     t[i] = std::thread([&](bool *p) {
@@ -230,7 +230,7 @@ void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait 0.2 sec
 
   // Verify that all the threads are blocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], false);
   }
@@ -241,21 +241,21 @@ void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait 0.2 sec
 
   // Verify that all threads got unblocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], true);
   }
 
-  for (size_t i = 0; i < num_threads; i++) t[i].join();
+  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
 }
 
 void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
-  size_t num_threads = 5;
-  std::thread t[num_threads];
-  bool thread_ended[num_threads];
-  bool send_success[num_threads];
+  const size_t kNumThreads = 5;
+  std::thread t[kNumThreads];
+  bool thread_ended[kNumThreads];
+  bool send_success[kNumThreads];
 
   // Launches threads that try to write and are blocked because of no readers
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     thread_ended[i] = false;
     send_success[i] = false;
     t[i] = std::thread(
@@ -277,13 +277,13 @@ void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
   if (isBuffered) {
     // If ch is Buffered, atleast 4 threads must be blocked.
     int ct = 0;
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
       if (!thread_ended[i]) ct++;
     }
     EXPECT_GE(ct, 4);
   } else {
     // If ch is UnBuffered, all the threads should be blocked.
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
       EXPECT_EQ(thread_ended[i], false);
     }
   }
@@ -294,21 +294,21 @@ void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait
 
   // Verify that all threads got unblocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], true);
   }
 
   if (isBuffered) {
     // Verify that only 1 send was successful
     int ct = 0;
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
       if (send_success[i]) ct++;
     }
     // Only 1 send must be successful
     EXPECT_EQ(ct, 1);
   }
 
-  for (size_t i = 0; i < num_threads; i++) t[i].join();
+  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
 }
 
 // This tests that closing a buffered channel also unblocks
@@ -409,13 +409,13 @@ TEST(Channel, UnbufferedMoreReceiveLessSendTest) {
 // This tests that destroying a channel unblocks
 //  any senders waiting for channel to have write space
 void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
-  size_t num_threads = 5;
-  std::thread t[num_threads];
-  bool thread_ended[num_threads];
-  bool send_success[num_threads];
+  const size_t kNumThreads = 5;
+  std::thread t[kNumThreads];
+  bool thread_ended[kNumThreads];
+  bool send_success[kNumThreads];
 
   // Launches threads that try to write and are blocked because of no readers
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     thread_ended[i] = false;
     send_success[i] = false;
     t[i] = std::thread(
@@ -438,14 +438,14 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
   if (isBuffered) {
     // If channel is buffered, verify that atleast 4 threads are blocked
     int ct = 0;
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
       if (thread_ended[i] == false) ct++;
     }
     // Atleast 4 threads must be blocked
     EXPECT_GE(ct, 4);
   } else {
     // Verify that all the threads are blocked
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
       EXPECT_EQ(thread_ended[i], false);
     }
   }
@@ -454,13 +454,13 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait
 
   // Verify that all threads got unblocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], true);
   }
 
   // Count number of successful sends
   int ct = 0;
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     if (send_success[i]) ct++;
   }
@@ -473,18 +473,18 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
   }
 
   // Join all threads
-  for (size_t i = 0; i < num_threads; i++) t[i].join();
+  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
 }
 
 // This tests that destroying a channel also unblocks
 //  any receivers waiting on the channel
 void ChannelDestroyUnblockReceivers(Channel<int> *ch) {
-  size_t num_threads = 5;
-  std::thread t[num_threads];
-  bool thread_ended[num_threads];
+  const size_t kNumThreads = 5;
+  std::thread t[kNumThreads];
+  bool thread_ended[kNumThreads];
 
   // Launches threads that try to read and are blocked because of no writers
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     thread_ended[i] = false;
     t[i] = std::thread([&](bool *p) {
@@ -498,18 +498,18 @@ void ChannelDestroyUnblockReceivers(Channel<int> *ch) {
   std::this_thread::sleep_for(std::chrono::milliseconds(100));  // wait
 
   // Verify that all threads are blocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], false);
   }
   // delete the channel
   delete ch;
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait
   // Verify that all threads got unblocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], true);
   }
 
-  for (size_t i = 0; i < num_threads; i++) t[i].join();
+  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
 }
 
 TEST(Channel, BufferedChannelDestroyUnblocksReceiversTest) {
@@ -679,12 +679,12 @@ TEST(ChannelHolder, TypeMismatchReceiveTest) {
 }
 
 void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
-  size_t num_threads = 5;
-  std::thread t[num_threads];
-  bool thread_ended[num_threads];
+  const size_t kNumThreads = 5;
+  std::thread t[kNumThreads];
+  bool thread_ended[kNumThreads];
 
   // Launches threads that try to read and are blocked because of no writers
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     thread_ended[i] = false;
     t[i] = std::thread([&](bool *p) {
@@ -697,7 +697,7 @@ void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait 0.2 sec
 
   // Verify that all the threads are blocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], false);
   }
@@ -708,21 +708,21 @@ void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait 0.2 sec
 
   // Verify that all threads got unblocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], true);
   }
 
-  for (size_t i = 0; i < num_threads; i++) t[i].join();
+  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
 }
 
 void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
-  size_t num_threads = 5;
-  std::thread t[num_threads];
-  bool thread_ended[num_threads];
-  bool send_success[num_threads];
+  const size_t kNumThreads = 5;
+  std::thread t[kNumThreads];
+  bool thread_ended[kNumThreads];
+  bool send_success[kNumThreads];
 
   // Launches threads that try to write and are blocked because of no readers
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     thread_ended[i] = false;
     send_success[i] = false;
     t[i] = std::thread(
@@ -744,13 +744,13 @@ void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
   if (isBuffered) {
     // If ch is Buffered, atleast 4 threads must be blocked.
     int ct = 0;
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
       if (!thread_ended[i]) ct++;
     }
     EXPECT_GE(ct, 4);
   } else {
     // If ch is UnBuffered, all the threads should be blocked.
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
       EXPECT_EQ(thread_ended[i], false);
     }
   }
@@ -761,21 +761,21 @@ void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait
 
   // Verify that all threads got unblocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], true);
   }
 
   if (isBuffered) {
     // Verify that only 1 send was successful
     int ct = 0;
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
       if (send_success[i]) ct++;
     }
     // Only 1 send must be successful
     EXPECT_EQ(ct, 1);
   }
 
-  for (size_t i = 0; i < num_threads; i++) t[i].join();
+  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
 }
 
 // This tests that closing a channelholder unblocks
@@ -813,13 +813,13 @@ TEST(Channel, ChannelHolderCloseUnblocksSendersTest) {
 // This tests that destroying a channelholder unblocks
 //  any senders waiting for channel
 void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
-  size_t num_threads = 5;
-  std::thread t[num_threads];
-  bool thread_ended[num_threads];
-  bool send_success[num_threads];
+  const size_t kNumThreads = 5;
+  std::thread t[kNumThreads];
+  bool thread_ended[kNumThreads];
+  bool send_success[kNumThreads];
 
   // Launches threads that try to write and are blocked because of no readers
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     thread_ended[i] = false;
     send_success[i] = false;
     t[i] = std::thread(
@@ -841,14 +841,14 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
   if (isBuffered) {
     // If channel is buffered, verify that atleast 4 threads are blocked
     int ct = 0;
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
      if (thread_ended[i] == false) ct++;
     }
     // Atleast 4 threads must be blocked
     EXPECT_GE(ct, 4);
   } else {
     // Verify that all the threads are blocked
-    for (size_t i = 0; i < num_threads; i++) {
+    for (size_t i = 0; i < kNumThreads; i++) {
       EXPECT_EQ(thread_ended[i], false);
     }
   }
@@ -857,13 +857,13 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait
 
   // Verify that all threads got unblocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], true);
   }
 
   // Count number of successfuld sends
   int ct = 0;
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     if (send_success[i]) ct++;
   }
@@ -876,18 +876,18 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
   }
 
   // Join all threads
-  for (size_t i = 0; i < num_threads; i++) t[i].join();
+  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
 }
 
 // This tests that destroying a channelholder also unblocks
 //  any receivers waiting on the channel
 void ChannelHolderDestroyUnblockReceivers(ChannelHolder *ch) {
-  size_t num_threads = 5;
-  std::thread t[num_threads];
-  bool thread_ended[num_threads];
+  const size_t kNumThreads = 5;
+  std::thread t[kNumThreads];
+  bool thread_ended[kNumThreads];
 
   // Launches threads that try to read and are blocked because of no writers
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     thread_ended[i] = false;
     t[i] = std::thread([&](bool *p) {
@@ -901,18 +901,18 @@ void ChannelHolderDestroyUnblockReceivers(ChannelHolder *ch) {
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait
 
   // Verify that all threads are blocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], false);
   }
   // delete the channel
   delete ch;
   std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait
   // Verify that all threads got unblocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], true);
  }
 
-  for (size_t i = 0; i < num_threads; i++) t[i].join();
+  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
 }
 
 TEST(ChannelHolder, ChannelHolderDestroyUnblocksReceiversTest) {
@@ -945,12 +945,12 @@ TEST(ChannelHolder, ChannelHolderDestroyUnblocksSendersTest) {
 // This tests that closing a channelholder many times.
 void ChannelHolderManyTimesClose(ChannelHolder *ch) {
-  const int num_threads = 15;
-  std::thread t[num_threads];
-  bool thread_ended[num_threads];
+  const int kNumThreads = 15;
+  std::thread t[kNumThreads];
+  bool thread_ended[kNumThreads];
 
   // Launches threads that try to send data to channel.
-  for (size_t i = 0; i < num_threads / 3; i++) {
+  for (size_t i = 0; i < kNumThreads / 3; i++) {
     thread_ended[i] = false;
     t[i] = std::thread([&](bool *ended) {
@@ -962,7 +962,7 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
   }
 
   // Launches threads that try to receive data to channel.
-  for (size_t i = num_threads / 3; i < 2 * num_threads / 3; i++) {
+  for (size_t i = kNumThreads / 3; i < 2 * kNumThreads / 3; i++) {
     thread_ended[i] = false;
     t[i] = std::thread([&](bool *p) {
@@ -976,7 +976,7 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
   }
 
   // Launches threads that try to close the channel.
-  for (size_t i = 2 * num_threads / 3; i < num_threads; i++) {
+  for (size_t i = 2 * kNumThreads / 3; i < kNumThreads; i++) {
     thread_ended[i] = false;
     t[i] = std::thread([&](bool *p) {
@@ -991,13 +991,13 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
   std::this_thread::sleep_for(std::chrono::milliseconds(100));  // wait
 
   // Verify that all threads are unblocked
-  for (size_t i = 0; i < num_threads; i++) {
+  for (size_t i = 0; i < kNumThreads; i++) {
     EXPECT_EQ(thread_ended[i], true);
   }
   EXPECT_TRUE(ch->IsClosed());
   // delete the channel
   delete ch;
-  for (size_t i = 0; i < num_threads; i++) t[i].join();
+  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
 }
 
 TEST(ChannelHolder, ChannelHolderManyTimesCloseTest) {
paddle/fluid/framework/lod_tensor.h
@@ -142,6 +142,7 @@ class LoDTensor : public Tensor {
     return (lod_)[level].size() - 1;
   }
 
+  // Split LoDTensor and copy to each place specified in places.
   std::vector<LoDTensor> SplitLoDTensor(
       const std::vector<platform::Place> places) const;
paddle/fluid/framework/parallel_executor.cc
@@ -150,13 +150,30 @@ void ParallelExecutor::BCastParamsToGPUs(
 #endif
 }
 
-void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
-                           const std::string &fetched_var_name) {
+void ParallelExecutor::Run(
+    const std::vector<std::string> &fetch_tensors,
+    const std::string &fetched_var_name,
+    const std::unordered_map<std::string, LoDTensor> &feed_tensors) {
   platform::RecordBlock b(0);
+  SplitTensorToPlaces(feed_tensors);
   auto fetch_data = member_->executor_->Run(fetch_tensors);
   *member_->global_scope_->Var(fetched_var_name)->GetMutable<FeedFetchList>() =
       fetch_data;
 }
 
+void ParallelExecutor::SplitTensorToPlaces(
+    const std::unordered_map<std::string, LoDTensor> &feed_tensors) {
+  for (auto it : feed_tensors) {
+    auto lod_tensors = it.second.SplitLoDTensor(member_->places_);
+    for (size_t j = 0; j < member_->places_.size(); ++j) {
+      // TODO(panxy0718): Do I need to delete this var?
+      member_->local_scopes_[j]
+          ->Var(it.first)
+          ->GetMutable<LoDTensor>()
+          ->ShareDataWith(lod_tensors[j]);
+    }
+  }
+}
+
 }  // namespace framework
 }  // namespace paddle
paddle/fluid/framework/parallel_executor.h
@@ -42,9 +42,13 @@ class ParallelExecutor {
            bool allow_op_delay);
 
   void Run(const std::vector<std::string>& fetch_tensors,
-           const std::string& fetched_var_name = "fetched_var");
+           const std::string& fetched_var_name,
+           const std::unordered_map<std::string, LoDTensor>& feed_tensors);
 
  private:
+  void SplitTensorToPlaces(
+      const std::unordered_map<std::string, LoDTensor>& feed_tensors);
+
   ParallelExecutorPrivate* member_;
 
   void BCastParamsToGPUs(const ProgramDesc& startup_program) const;
paddle/fluid/operators/conv_cudnn_op.cu.cc
@@ -128,10 +128,32 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
         cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
         workspace_size_limit, &algo));
 
+#if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
+    // Tensor core is supported since the volta GPU and
+    // is only enabled when input and filter data are float16
+    if (dev_ctx.GetComputeCapability() >= 70 &&
+        std::type_index(typeid(T)) ==
+            std::type_index(typeid(platform::float16))) {
+      PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
+          cudnn_conv_desc, CUDNN_TENSOR_OP_MATH));
+      // Currently tensor core is only enabled using this algo
+      algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
+    } else {
+      PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
+          cudnn_conv_desc, CUDNN_DEFAULT_MATH));
+    }
+#endif
+
     // get workspace size able to allocate
     PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
         cudnn_output_desc, algo, &workspace_size_in_bytes));
+    // It is possible for float16 on Volta GPU to allocate more memory than
+    // the limit because the algo is overrided to use tensor core.
+    PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit,
+                      "workspace_size to be allocated exceeds the limit");
+
     // Allocate on GPU memory
     platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
     cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
paddle/fluid/operators/fc_mkldnn_op.cc
@@ -27,8 +27,8 @@ template <typename T>
 class MKLDNNMD {
  public:
   explicit MKLDNNMD(const T* in, const T* w, bool bias)
-      : in{paddle::framework::vectorize2int(in->dims())},
-        w{paddle::framework::vectorize2int(w->dims())} {
+      : in(paddle::framework::vectorize2int(in->dims())),
+        w(paddle::framework::vectorize2int(w->dims())) {
     with_bias_ = bias;
   }
@@ -78,7 +78,7 @@ class MKLDNNMD {
 class MKLDNNMemory {
  public:
   MKLDNNMemory(MKLDNNMD<Tensor>* t, const mkldnn::engine& e)
-      : md_{t}, engine_{e} {}
+      : md_(t), engine_(e) {}
   virtual ~MKLDNNMemory() = default;
 
   template <typename Output>
paddle/fluid/platform/cudnn_helper.h
@@ -257,9 +257,11 @@ class ScopedConvolutionDescriptor {
     }
 #endif
 
+    cudnnDataType_t compute_type =
+        (type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
     PADDLE_ENFORCE(dynload::cudnnSetConvolutionNdDescriptor(
         desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
-        CUDNN_CROSS_CORRELATION, type));
+        CUDNN_CROSS_CORRELATION, compute_type));
     return desc_;
   }
paddle/fluid/platform/dynload/cudnn.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <cudnn.h>
 #include <dlfcn.h>
-#include <mutex>
+#include <mutex>  // NOLINT
 #include "paddle/fluid/platform/dynload/dynamic_loader.h"
 
 namespace paddle {
@@ -140,7 +140,8 @@ CUDNN_DNN_ROUTINE_EACH_R5(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
 #if CUDNN_VERSION >= 7001
 #define CUDNN_DNN_ROUTINE_EACH_R7(__macro) \
-  __macro(cudnnSetConvolutionGroupCount);
+  __macro(cudnnSetConvolutionGroupCount);  \
+  __macro(cudnnSetConvolutionMathType);
 CUDNN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
 #endif
paddle/fluid/pybind/protobuf.cc
@@ -15,6 +15,8 @@ limitations under the License. */
 #include "paddle/fluid/pybind/protobuf.h"
 #include <deque>
 #include <iostream>
+#include <string>
+#include <tuple>
 #include "paddle/fluid/framework/backward.h"
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_desc.h"
@@ -98,7 +100,7 @@ namespace pybind {
 using namespace paddle::framework;  // NOLINT
 
 template <typename T>
-static py::bytes SerializeMessage(T &self) {
+static py::bytes SerializeMessage(T &self) {  // NOLINT
   // Check IsInitialized in Python
   std::string retv;
   PADDLE_ENFORCE(self.Proto()->SerializePartialToString(&retv),
@@ -107,7 +109,7 @@ static py::bytes SerializeMessage(T &self) {
 }
 
 // Bind Methods
-void BindProgramDesc(py::module &m) {
+void BindProgramDesc(py::module &m) {  // NOLINT
   py::class_<ProgramDesc>(m, "ProgramDesc", "")
       .def(py::init<>())
       .def("__init__",
@@ -151,7 +153,7 @@ void BindProgramDesc(py::module &m) {
       });
 }
 
-void BindBlockDesc(py::module &m) {
+void BindBlockDesc(py::module &m) {  // NOLINT
   py::class_<BlockDesc>(m, "BlockDesc", "")
       .def_property_readonly("id", &BlockDesc::ID)
      .def_property_readonly("parent", &BlockDesc::Parent)
@@ -200,13 +202,19 @@ void BindBlockDesc(py::module &m) {
              return self.FindVarRecursive(name);
            },
            py::return_value_policy::reference)
+      .def("remove_var",
+           [](BlockDesc &self, py::bytes byte_name) {
+             std::string name = byte_name;
+             return self.RemoveVar(name);
+           },
+           py::return_value_policy::reference)
       .def("all_vars", &BlockDesc::AllVars, py::return_value_policy::reference)
       .def("op_size", &BlockDesc::OpSize)
       .def("op", &BlockDesc::Op, py::return_value_policy::reference)
       .def("serialize_to_string", SerializeMessage<BlockDesc>);
 }
 
-void BindVarDsec(py::module &m) {
+void BindVarDsec(py::module &m) {  // NOLINT
   py::class_<VarDesc> var_desc(m, "VarDesc", "");
   var_desc.def("name",
@@ -257,7 +265,7 @@ void BindVarDsec(py::module &m) {
       .value("RAW", proto::VarType::RAW);
 }
 
-void BindOpDesc(py::module &m) {
+void BindOpDesc(py::module &m) {  // NOLINT
   py::enum_<proto::AttrType>(m, "AttrType", "")
       .value("INT", proto::AttrType::INT)
      .value("INTS", proto::AttrType::INTS)
python/paddle/fluid/parallel_executor.py
@@ -26,25 +26,29 @@ class ParallelExecutor(object):
                  use_cuda,
                  num_threads=None,
                  allow_op_delay=False):
-        places = []
+        self._places = []
+        self._act_places = []
         if use_cuda:
             for i in xrange(core.get_cuda_device_count()):
                 p = core.Place()
-                p.set_place(core.CUDAPlace(i))
-                places.append(p)
+                self._act_places.append(core.CUDAPlace(i))
+                p.set_place(self._act_places[-1])
+                self._places.append(p)
         else:
             for i in xrange(multiprocessing.cpu_count()):
                 p = core.Place()
-                p.set_place(core.CPUPlace())
-                places.append(p)
+                self._act_places.append(core.CPUPlace(i))
+                p.set_place(self._act_places[-1])
+                self._places.append(p)
+        assert self._places, "no place for execution"
 
         if num_threads is None:
             if use_cuda:
                 # Experiments on se-resnext shows that too many threads hurt
                 # performance. Worth tunning for other models in the future.
-                num_threads = len(places)
+                num_threads = len(self._places)
             else:
-                min(len(places) * 2, multiprocessing.cpu_count())
+                min(len(self._places) * 2, multiprocessing.cpu_count())
 
         startup = framework.default_startup_program()
         main = framework.default_main_program()
@@ -53,7 +57,7 @@ class ParallelExecutor(object):
         self.executor = core.ParallelExecutor(
             num_threads,
             True if use_cuda else False,  # use_event
-            places,
+            self._places,
             set([
                 p.name for p in main.global_block().iter_parameters()
                 if not p.stop_gradient
@@ -65,8 +69,25 @@ class ParallelExecutor(object):
             allow_op_delay)
         self.scope = scope
 
-    def run(self, fetch_list):
+    def run(self, fetch_list, feed_dict={}):
+        """
+        :param fetch_list: A list of variable names that will be fetched.
+        :param feed_dict: A dict mapping for feed variable name to LoDTensor
+          or numpy array.
+        :return: fetched value list.
+        """
+        if not isinstance(feed_dict, dict):
+            raise TypeError("feed_dict should be a dict")
+
+        feed_tensor_dict = {}
+        for i, feed_name in enumerate(feed_dict):
+            feed_tensor = feed_dict[feed_name]
+            if not isinstance(feed_tensor, core.LoDTensor):
+                feed_tensor = core.LoDTensor()
+                feed_tensor.set(feed_dict[feed_name], self._act_places[0])
+            feed_tensor_dict[feed_name] = feed_tensor
+
         fetch_var_name = '@FETCHED_VAR_NAME@'
-        self.executor.run(fetch_list, fetch_var_name)
+        self.executor.run(fetch_list, fetch_var_name, feed_tensor_dict)
         arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array()
         return [arr[i] for i in range(len(arr))]
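
For orientation, here is a minimal sketch of how the new feed_dict path could be driven from user code. The run(fetch_list, feed_dict=...) signature and the ParallelExecutor construction follow the diffs above; the network itself and the 'image'/'label' names and batch shapes are illustrative assumptions borrowed from the updated unit test, not part of this commit.

# Sketch only: exercises the feed_dict parameter added to
# ParallelExecutor.run() in this commit (Python 2 era fluid API).
import numpy
import paddle.fluid as fluid

# Hypothetical network; any program whose data layers are named
# 'image' and 'label' would do.
img = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
prediction = fluid.layers.fc(input=img, size=10, act='softmax')
loss = fluid.layers.mean(
    fluid.layers.cross_entropy(input=prediction, label=label))
fluid.optimizer.Adam().minimize(loss)

exe = fluid.ParallelExecutor(loss_name=loss.name, use_cuda=True)

# Plain numpy arrays are accepted: run() wraps each one in a
# core.LoDTensor placed on self._act_places[0], and the C++ side
# then splits it across devices via SplitTensorToPlaces().
feed = {
    'image': numpy.zeros([32, 784], dtype='float32'),
    'label': numpy.ones([32, 1], dtype='int64'),
}
loss_value, = exe.run([loss.name], feed_dict=feed)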
python/paddle/fluid/tests/unittests/test_parallel_executor.py
@@ -21,13 +21,17 @@ import paddle.dataset.mnist as mnist
 import paddle.dataset.wmt16 as wmt16
 
 
-def simple_fc_net():
-    reader = fluid.layers.open_recordio_file(
-        filename='./mnist.recordio',
-        shapes=[[-1, 784], [-1, 1]],
-        lod_levels=[0, 0],
-        dtypes=['float32', 'int64'])
-    img, label = fluid.layers.read_file(reader)
+def simple_fc_net(use_feed):
+    if use_feed:
+        img = fluid.layers.data(name='image', shape=[784], dtype='float32')
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    else:
+        reader = fluid.layers.open_recordio_file(
+            filename='./mnist.recordio',
+            shapes=[[-1, 784], [-1, 1]],
+            lod_levels=[0, 0],
+            dtypes=['float32', 'int64'])
+        img, label = fluid.layers.read_file(reader)
     hidden = img
     for _ in xrange(4):
         hidden = fluid.layers.fc(
@@ -42,13 +46,18 @@ def simple_fc_net():
     return loss
 
 
-def fc_with_batchnorm():
-    reader = fluid.layers.open_recordio_file(
-        filename='./mnist.recordio',
-        shapes=[[-1, 784], [-1, 1]],
-        lod_levels=[0, 0],
-        dtypes=['float32', 'int64'])
-    img, label = fluid.layers.read_file(reader)
+def fc_with_batchnorm(use_feed):
+    if use_feed:
+        img = fluid.layers.data(name='image', shape=[784], dtype='float32')
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    else:
+        reader = fluid.layers.open_recordio_file(
+            filename='./mnist.recordio',
+            shapes=[[-1, 784], [-1, 1]],
+            lod_levels=[0, 0],
+            dtypes=['float32', 'int64'])
+        img, label = fluid.layers.read_file(reader)
+
     hidden = img
     for _ in xrange(1):
         hidden = fluid.layers.fc(
@@ -135,7 +144,9 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
     return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
 
 
-def SE_ResNeXt152Small(batch_size=2):
+def SE_ResNeXt152Small(batch_size=2, use_feed=False):
+    assert not use_feed, "SE_ResNeXt doesn't support feed yet"
+
     img = fluid.layers.fill_constant(
         shape=[batch_size, 3, 224, 224], dtype='float32', value=0.0)
     label = fluid.layers.fill_constant(
@@ -185,30 +196,28 @@ class TestParallelExecutorBase(unittest.TestCase):
                                memory_opt=True,
                                iter=10,
                                batch_size=None,
-                               allow_op_delay=False):
+                               allow_op_delay=False,
+                               feed_dict={}):
         main = fluid.Program()
         startup = fluid.Program()
         with fluid.program_guard(main, startup):
-            loss = method()
+            loss = method(use_feed=len(feed_dict) > 0)
             adam = fluid.optimizer.Adam()
             adam.minimize(loss)
             if memory_opt:
                 fluid.memory_optimize(main)
 
-        exe = fluid.ParallelExecutor(
-            loss_name=loss.name,
-            use_cuda=True,
-            allow_op_delay=allow_op_delay)
+        exe = fluid.ParallelExecutor(loss_name=loss.name, use_cuda=True)
         if batch_size is not None:
             batch_size *= fluid.core.get_cuda_device_count()
         begin = time.time()
-        first_loss, = exe.run([loss.name])
+        first_loss, = exe.run([loss.name], feed_dict=feed_dict)
         first_loss = numpy.array(first_loss)
 
         for i in xrange(iter):
-            exe.run([])
+            exe.run([], feed_dict=feed_dict)
 
-        last_loss, = exe.run([loss.name])
+        last_loss, = exe.run([loss.name], feed_dict=feed_dict)
         end = time.time()
 
         if batch_size is not None:
@@ -242,9 +251,19 @@ class TestMNIST(TestParallelExecutorBase):
         self.check_network_convergence(simple_fc_net)
         self.check_network_convergence(simple_fc_net, allow_op_delay=True)
 
+        img = numpy.zeros(shape=[32, 784], dtype='float32')
+        label = numpy.ones(shape=[32, 1], dtype='int64')
+        self.check_network_convergence(
+            simple_fc_net, feed_dict={"image": img,
+                                      "label": label})
+
     def test_batchnorm_fc(self):
         self.check_network_convergence(fc_with_batchnorm)
         self.check_network_convergence(fc_with_batchnorm, allow_op_delay=True)
+        img = numpy.zeros(shape=[32, 784], dtype='float32')
+        label = numpy.ones(shape=[32, 1], dtype='int64')
+        self.check_network_convergence(
+            fc_with_batchnorm, feed_dict={"image": img,
+                                          "label": label})
 
 
 class TestResnet(TestParallelExecutorBase):
@@ -400,7 +419,8 @@ def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head):
 import transformer_model
 
 
-def transformer():
+def transformer(use_feed):
+    assert not use_feed, "transfomer doesn't support feed yet"
     return transformer_model.transformer(
         ModelHyperParams.src_vocab_size + 1,
         ModelHyperParams.trg_vocab_size + 1, ModelHyperParams.max_length + 1,
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
@@ -19,9 +19,9 @@ from paddle.fluid.framework import Program
 class TestOpDesc(unittest.TestCase):
     def test_op_desc(self):
-        prog = core.ProgramDesc()
-        self.assertIsNotNone(prog)
-        block = prog.block(0)
+        program_desc = core.ProgramDesc()
+        self.assertIsNotNone(program_desc)
+        block = program_desc.block(0)
         self.assertIsNotNone(block)
         op = block.append_op()
         self.assertIsNotNone(op)
@@ -67,7 +67,7 @@ class TestOpDesc(unittest.TestCase):
         self.assertEqual(8, len(op.attr_names()))
 
-        op.set_block_attr("block_attr", prog.block(0))
+        op.set_block_attr("block_attr", program_desc.block(0))
         self.assertEqual(0, op.block_attr("block_attr"))
 
         mul_op = block.append_op()
@@ -88,20 +88,20 @@ class TestProgramDesc(unittest.TestCase):
         del program_desc
 
     def test_append_block(self):
-        prog_desc = core.ProgramDesc()
-        self.assertIsNotNone(prog_desc)
-        block_root = prog_desc.block(0)
+        program_desc = core.ProgramDesc()
+        self.assertIsNotNone(program_desc)
+        block_root = program_desc.block(0)
         self.assertIsNotNone(block_root)
         self.assertEqual(block_root.id, 0)
-        block1 = prog_desc.append_block(block_root)
-        block2 = prog_desc.append_block(block1)
+        block1 = program_desc.append_block(block_root)
+        block2 = program_desc.append_block(block1)
         self.assertIsNotNone(block1)
         self.assertEqual(block1.id, block2.parent)
         self.assertEqual(block_root.id, block1.parent)
-        block3 = prog_desc.append_block(block_root)
+        block3 = program_desc.append_block(block_root)
         self.assertEqual(block3.parent, block_root.id)
-        self.assertEqual(prog_desc.block(1).id, 1)
-        self.assertEqual(4, prog_desc.num_blocks())
+        self.assertEqual(program_desc.block(1).id, 1)
+        self.assertEqual(4, program_desc.num_blocks())
 
 
 class TestVarDesc(unittest.TestCase):
@@ -162,9 +162,9 @@ class TestVarDesc(unittest.TestCase):
 class TestBlockDesc(unittest.TestCase):
     def test_add_var(self):
-        prog = core.ProgramDesc()
-        self.assertIsNotNone(prog)
-        block = prog.block(0)
+        program_desc = core.ProgramDesc()
+        self.assertIsNotNone(program_desc)
+        block = program_desc.block(0)
         self.assertIsNotNone(block)
         var1 = block.var("var1")
         var2 = block.var("var2")
@@ -175,9 +175,9 @@ class TestBlockDesc(unittest.TestCase):
         self.assertEqual(var2_re, var2)
 
     def test_add_op(self):
-        prog = core.ProgramDesc()
-        self.assertIsNotNone(prog)
-        block = prog.block(0)
+        program_desc = core.ProgramDesc()
+        self.assertIsNotNone(program_desc)
+        block = program_desc.block(0)
         self.assertIsNotNone(block)
         op1 = block.append_op()
         op2 = block.append_op()
@@ -189,9 +189,9 @@ class TestBlockDesc(unittest.TestCase):
     def test_remove_op(self):
         program = Program()
-        prog = program.desc
-        self.assertIsNotNone(prog)
-        block = prog.block(0)
+        program_desc = program.desc
+        self.assertIsNotNone(program_desc)
+        block = program_desc.block(0)
         self.assertIsNotNone(block)
 
         op0 = block.append_op()