Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
9f488783
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9f488783
编写于
4月 05, 2018
作者:
Q
qiaolongfei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into change-mklml-download-url
上级
72913dc2
7bf82f82
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
265 addition
and
164 deletion
+265
-164
paddle/fluid/framework/block_desc.h
paddle/fluid/framework/block_desc.h
+3
-0
paddle/fluid/framework/channel.h
paddle/fluid/framework/channel.h
+4
-3
paddle/fluid/framework/channel_impl.h
paddle/fluid/framework/channel_impl.h
+11
-10
paddle/fluid/framework/channel_test.cc
paddle/fluid/framework/channel_test.cc
+80
-80
paddle/fluid/framework/lod_tensor.h
paddle/fluid/framework/lod_tensor.h
+1
-0
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+19
-2
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+5
-1
paddle/fluid/operators/conv_cudnn_op.cu.cc
paddle/fluid/operators/conv_cudnn_op.cu.cc
+22
-0
paddle/fluid/operators/fc_mkldnn_op.cc
paddle/fluid/operators/fc_mkldnn_op.cc
+3
-3
paddle/fluid/platform/cudnn_helper.h
paddle/fluid/platform/cudnn_helper.h
+3
-1
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+3
-2
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+13
-5
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+31
-10
python/paddle/fluid/tests/unittests/test_parallel_executor.py
...on/paddle/fluid/tests/unittests/test_parallel_executor.py
+46
-26
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
+21
-21
未找到文件。
paddle/fluid/framework/block_desc.h
浏览文件 @
9f488783
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <deque>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
...
...
@@ -96,6 +97,8 @@ class BlockDesc {
*/
void
RemoveOp
(
size_t
s
,
size_t
e
);
void
RemoveVar
(
const
std
::
string
&
name
)
{
vars_
.
erase
(
name
);
}
std
::
vector
<
OpDesc
*>
AllOps
()
const
;
size_t
OpSize
()
const
{
return
ops_
.
size
();
}
...
...
paddle/fluid/framework/channel.h
浏览文件 @
9f488783
...
...
@@ -14,8 +14,8 @@ limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <condition_variable>
#include <stddef.h>
// for size_t
#include <condition_variable>
// NOLINT
#include <typeindex>
#include "paddle/fluid/platform/enforce.h"
...
...
@@ -216,7 +216,8 @@ class ChannelHolder {
template
<
typename
T
>
struct
PlaceholderImpl
:
public
Placeholder
{
PlaceholderImpl
(
size_t
buffer_size
)
:
type_
(
std
::
type_index
(
typeid
(
T
)))
{
explicit
PlaceholderImpl
(
size_t
buffer_size
)
:
type_
(
std
::
type_index
(
typeid
(
T
)))
{
channel_
.
reset
(
MakeChannel
<
T
>
(
buffer_size
));
}
...
...
paddle/fluid/framework/channel_impl.h
浏览文件 @
9f488783
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <atomic>
#include <condition_variable>
#include <condition_variable>
// NOLINT
#include <deque>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/platform/enforce.h"
...
...
@@ -38,7 +38,7 @@ class ChannelImpl : public paddle::framework::Channel<T> {
virtual
void
Unlock
();
virtual
bool
IsClosed
();
virtual
void
Close
();
ChannelImpl
(
size_t
);
explicit
ChannelImpl
(
size_t
);
virtual
~
ChannelImpl
();
virtual
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
...
...
@@ -60,7 +60,7 @@ class ChannelImpl : public paddle::framework::Channel<T> {
const
void
*
referrer
;
// TODO(thuan): figure out better way to do this
std
::
function
<
bool
(
ChannelAction
)
>
callback
;
QueueMessage
(
T
*
item
)
explicit
QueueMessage
(
T
*
item
)
:
data
(
item
),
cond
(
std
::
make_shared
<
std
::
condition_variable_any
>
())
{}
QueueMessage
(
T
*
item
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
)
...
...
@@ -88,15 +88,15 @@ class ChannelImpl : public paddle::framework::Channel<T> {
}
std
::
shared_ptr
<
QueueMessage
>
get_first_message
(
std
::
deque
<
std
::
shared_ptr
<
QueueMessage
>>
&
queue
,
ChannelAction
action
)
{
while
(
!
queue
.
empty
())
{
std
::
deque
<
std
::
shared_ptr
<
QueueMessage
>>
*
queue
,
ChannelAction
action
)
{
while
(
!
queue
->
empty
())
{
// Check whether this message was added by Select
// If this was added by Select then execute the callback
// to check if you can execute this message. The callback
// can return false if some other case was executed in Select.
// In that case just discard this QueueMessage and process next.
std
::
shared_ptr
<
QueueMessage
>
m
=
queue
.
front
();
queue
.
pop_front
();
std
::
shared_ptr
<
QueueMessage
>
m
=
queue
->
front
();
queue
->
pop_front
();
if
(
m
->
callback
==
nullptr
||
m
->
callback
(
action
))
return
m
;
}
return
nullptr
;
...
...
@@ -147,7 +147,7 @@ void ChannelImpl<T>::Send(T *item) {
// to send to the receiver, bypassing the channel buffer if any
if
(
!
recvq
.
empty
())
{
std
::
shared_ptr
<
QueueMessage
>
m
=
get_first_message
(
recvq
,
ChannelAction
::
SEND
);
get_first_message
(
&
recvq
,
ChannelAction
::
SEND
);
if
(
m
!=
nullptr
)
{
*
(
m
->
data
)
=
std
::
move
(
*
item
);
...
...
@@ -198,7 +198,7 @@ bool ChannelImpl<T>::Receive(T *item) {
// buffer and move front of send queue to the buffer
if
(
!
sendq
.
empty
())
{
std
::
shared_ptr
<
QueueMessage
>
m
=
get_first_message
(
sendq
,
ChannelAction
::
RECEIVE
);
get_first_message
(
&
sendq
,
ChannelAction
::
RECEIVE
);
if
(
buf_
.
size
()
>
0
)
{
// Case 1 : Channel is Buffered
// Do Data transfer from front of buffer
...
...
@@ -219,8 +219,9 @@ bool ChannelImpl<T>::Receive(T *item) {
if
(
m
!=
nullptr
)
{
*
item
=
std
::
move
(
*
(
m
->
data
));
m
->
Notify
();
}
else
}
else
{
return
recv_return
(
Receive
(
item
));
}
}
return
recv_return
(
true
);
}
...
...
paddle/fluid/framework/channel_test.cc
浏览文件 @
9f488783
...
...
@@ -14,8 +14,8 @@ limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include <chrono>
#include <thread>
#include <chrono>
// NOLINT
#include <thread>
// NOLINT
#include "gtest/gtest.h"
using
paddle
::
framework
::
Channel
;
...
...
@@ -166,9 +166,9 @@ TEST(Channel, ConcurrentSendNonConcurrentReceiveWithSufficientBufferSize) {
std
::
thread
t
([
&
]()
{
// Try to write more than buffer size.
for
(
size_t
i
=
0
;
i
<
2
*
buffer_size
;
++
i
)
{
if
(
i
<
buffer_size
)
if
(
i
<
buffer_size
)
{
ch
->
Send
(
&
i
);
// should block after 10 iterations
else
{
}
else
{
bool
is_exception
=
false
;
try
{
ch
->
Send
(
&
i
);
...
...
@@ -212,12 +212,12 @@ TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel3) {
}
void
ChannelCloseUnblocksReceiversTest
(
Channel
<
int
>
*
ch
)
{
size_t
num_t
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
bool
thread_ended
[
num_t
hreads
];
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to read and are blocked because of no writers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
...
...
@@ -230,7 +230,7 @@ void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
// Verify that all the threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
...
...
@@ -241,21 +241,21 @@ void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
void
ChannelCloseUnblocksSendersTest
(
Channel
<
int
>
*
ch
,
bool
isBuffered
)
{
size_t
num_t
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
send_success
[
num_t
hreads
];
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
kNumT
hreads
];
bool
send_success
[
kNumT
hreads
];
// Launches threads that try to write and are blocked because of no readers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
send_success
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
...
...
@@ -277,13 +277,13 @@ void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
if
(
isBuffered
)
{
// If ch is Buffered, atleast 4 threads must be blocked.
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
!
thread_ended
[
i
])
ct
++
;
}
EXPECT_GE
(
ct
,
4
);
}
else
{
// If ch is UnBuffered, all the threads should be blocked.
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
...
...
@@ -294,21 +294,21 @@ void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
if
(
isBuffered
)
{
// Verify that only 1 send was successful
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
send_success
[
i
])
ct
++
;
}
// Only 1 send must be successful
EXPECT_EQ
(
ct
,
1
);
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
// This tests that closing a buffered channel also unblocks
...
...
@@ -409,13 +409,13 @@ TEST(Channel, UnbufferedMoreReceiveLessSendTest) {
// This tests that destroying a channel unblocks
// any senders waiting for channel to have write space
void
ChannelDestroyUnblockSenders
(
Channel
<
int
>
*
ch
,
bool
isBuffered
)
{
size_t
num_t
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
send_success
[
num_t
hreads
];
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
kNumT
hreads
];
bool
send_success
[
kNumT
hreads
];
// Launches threads that try to write and are blocked because of no readers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
send_success
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
...
...
@@ -438,14 +438,14 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
if
(
isBuffered
)
{
// If channel is buffered, verify that atleast 4 threads are blocked
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
thread_ended
[
i
]
==
false
)
ct
++
;
}
// Atleast 4 threads must be blocked
EXPECT_GE
(
ct
,
4
);
}
else
{
// Verify that all the threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
...
...
@@ -454,13 +454,13 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
// Count number of successful sends
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
send_success
[
i
])
ct
++
;
}
...
...
@@ -473,18 +473,18 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
}
// Join all threads
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
// This tests that destroying a channel also unblocks
// any receivers waiting on the channel
void
ChannelDestroyUnblockReceivers
(
Channel
<
int
>
*
ch
)
{
size_t
num_t
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
bool
thread_ended
[
num_t
hreads
];
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to read and are blocked because of no writers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
...
...
@@ -498,18 +498,18 @@ void ChannelDestroyUnblockReceivers(Channel<int> *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
100
));
// wait
// Verify that all threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
// delete the channel
delete
ch
;
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
TEST
(
Channel
,
BufferedChannelDestroyUnblocksReceiversTest
)
{
...
...
@@ -679,12 +679,12 @@ TEST(ChannelHolder, TypeMismatchReceiveTest) {
}
void
ChannelHolderCloseUnblocksReceiversTest
(
ChannelHolder
*
ch
)
{
size_t
num_t
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
bool
thread_ended
[
num_t
hreads
];
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to read and are blocked because of no writers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
...
...
@@ -697,7 +697,7 @@ void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
// Verify that all the threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
...
...
@@ -708,21 +708,21 @@ void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
void
ChannelHolderCloseUnblocksSendersTest
(
ChannelHolder
*
ch
,
bool
isBuffered
)
{
size_t
num_t
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
send_success
[
num_t
hreads
];
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
kNumT
hreads
];
bool
send_success
[
kNumT
hreads
];
// Launches threads that try to write and are blocked because of no readers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
send_success
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
...
...
@@ -744,13 +744,13 @@ void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
if
(
isBuffered
)
{
// If ch is Buffered, atleast 4 threads must be blocked.
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
!
thread_ended
[
i
])
ct
++
;
}
EXPECT_GE
(
ct
,
4
);
}
else
{
// If ch is UnBuffered, all the threads should be blocked.
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
...
...
@@ -761,21 +761,21 @@ void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
if
(
isBuffered
)
{
// Verify that only 1 send was successful
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
send_success
[
i
])
ct
++
;
}
// Only 1 send must be successful
EXPECT_EQ
(
ct
,
1
);
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
// This tests that closing a channelholder unblocks
...
...
@@ -813,13 +813,13 @@ TEST(Channel, ChannelHolderCloseUnblocksSendersTest) {
// This tests that destroying a channelholder unblocks
// any senders waiting for channel
void
ChannelHolderDestroyUnblockSenders
(
ChannelHolder
*
ch
,
bool
isBuffered
)
{
size_t
num_t
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
send_success
[
num_t
hreads
];
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
kNumT
hreads
];
bool
send_success
[
kNumT
hreads
];
// Launches threads that try to write and are blocked because of no readers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
send_success
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
...
...
@@ -841,14 +841,14 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
if
(
isBuffered
)
{
// If channel is buffered, verify that atleast 4 threads are blocked
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
thread_ended
[
i
]
==
false
)
ct
++
;
}
// Atleast 4 threads must be blocked
EXPECT_GE
(
ct
,
4
);
}
else
{
// Verify that all the threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
...
...
@@ -857,13 +857,13 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
// Count number of successfuld sends
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
send_success
[
i
])
ct
++
;
}
...
...
@@ -876,18 +876,18 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
}
// Join all threads
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
// This tests that destroying a channelholder also unblocks
// any receivers waiting on the channel
void
ChannelHolderDestroyUnblockReceivers
(
ChannelHolder
*
ch
)
{
size_t
num_t
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
bool
thread_ended
[
num_t
hreads
];
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to read and are blocked because of no writers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
...
...
@@ -901,18 +901,18 @@ void ChannelHolderDestroyUnblockReceivers(ChannelHolder *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
// delete the channel
delete
ch
;
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
TEST
(
ChannelHolder
,
ChannelHolderDestroyUnblocksReceiversTest
)
{
...
...
@@ -945,12 +945,12 @@ TEST(ChannelHolder, ChannelHolderDestroyUnblocksSendersTest) {
// This tests that closing a channelholder many times.
void
ChannelHolderManyTimesClose
(
ChannelHolder
*
ch
)
{
const
int
num_t
hreads
=
15
;
std
::
thread
t
[
num_t
hreads
];
bool
thread_ended
[
num_t
hreads
];
const
int
kNumT
hreads
=
15
;
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to send data to channel.
for
(
size_t
i
=
0
;
i
<
num_t
hreads
/
3
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
/
3
;
i
++
)
{
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
ended
)
{
...
...
@@ -962,7 +962,7 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
}
// Launches threads that try to receive data to channel.
for
(
size_t
i
=
num_threads
/
3
;
i
<
2
*
num_t
hreads
/
3
;
i
++
)
{
for
(
size_t
i
=
kNumThreads
/
3
;
i
<
2
*
kNumT
hreads
/
3
;
i
++
)
{
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
...
...
@@ -976,7 +976,7 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
}
// Launches threads that try to close the channel.
for
(
size_t
i
=
2
*
num_threads
/
3
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
2
*
kNumThreads
/
3
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
...
...
@@ -991,13 +991,13 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
100
));
// wait
// Verify that all threads are unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
EXPECT_TRUE
(
ch
->
IsClosed
());
// delete the channel
delete
ch
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
TEST
(
ChannelHolder
,
ChannelHolderManyTimesCloseTest
)
{
...
...
paddle/fluid/framework/lod_tensor.h
浏览文件 @
9f488783
...
...
@@ -142,6 +142,7 @@ class LoDTensor : public Tensor {
return
(
lod_
)[
level
].
size
()
-
1
;
}
// Split LoDTensor and copy to each place specified in places.
std
::
vector
<
LoDTensor
>
SplitLoDTensor
(
const
std
::
vector
<
platform
::
Place
>
places
)
const
;
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
9f488783
...
...
@@ -150,13 +150,30 @@ void ParallelExecutor::BCastParamsToGPUs(
#endif
}
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
)
{
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
,
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
feed_tensors
)
{
platform
::
RecordBlock
b
(
0
);
SplitTensorToPlaces
(
feed_tensors
);
auto
fetch_data
=
member_
->
executor_
->
Run
(
fetch_tensors
);
*
member_
->
global_scope_
->
Var
(
fetched_var_name
)
->
GetMutable
<
FeedFetchList
>
()
=
fetch_data
;
}
void
ParallelExecutor
::
SplitTensorToPlaces
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
feed_tensors
)
{
for
(
auto
it
:
feed_tensors
)
{
auto
lod_tensors
=
it
.
second
.
SplitLoDTensor
(
member_
->
places_
);
for
(
size_t
j
=
0
;
j
<
member_
->
places_
.
size
();
++
j
)
{
// TODO(panxy0718): Do I need to delete this var?
member_
->
local_scopes_
[
j
]
->
Var
(
it
.
first
)
->
GetMutable
<
LoDTensor
>
()
->
ShareDataWith
(
lod_tensors
[
j
]);
}
}
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/parallel_executor.h
浏览文件 @
9f488783
...
...
@@ -42,9 +42,13 @@ class ParallelExecutor {
bool
allow_op_delay
);
void
Run
(
const
std
::
vector
<
std
::
string
>&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
=
"fetched_var"
);
const
std
::
string
&
fetched_var_name
,
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
feed_tensors
);
private:
void
SplitTensorToPlaces
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
feed_tensors
);
ParallelExecutorPrivate
*
member_
;
void
BCastParamsToGPUs
(
const
ProgramDesc
&
startup_program
)
const
;
...
...
paddle/fluid/operators/conv_cudnn_op.cu.cc
浏览文件 @
9f488783
...
...
@@ -128,10 +128,32 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
handle
,
cudnn_input_desc
,
cudnn_filter_desc
,
cudnn_conv_desc
,
cudnn_output_desc
,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
,
workspace_size_limit
,
&
algo
));
#if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
// Tensor core is supported since the volta GPU and
// is only enabled when input and filter data are float16
if
(
dev_ctx
.
GetComputeCapability
()
>=
70
&&
std
::
type_index
(
typeid
(
T
))
==
std
::
type_index
(
typeid
(
platform
::
float16
)))
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnSetConvolutionMathType
(
cudnn_conv_desc
,
CUDNN_TENSOR_OP_MATH
));
// Currently tensor core is only enabled using this algo
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
;
}
else
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnSetConvolutionMathType
(
cudnn_conv_desc
,
CUDNN_DEFAULT_MATH
));
}
#endif
// get workspace size able to allocate
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnGetConvolutionForwardWorkspaceSize
(
handle
,
cudnn_input_desc
,
cudnn_filter_desc
,
cudnn_conv_desc
,
cudnn_output_desc
,
algo
,
&
workspace_size_in_bytes
));
// It is possible for float16 on Volta GPU to allocate more memory than
// the limit because the algo is overrided to use tensor core.
PADDLE_ENFORCE_LE
(
workspace_size_in_bytes
,
workspace_size_limit
,
"workspace_size to be allocated exceeds the limit"
);
// Allocate on GPU memory
platform
::
CUDAPlace
gpu
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
cudnn_workspace
=
paddle
::
memory
::
Alloc
(
gpu
,
workspace_size_in_bytes
);
...
...
paddle/fluid/operators/fc_mkldnn_op.cc
浏览文件 @
9f488783
...
...
@@ -27,8 +27,8 @@ template <typename T>
class
MKLDNNMD
{
public:
explicit
MKLDNNMD
(
const
T
*
in
,
const
T
*
w
,
bool
bias
)
:
in
{
paddle
::
framework
::
vectorize2int
(
in
->
dims
())}
,
w
{
paddle
::
framework
::
vectorize2int
(
w
->
dims
())}
{
:
in
(
paddle
::
framework
::
vectorize2int
(
in
->
dims
()))
,
w
(
paddle
::
framework
::
vectorize2int
(
w
->
dims
()))
{
with_bias_
=
bias
;
}
...
...
@@ -78,7 +78,7 @@ class MKLDNNMD {
class
MKLDNNMemory
{
public:
MKLDNNMemory
(
MKLDNNMD
<
Tensor
>*
t
,
const
mkldnn
::
engine
&
e
)
:
md_
{
t
},
engine_
{
e
}
{}
:
md_
(
t
),
engine_
(
e
)
{}
virtual
~
MKLDNNMemory
()
=
default
;
template
<
typename
Output
>
...
...
paddle/fluid/platform/cudnn_helper.h
浏览文件 @
9f488783
...
...
@@ -257,9 +257,11 @@ class ScopedConvolutionDescriptor {
}
#endif
cudnnDataType_t
compute_type
=
(
type
==
CUDNN_DATA_DOUBLE
)
?
CUDNN_DATA_DOUBLE
:
CUDNN_DATA_FLOAT
;
PADDLE_ENFORCE
(
dynload
::
cudnnSetConvolutionNdDescriptor
(
desc_
,
pads
.
size
(),
pads
.
data
(),
strides
.
data
(),
dilations
.
data
(),
CUDNN_CROSS_CORRELATION
,
type
));
CUDNN_CROSS_CORRELATION
,
compute_
type
));
return
desc_
;
}
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
9f488783
...
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include <cudnn.h>
#include <dlfcn.h>
#include <mutex>
#include <mutex>
// NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
namespace
paddle
{
...
...
@@ -140,7 +140,8 @@ CUDNN_DNN_ROUTINE_EACH_R5(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#if CUDNN_VERSION >= 7001
#define CUDNN_DNN_ROUTINE_EACH_R7(__macro) \
__macro(cudnnSetConvolutionGroupCount);
__macro(cudnnSetConvolutionGroupCount); \
__macro(cudnnSetConvolutionMathType);
CUDNN_DNN_ROUTINE_EACH_R7
(
DECLARE_DYNAMIC_LOAD_CUDNN_WRAP
)
#endif
...
...
paddle/fluid/pybind/protobuf.cc
浏览文件 @
9f488783
...
...
@@ -15,6 +15,8 @@ limitations under the License. */
#include "paddle/fluid/pybind/protobuf.h"
#include <deque>
#include <iostream>
#include <string>
#include <tuple>
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
...
...
@@ -98,7 +100,7 @@ namespace pybind {
using
namespace
paddle
::
framework
;
// NOLINT
template
<
typename
T
>
static
py
::
bytes
SerializeMessage
(
T
&
self
)
{
static
py
::
bytes
SerializeMessage
(
T
&
self
)
{
// NOLINT
// Check IsInitialized in Python
std
::
string
retv
;
PADDLE_ENFORCE
(
self
.
Proto
()
->
SerializePartialToString
(
&
retv
),
...
...
@@ -107,7 +109,7 @@ static py::bytes SerializeMessage(T &self) {
}
// Bind Methods
void
BindProgramDesc
(
py
::
module
&
m
)
{
void
BindProgramDesc
(
py
::
module
&
m
)
{
// NOLINT
py
::
class_
<
ProgramDesc
>
(
m
,
"ProgramDesc"
,
""
)
.
def
(
py
::
init
<>
())
.
def
(
"__init__"
,
...
...
@@ -151,7 +153,7 @@ void BindProgramDesc(py::module &m) {
});
}
void
BindBlockDesc
(
py
::
module
&
m
)
{
void
BindBlockDesc
(
py
::
module
&
m
)
{
// NOLINT
py
::
class_
<
BlockDesc
>
(
m
,
"BlockDesc"
,
""
)
.
def_property_readonly
(
"id"
,
&
BlockDesc
::
ID
)
.
def_property_readonly
(
"parent"
,
&
BlockDesc
::
Parent
)
...
...
@@ -200,13 +202,19 @@ void BindBlockDesc(py::module &m) {
return
self
.
FindVarRecursive
(
name
);
},
py
::
return_value_policy
::
reference
)
.
def
(
"remove_var"
,
[](
BlockDesc
&
self
,
py
::
bytes
byte_name
)
{
std
::
string
name
=
byte_name
;
return
self
.
RemoveVar
(
name
);
},
py
::
return_value_policy
::
reference
)
.
def
(
"all_vars"
,
&
BlockDesc
::
AllVars
,
py
::
return_value_policy
::
reference
)
.
def
(
"op_size"
,
&
BlockDesc
::
OpSize
)
.
def
(
"op"
,
&
BlockDesc
::
Op
,
py
::
return_value_policy
::
reference
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
BlockDesc
>
);
}
void
BindVarDsec
(
py
::
module
&
m
)
{
void
BindVarDsec
(
py
::
module
&
m
)
{
// NOLINT
py
::
class_
<
VarDesc
>
var_desc
(
m
,
"VarDesc"
,
""
);
var_desc
.
def
(
"name"
,
...
...
@@ -257,7 +265,7 @@ void BindVarDsec(py::module &m) {
.
value
(
"RAW"
,
proto
::
VarType
::
RAW
);
}
void
BindOpDesc
(
py
::
module
&
m
)
{
void
BindOpDesc
(
py
::
module
&
m
)
{
// NOLINT
py
::
enum_
<
proto
::
AttrType
>
(
m
,
"AttrType"
,
""
)
.
value
(
"INT"
,
proto
::
AttrType
::
INT
)
.
value
(
"INTS"
,
proto
::
AttrType
::
INTS
)
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
9f488783
...
...
@@ -26,25 +26,29 @@ class ParallelExecutor(object):
use_cuda
,
num_threads
=
None
,
allow_op_delay
=
False
):
places
=
[]
self
.
_places
=
[]
self
.
_act_places
=
[]
if
use_cuda
:
for
i
in
xrange
(
core
.
get_cuda_device_count
()):
p
=
core
.
Place
()
p
.
set_place
(
core
.
CUDAPlace
(
i
))
places
.
append
(
p
)
self
.
_act_places
.
append
(
core
.
CUDAPlace
(
i
))
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
else
:
for
i
in
xrange
(
multiprocessing
.
cpu_count
()):
p
=
core
.
Place
()
p
.
set_place
(
core
.
CPUPlace
())
places
.
append
(
p
)
self
.
_act_places
.
append
(
core
.
CPUPlace
(
i
))
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
assert
self
.
_places
,
"no place for execution"
if
num_threads
is
None
:
if
use_cuda
:
# Experiments on se-resnext shows that too many threads hurt
# performance. Worth tunning for other models in the future.
num_threads
=
len
(
places
)
num_threads
=
len
(
self
.
_
places
)
else
:
min
(
len
(
places
)
*
2
,
multiprocessing
.
cpu_count
())
min
(
len
(
self
.
_
places
)
*
2
,
multiprocessing
.
cpu_count
())
startup
=
framework
.
default_startup_program
()
main
=
framework
.
default_main_program
()
...
...
@@ -53,7 +57,7 @@ class ParallelExecutor(object):
self
.
executor
=
core
.
ParallelExecutor
(
num_threads
,
True
if
use_cuda
else
False
,
# use_event
places
,
self
.
_
places
,
set
([
p
.
name
for
p
in
main
.
global_block
().
iter_parameters
()
if
not
p
.
stop_gradient
...
...
@@ -65,8 +69,25 @@ class ParallelExecutor(object):
allow_op_delay
)
self
.
scope
=
scope
def
run
(
self
,
fetch_list
):
def
run
(
self
,
fetch_list
,
feed_dict
=
{}):
"""
:param fetch_list: A list of variable names that will be fetched.
:param feed_dict: A dict mapping for feed variable name to LoDTensor
or numpy array.
:return: fetched value list.
"""
if
not
isinstance
(
feed_dict
,
dict
):
raise
TypeError
(
"feed_dict should be a dict"
)
feed_tensor_dict
=
{}
for
i
,
feed_name
in
enumerate
(
feed_dict
):
feed_tensor
=
feed_dict
[
feed_name
]
if
not
isinstance
(
feed_tensor
,
core
.
LoDTensor
):
feed_tensor
=
core
.
LoDTensor
()
feed_tensor
.
set
(
feed_dict
[
feed_name
],
self
.
_act_places
[
0
])
feed_tensor_dict
[
feed_name
]
=
feed_tensor
fetch_var_name
=
'@FETCHED_VAR_NAME@'
self
.
executor
.
run
(
fetch_list
,
fetch_var_name
)
self
.
executor
.
run
(
fetch_list
,
fetch_var_name
,
feed_tensor_dict
)
arr
=
self
.
scope
.
find_var
(
fetch_var_name
).
get_lod_tensor_array
()
return
[
arr
[
i
]
for
i
in
range
(
len
(
arr
))]
python/paddle/fluid/tests/unittests/test_parallel_executor.py
浏览文件 @
9f488783
...
...
@@ -21,13 +21,17 @@ import paddle.dataset.mnist as mnist
import
paddle.dataset.wmt16
as
wmt16
def
simple_fc_net
():
reader
=
fluid
.
layers
.
open_recordio_file
(
filename
=
'./mnist.recordio'
,
shapes
=
[[
-
1
,
784
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
])
img
,
label
=
fluid
.
layers
.
read_file
(
reader
)
def
simple_fc_net
(
use_feed
):
if
use_feed
:
img
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
784
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
else
:
reader
=
fluid
.
layers
.
open_recordio_file
(
filename
=
'./mnist.recordio'
,
shapes
=
[[
-
1
,
784
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
])
img
,
label
=
fluid
.
layers
.
read_file
(
reader
)
hidden
=
img
for
_
in
xrange
(
4
):
hidden
=
fluid
.
layers
.
fc
(
...
...
@@ -42,13 +46,18 @@ def simple_fc_net():
return
loss
def
fc_with_batchnorm
():
reader
=
fluid
.
layers
.
open_recordio_file
(
filename
=
'./mnist.recordio'
,
shapes
=
[[
-
1
,
784
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
])
img
,
label
=
fluid
.
layers
.
read_file
(
reader
)
def
fc_with_batchnorm
(
use_feed
):
if
use_feed
:
img
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
784
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
else
:
reader
=
fluid
.
layers
.
open_recordio_file
(
filename
=
'./mnist.recordio'
,
shapes
=
[[
-
1
,
784
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
])
img
,
label
=
fluid
.
layers
.
read_file
(
reader
)
hidden
=
img
for
_
in
xrange
(
1
):
hidden
=
fluid
.
layers
.
fc
(
...
...
@@ -135,7 +144,9 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
return
fluid
.
layers
.
elementwise_add
(
x
=
short
,
y
=
scale
,
act
=
'relu'
)
def
SE_ResNeXt152Small
(
batch_size
=
2
):
def
SE_ResNeXt152Small
(
batch_size
=
2
,
use_feed
=
False
):
assert
not
use_feed
,
"SE_ResNeXt doesn't support feed yet"
img
=
fluid
.
layers
.
fill_constant
(
shape
=
[
batch_size
,
3
,
224
,
224
],
dtype
=
'float32'
,
value
=
0.0
)
label
=
fluid
.
layers
.
fill_constant
(
...
...
@@ -185,30 +196,28 @@ class TestParallelExecutorBase(unittest.TestCase):
memory_opt
=
True
,
iter
=
10
,
batch_size
=
None
,
allow_op_delay
=
False
):
allow_op_delay
=
False
,
feed_dict
=
{}):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
loss
=
method
()
loss
=
method
(
use_feed
=
len
(
feed_dict
)
>
0
)
adam
=
fluid
.
optimizer
.
Adam
()
adam
.
minimize
(
loss
)
if
memory_opt
:
fluid
.
memory_optimize
(
main
)
exe
=
fluid
.
ParallelExecutor
(
loss_name
=
loss
.
name
,
use_cuda
=
True
,
allow_op_delay
=
allow_op_delay
)
exe
=
fluid
.
ParallelExecutor
(
loss_name
=
loss
.
name
,
use_cuda
=
True
)
if
batch_size
is
not
None
:
batch_size
*=
fluid
.
core
.
get_cuda_device_count
()
begin
=
time
.
time
()
first_loss
,
=
exe
.
run
([
loss
.
name
])
first_loss
,
=
exe
.
run
([
loss
.
name
]
,
feed_dict
=
feed_dict
)
first_loss
=
numpy
.
array
(
first_loss
)
for
i
in
xrange
(
iter
):
exe
.
run
([])
exe
.
run
([]
,
feed_dict
=
feed_dict
)
last_loss
,
=
exe
.
run
([
loss
.
name
])
last_loss
,
=
exe
.
run
([
loss
.
name
]
,
feed_dict
=
feed_dict
)
end
=
time
.
time
()
if
batch_size
is
not
None
:
...
...
@@ -242,9 +251,19 @@ class TestMNIST(TestParallelExecutorBase):
self
.
check_network_convergence
(
simple_fc_net
)
self
.
check_network_convergence
(
simple_fc_net
,
allow_op_delay
=
True
)
img
=
numpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
numpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
self
.
check_network_convergence
(
simple_fc_net
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
})
def
test_batchnorm_fc
(
self
):
self
.
check_network_convergence
(
fc_with_batchnorm
)
self
.
check_network_convergence
(
fc_with_batchnorm
,
allow_op_delay
=
True
)
img
=
numpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
numpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
self
.
check_network_convergence
(
fc_with_batchnorm
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
})
class
TestResnet
(
TestParallelExecutorBase
):
...
...
@@ -400,7 +419,8 @@ def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head):
import
transformer_model
def
transformer
():
def
transformer
(
use_feed
):
assert
not
use_feed
,
"transfomer doesn't support feed yet"
return
transformer_model
.
transformer
(
ModelHyperParams
.
src_vocab_size
+
1
,
ModelHyperParams
.
trg_vocab_size
+
1
,
ModelHyperParams
.
max_length
+
1
,
...
...
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
浏览文件 @
9f488783
...
...
@@ -19,9 +19,9 @@ from paddle.fluid.framework import Program
class
TestOpDesc
(
unittest
.
TestCase
):
def
test_op_desc
(
self
):
prog
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
)
block
=
prog
.
block
(
0
)
prog
ram_desc
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
ram_desc
)
block
=
prog
ram_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block
)
op
=
block
.
append_op
()
self
.
assertIsNotNone
(
op
)
...
...
@@ -67,7 +67,7 @@ class TestOpDesc(unittest.TestCase):
self
.
assertEqual
(
8
,
len
(
op
.
attr_names
()))
op
.
set_block_attr
(
"block_attr"
,
prog
.
block
(
0
))
op
.
set_block_attr
(
"block_attr"
,
prog
ram_desc
.
block
(
0
))
self
.
assertEqual
(
0
,
op
.
block_attr
(
"block_attr"
))
mul_op
=
block
.
append_op
()
...
...
@@ -88,20 +88,20 @@ class TestProgramDesc(unittest.TestCase):
del
program_desc
def
test_append_block
(
self
):
prog_desc
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog_desc
)
block_root
=
prog_desc
.
block
(
0
)
prog
ram
_desc
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
ram
_desc
)
block_root
=
prog
ram
_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block_root
)
self
.
assertEqual
(
block_root
.
id
,
0
)
block1
=
prog_desc
.
append_block
(
block_root
)
block2
=
prog_desc
.
append_block
(
block1
)
block1
=
prog
ram
_desc
.
append_block
(
block_root
)
block2
=
prog
ram
_desc
.
append_block
(
block1
)
self
.
assertIsNotNone
(
block1
)
self
.
assertEqual
(
block1
.
id
,
block2
.
parent
)
self
.
assertEqual
(
block_root
.
id
,
block1
.
parent
)
block3
=
prog_desc
.
append_block
(
block_root
)
block3
=
prog
ram
_desc
.
append_block
(
block_root
)
self
.
assertEqual
(
block3
.
parent
,
block_root
.
id
)
self
.
assertEqual
(
prog_desc
.
block
(
1
).
id
,
1
)
self
.
assertEqual
(
4
,
prog_desc
.
num_blocks
())
self
.
assertEqual
(
prog
ram
_desc
.
block
(
1
).
id
,
1
)
self
.
assertEqual
(
4
,
prog
ram
_desc
.
num_blocks
())
class
TestVarDesc
(
unittest
.
TestCase
):
...
...
@@ -162,9 +162,9 @@ class TestVarDesc(unittest.TestCase):
class
TestBlockDesc
(
unittest
.
TestCase
):
def
test_add_var
(
self
):
prog
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
)
block
=
prog
.
block
(
0
)
prog
ram_desc
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
ram_desc
)
block
=
prog
ram_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block
)
var1
=
block
.
var
(
"var1"
)
var2
=
block
.
var
(
"var2"
)
...
...
@@ -175,9 +175,9 @@ class TestBlockDesc(unittest.TestCase):
self
.
assertEqual
(
var2_re
,
var2
)
def
test_add_op
(
self
):
prog
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
)
block
=
prog
.
block
(
0
)
prog
ram_desc
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
ram_desc
)
block
=
prog
ram_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block
)
op1
=
block
.
append_op
()
op2
=
block
.
append_op
()
...
...
@@ -189,9 +189,9 @@ class TestBlockDesc(unittest.TestCase):
def
test_remove_op
(
self
):
program
=
Program
()
prog
=
program
.
desc
self
.
assertIsNotNone
(
prog
)
block
=
prog
.
block
(
0
)
prog
ram_desc
=
program
.
desc
self
.
assertIsNotNone
(
prog
ram_desc
)
block
=
prog
ram_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block
)
op0
=
block
.
append_op
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录