Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)

Commit ed2d7d7d
Authored Apr 13, 2018 by Yu Yang

    Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/mix_cpu_gpu_op

Parents: 4452ff76, 925c17ab
Showing 16 changed files with 254 additions and 124 deletions (+254, -124):
cmake/cblas.cmake (+1, -1)
paddle/fluid/framework/executor.cc (+65, -39)
paddle/fluid/framework/executor.h (+10, -0)
paddle/fluid/framework/threadpool.cc (+19, -0)
paddle/fluid/framework/threadpool.h (+36, -20)
paddle/fluid/inference/io.cc (+1, -1)
paddle/fluid/inference/tests/book/test_inference_image_classification.cc (+4, -4)
paddle/fluid/inference/tests/test_helper.h (+19, -5)
paddle/fluid/operators/detail/grpc_client.cc (+7, -5)
paddle/fluid/operators/detail/grpc_server.cc (+1, -1)
paddle/fluid/operators/reader/create_double_buffer_reader_op.cc (+22, -40)
paddle/fluid/operators/reshape_op.h (+2, -0)
paddle/fluid/operators/uniform_random_op.cc (+13, -1)
paddle/fluid/operators/uniform_random_op.cu (+13, -1)
python/paddle/fluid/tests/book/test_recognize_digits.py (+0, -1)
python/paddle/fluid/tests/unittests/test_uniform_random_op.py (+41, -5)
cmake/cblas.cmake

```diff
@@ -78,7 +78,7 @@ if(NOT CMAKE_CROSSCOMPILING)
       /usr/lib/reference/
   )
 else()
-  # Diable the finding of reference cblas under host's system path
+  # Disable the finding of reference cblas under host's system path
   set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include)
   set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
 endif()
```
paddle/fluid/framework/executor.cc

```diff
@@ -83,8 +83,8 @@ static void CheckTensorNANOrInf(const std::string& name,
   if (tensor.memory_size() == 0) {
     return;
   }
-  if (tensor.type().hash_code() != typeid(float).hash_code() &&
-      tensor.type().hash_code() != typeid(double).hash_code()) {
+  if (tensor.type().hash_code() != typeid(float).hash_code() &&   // NOLINT
+      tensor.type().hash_code() != typeid(double).hash_code()) {  // NOLINT
     return;
   }
   PADDLE_ENFORCE(!framework::TensorContainsInf(tensor),
@@ -145,12 +145,13 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
 // Return true if the block has feed operators and holder of matching info.
 static bool has_feed_operators(
     const BlockDesc& block,
-    std::map<std::string, const LoDTensor*>& feed_targets,
+    const std::map<std::string, const LoDTensor*>& feed_targets,
     const std::string& feed_holder_name) {
   size_t feed_count = 0;
   for (auto* op : block.AllOps()) {
     if (op->Type() == kFeedOpType) {
       feed_count++;
+      // The input variable's name of feed_op should be feed_holder_name.
       PADDLE_ENFORCE_EQ(op->Input("X")[0], feed_holder_name,
                         "Input to feed op should be '%s'", feed_holder_name);
       std::string feed_target_name = op->Output("Out")[0];
@@ -166,7 +167,8 @@ static bool has_feed_operators(
       feed_count, feed_targets.size(),
       "The number of feed operators should match 'feed_targets'");
-  // When feed operator are present, so should be feed_holder
-  auto var = block.FindVar(feed_holder_name);
-  PADDLE_ENFORCE_NOT_NULL(var, "Block should already have a '%s' variable",
-                          feed_holder_name);
+  if (!feed_holder_name.empty()) {
+    // When feed operator are present, so should be feed_holder.
+    auto var = block.FindVar(feed_holder_name);
+    PADDLE_ENFORCE_NOT_NULL(var, "Block should already have a '%s' variable",
+                            feed_holder_name);
@@ -174,6 +176,7 @@ static bool has_feed_operators(
                       "'%s' variable should be 'FEED_MINIBATCH' type",
                       feed_holder_name);
+  }
   }
   return feed_count > 0;
 }
@@ -185,12 +188,14 @@ static bool has_feed_operators(
 // and fetch_holder_name. Raise exception when any mismatch is found.
 // Return true if the block has fetch operators and holder of matching info.
 static bool has_fetch_operators(
-    const BlockDesc& block, std::map<std::string, LoDTensor*>& fetch_targets,
+    const BlockDesc& block,
+    const std::map<std::string, LoDTensor*>& fetch_targets,
     const std::string& fetch_holder_name) {
   size_t fetch_count = 0;
   for (auto* op : block.AllOps()) {
     if (op->Type() == kFetchOpType) {
       fetch_count++;
+      // The output variable's name of fetch_op should be fetch_holder_name.
       PADDLE_ENFORCE_EQ(op->Output("Out")[0], fetch_holder_name,
                         "Output of fetch op should be '%s'", fetch_holder_name);
       std::string fetch_target_name = op->Input("X")[0];
@@ -206,7 +211,8 @@ static bool has_fetch_operators(
       fetch_count, fetch_targets.size(),
       "The number of fetch operators should match 'fetch_targets'");
-  // When fetch operator are present, so should be fetch_holder
-  auto var = block.FindVar(fetch_holder_name);
-  PADDLE_ENFORCE_NOT_NULL(var, "Block should already have a '%s' variable",
-                          fetch_holder_name);
+  if (!fetch_holder_name.empty()) {
+    // When fetch operator are present, so should be fetch_holder.
+    auto var = block.FindVar(fetch_holder_name);
+    PADDLE_ENFORCE_NOT_NULL(var, "Block should already have a '%s' variable",
+                            fetch_holder_name);
@@ -214,6 +220,7 @@ static bool has_fetch_operators(
                       "'%s' variable should be 'FETCH_LIST' type",
                       fetch_holder_name);
+  }
   }
   return fetch_count > 0;
 }
@@ -259,16 +266,6 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     }
   }

-  // map the data of feed_targets to feed_holder
-  for (auto* op : global_block->AllOps()) {
-    if (op->Type() == kFeedOpType) {
-      std::string feed_target_name = op->Output("Out")[0];
-      int idx = boost::get<int>(op->GetAttr("col"));
-      SetFeedVariable(scope, *feed_targets[feed_target_name], feed_holder_name,
-                      idx);
-    }
-  }
-
   if (!has_fetch_ops) {
     // create fetch_holder variable
     auto* fetch_holder = global_block->Var(fetch_holder_name);
@@ -292,17 +289,9 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     }
   }

-  Run(*copy_program, scope, 0, create_vars, create_vars);
-
-  // obtain the data of fetch_targets from fetch_holder
-  for (auto* op : global_block->AllOps()) {
-    if (op->Type() == kFetchOpType) {
-      std::string fetch_target_name = op->Input("X")[0];
-      int idx = boost::get<int>(op->GetAttr("col"));
-      *fetch_targets[fetch_target_name] =
-          GetFetchVariable(*scope, fetch_holder_name, idx);
-    }
-  }
+  auto ctx = Prepare(*copy_program, 0);
+  RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets,
+                     create_vars, feed_holder_name, fetch_holder_name);
 }

 std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
@@ -370,5 +359,42 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
   }
 }

+void Executor::RunPreparedContext(
+    ExecutorPrepareContext* ctx, Scope* scope,
+    std::map<std::string, const LoDTensor*>& feed_targets,
+    std::map<std::string, LoDTensor*>& fetch_targets, bool create_vars,
+    const std::string& feed_holder_name, const std::string& fetch_holder_name) {
+  auto& global_block = ctx->prog_.Block(ctx->block_id_);
+
+  PADDLE_ENFORCE(
+      has_feed_operators(global_block, feed_targets, feed_holder_name),
+      "Program in ExecutorPrepareContext should has feed_ops.");
+  PADDLE_ENFORCE(
+      has_fetch_operators(global_block, fetch_targets, fetch_holder_name),
+      "Program in the prepared context should has fetch_ops.");
+
+  // map the data of feed_targets to feed_holder
+  for (auto* op : global_block.AllOps()) {
+    if (op->Type() == kFeedOpType) {
+      std::string feed_target_name = op->Output("Out")[0];
+      int idx = boost::get<int>(op->GetAttr("col"));
+      SetFeedVariable(scope, *feed_targets[feed_target_name], feed_holder_name,
+                      idx);
+    }
+  }
+
+  RunPreparedContext(ctx, scope, create_vars, create_vars);
+
+  // obtain the data of fetch_targets from fetch_holder
+  for (auto* op : global_block.AllOps()) {
+    if (op->Type() == kFetchOpType) {
+      std::string fetch_target_name = op->Input("X")[0];
+      int idx = boost::get<int>(op->GetAttr("col"));
+      *fetch_targets[fetch_target_name] =
+          GetFetchVariable(*scope, fetch_holder_name, idx);
+    }
+  }
+}
+
 }  // namespace framework
 }  // namespace paddle
```
paddle/fluid/framework/executor.h

```diff
@@ -14,6 +14,9 @@ limitations under the License. */
 #pragma once

+#include <map>
+#include <string>
+#include <vector>
+
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
@@ -70,6 +73,13 @@ class Executor {
                           bool create_local_scope = true,
                           bool create_vars = true);

+  void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
+                          std::map<std::string, const LoDTensor*>& feed_targets,
+                          std::map<std::string, LoDTensor*>& fetch_targets,
+                          bool create_vars = true,
+                          const std::string& feed_holder_name = "feed",
+                          const std::string& fetch_holder_name = "fetch");
+
  private:
   const platform::Place place_;
 };
```
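Together, the executor.cc and executor.h changes let a caller prepare a program once and still remap feed/fetch data on every run. A minimal usage sketch, not from this commit: `place`, `program`, `scope`, `input`, and `output` are assumed to be set up by the caller, and "img"/"fc_out" are hypothetical feed/fetch target names.

```cpp
// Sketch only; names marked hypothetical are not from the commit.
paddle::framework::Executor executor(place);

std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
feed_targets["img"] = &input;       // data to copy into the feed holder
feed_targets being keyed by the program's feed names;
fetch_targets["fc_out"] = &output;  // filled back from the fetch holder

// Pay the op-creation cost once, then reuse the prepared context. Note the
// new overload enforces that the prepared block already contains matching
// feed/fetch operators.
auto ctx = executor.Prepare(program, 0 /* block_id */);
executor.RunPreparedContext(ctx.get(), &scope, feed_targets, fetch_targets);
```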
paddle/fluid/framework/threadpool.cc

```diff
@@ -14,8 +14,12 @@
 #include "paddle/fluid/framework/threadpool.h"

+#include "gflags/gflags.h"
 #include "paddle/fluid/platform/enforce.h"

+DEFINE_int32(io_threadpool_size, 100,
+             "number of threads used for doing IO, default 100");
+
 namespace paddle {
 namespace framework {
@@ -91,5 +95,20 @@ void ThreadPool::TaskLoop() {
   }
 }

+std::unique_ptr<ThreadPool> ThreadPoolIO::io_threadpool_(nullptr);
+std::once_flag ThreadPoolIO::io_init_flag_;
+
+ThreadPool* ThreadPoolIO::GetInstanceIO() {
+  std::call_once(io_init_flag_, &ThreadPoolIO::InitIO);
+  return io_threadpool_.get();
+}
+
+void ThreadPoolIO::InitIO() {
+  if (io_threadpool_.get() == nullptr) {
+    // TODO(typhoonzero1986): make this configurable
+    io_threadpool_.reset(new ThreadPool(FLAGS_io_threadpool_size));
+  }
+}
 }  // namespace framework
 }  // namespace paddle
```
paddle/fluid/framework/threadpool.h

```diff
@@ -14,12 +14,12 @@ limitations under the License. */
 #pragma once

-#include <condition_variable>
+#include <condition_variable>  // NOLINT
 #include <functional>
-#include <future>
-#include <mutex>
+#include <future>  // NOLINT
+#include <mutex>   // NOLINT
 #include <queue>
-#include <thread>
+#include <thread>  // NOLINT
 #include <vector>

 #include "glog/logging.h"
 #include "paddle/fluid/platform/enforce.h"
@@ -28,6 +28,22 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

+struct ExceptionHandler {
+  mutable std::future<std::unique_ptr<platform::EnforceNotMet>> future_;
+  explicit ExceptionHandler(
+      std::future<std::unique_ptr<platform::EnforceNotMet>>&& f)
+      : future_(std::move(f)) {}
+  void operator()() const {
+    auto ex = this->future_.get();
+    if (ex != nullptr) {
+      LOG(FATAL) << "The exception is thrown inside the thread pool. You "
+                    "should use RunAndGetException to handle the exception.\n"
+                    "The default exception handler is LOG(FATAL)."
+                 << ex->what();
+    }
+  }
+};
+
 // ThreadPool maintains a queue of tasks, and runs them using a fixed
 // number of threads.
 class ThreadPool {
@@ -87,22 +103,6 @@ class ThreadPool {
   void Wait();

  private:
-  struct ExceptionHandler {
-    mutable std::future<std::unique_ptr<platform::EnforceNotMet>> future_;
-    explicit ExceptionHandler(
-        std::future<std::unique_ptr<platform::EnforceNotMet>>&& f)
-        : future_(std::move(f)) {}
-    void operator()() const {
-      auto ex = this->future_.get();
-      if (ex != nullptr) {
-        LOG(FATAL) << "The exception is thrown inside the thread pool. You "
-                      "should use RunAndGetException to handle the exception.\n"
-                      "The default exception handler is LOG(FATAL)."
-                   << ex->what();
-      }
-    }
-  };
-
   DISABLE_COPY_AND_ASSIGN(ThreadPool);

   // If the task queue is empty and avaialbe is equal to the number of
@@ -135,6 +135,17 @@ class ThreadPool {
   std::condition_variable completed_;
 };

+class ThreadPoolIO : ThreadPool {
+ public:
+  static ThreadPool* GetInstanceIO();
+  static void InitIO();
+
+ private:
+  // NOTE: threadpool in base will be inhereted here.
+  static std::unique_ptr<ThreadPool> io_threadpool_;
+  static std::once_flag io_init_flag_;
+};
+
 // Run a function asynchronously.
 // NOTE: The function must return void. If the function need to return a value,
 // you can use lambda to capture a value pointer.
@@ -143,5 +154,10 @@ std::future<void> Async(Callback callback) {
   return ThreadPool::GetInstance()->Run(callback);
 }

+template <typename Callback>
+std::future<void> AsyncIO(Callback callback) {
+  return ThreadPoolIO::GetInstanceIO()->Run(callback);
+}
+
 }  // namespace framework
 }  // namespace paddle
```
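The threadpool changes add a second, I/O-oriented singleton pool so that blocking work (such as the gRPC calls in grpc_client.cc below) no longer occupies the compute pool. A minimal sketch of how the new helper might be called; the lambda body is a hypothetical placeholder, not from the commit:

```cpp
// Dispatch a blocking task onto the dedicated I/O pool, whose size comes from
// the new --io_threadpool_size flag (default 100). Async() still targets the
// original compute pool.
std::future<void> f = paddle::framework::AsyncIO([] {
  // ... blocking I/O work here (hypothetical placeholder) ...
});
f.wait();  // both Async and AsyncIO return std::future<void>
```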
paddle/fluid/inference/io.cc

```diff
@@ -23,7 +23,7 @@ limitations under the License. */
 namespace paddle {
 namespace inference {

-// Temporarilly add this function for exposing framework::InitDevices() when
+// Temporarily add this function for exposing framework::InitDevices() when
 // linking the inference shared library.
 void Init(bool init_p2p) { framework::InitDevices(init_p2p); }
```
paddle/fluid/inference/tests/book/test_inference_image_classification.cc

```diff
@@ -46,8 +46,8 @@ TEST(inference, image_classification) {
   // Run inference on CPU
   LOG(INFO) << "--- CPU Runs: ---";
-  TestInference<paddle::platform::CPUPlace, false>(dirname, cpu_feeds,
-                                                   cpu_fetchs1, FLAGS_repeat);
+  TestInference<paddle::platform::CPUPlace, false, true>(
+      dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat);
   LOG(INFO) << output1.dims();

 #ifdef PADDLE_WITH_CUDA
@@ -57,8 +57,8 @@ TEST(inference, image_classification) {
   // Run inference on CUDA GPU
   LOG(INFO) << "--- GPU Runs: ---";
-  TestInference<paddle::platform::CUDAPlace, false>(dirname, cpu_feeds,
-                                                    cpu_fetchs2, FLAGS_repeat);
+  TestInference<paddle::platform::CUDAPlace, false, true>(
+      dirname, cpu_feeds, cpu_fetchs2, FLAGS_repeat);
   LOG(INFO) << output2.dims();

   CheckError<float>(output1, output2);
```
paddle/fluid/inference/tests/test_helper.h

```diff
@@ -89,7 +89,7 @@ void CheckError(const paddle::framework::LoDTensor& output1,
   EXPECT_EQ(count, 0U) << "There are " << count << " different elements.";
 }

-template <typename Place, bool CreateVars = true>
+template <typename Place, bool CreateVars = true, bool PrepareContext = false>
 void TestInference(const std::string& dirname,
                    const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
                    const std::vector<paddle::framework::LoDTensor*>& cpu_fetchs,
@@ -175,8 +175,15 @@ void TestInference(const std::string& dirname,
     }

     // Ignore the profiling results of the first run
-    executor.Run(*inference_program, scope, feed_targets, fetch_targets,
-                 CreateVars);
+    std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx;
+    if (PrepareContext) {
+      ctx = executor.Prepare(*inference_program, 0);
+      executor.RunPreparedContext(ctx.get(), scope, feed_targets,
+                                  fetch_targets, CreateVars);
+    } else {
+      executor.Run(*inference_program, scope, feed_targets, fetch_targets,
+                   CreateVars);
+    }

     // Enable the profiler
     paddle::platform::EnableProfiler(state);
@@ -187,9 +194,16 @@ void TestInference(const std::string& dirname,
           "run_inference",
           paddle::platform::DeviceContextPool::Instance().Get(place));

-      executor.Run(*inference_program, scope, feed_targets, fetch_targets,
-                   CreateVars);
+      if (PrepareContext) {
+        // Note: if you change the inference_program, you need to call
+        // executor.Prepare() again to get a new ExecutorPrepareContext.
+        executor.RunPreparedContext(ctx.get(), scope, feed_targets,
+                                    fetch_targets, CreateVars);
+      } else {
+        executor.Run(*inference_program, scope, feed_targets, fetch_targets,
+                     CreateVars);
+      }
     }

     // Disable the profiler and print the timing information
     paddle::platform::DisableProfiler(
```
paddle/fluid/operators/detail/grpc_client.cc

```diff
@@ -35,7 +35,8 @@ bool RPCClient::AsyncSendVariable(const std::string& ep,
   const framework::Scope* p_scope = &scope;
   const auto ch = GetChannel(ep_val);

-  framework::Async([var_name_val, p_ctx, ep_val, p_scope, time_out, ch, this] {
+  framework::AsyncIO([var_name_val, p_ctx, ep_val, p_scope, time_out, ch,
+                      this] {
     auto* var = p_scope->FindVar(var_name_val);

     ::grpc::ByteBuffer req;
@@ -89,7 +90,8 @@ bool RPCClient::AsyncGetVariable(const std::string& ep,
   const framework::Scope* p_scope = &scope;
   const auto ch = GetChannel(ep_val);

-  framework::Async([var_name_val, ep_val, p_scope, p_ctx, time_out, ch, this] {
+  framework::AsyncIO([var_name_val, ep_val, p_scope, p_ctx, time_out, ch,
+                      this] {
     // prepare input
     sendrecv::VariableMessage req;
     req.set_varname(var_name_val);
@@ -132,7 +134,7 @@ bool RPCClient::AsyncPrefetchVariable(const std::string& ep,
   const framework::Scope* p_scope = &scope;
   const auto ch = GetChannel(ep_val);

-  framework::Async([in_var_name_val, out_var_name_val, ep_val, p_scope, p_ctx,
+  framework::AsyncIO([in_var_name_val, out_var_name_val, ep_val, p_scope, p_ctx,
                     time_out, ch, this] {
     auto* var = p_scope->FindVar(in_var_name_val);
@@ -196,7 +198,7 @@ bool RPCClient::Wait() {
   std::vector<std::future<void>> waits(req_count_);

   for (int i = 0; i < req_count_; i++) {
-    waits[i] = framework::Async([i, &a, this] { a[i] = Proceed(); });
+    waits[i] = framework::AsyncIO([i, &a, this] { a[i] = Proceed(); });
   }

   for (int i = 0; i < req_count_; i++) {
```
paddle/fluid/operators/detail/grpc_server.cc

```diff
@@ -217,10 +217,10 @@ void AsyncGRPCServer::RunSyncUpdate() {
   std::function<void()> prefetch_register =
       std::bind(&AsyncGRPCServer::TryToRegisterNewPrefetchOne, this);

+  // TODO(wuyi): Run these "HandleRequest" in thread pool
   t_send_.reset(
       new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this,
                                 cq_send_.get(), "cq_send", send_register)));

   t_get_.reset(
       new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this,
                                 cq_get_.get(), "cq_get", get_register)));
```
paddle/fluid/operators/reader/create_double_buffer_reader_op.cc

```diff
@@ -33,28 +33,14 @@ static constexpr size_t kChannelSize = 0;  // kCacheSize - 2
 class DoubleBufferReader : public framework::DecoratedReader {
  public:
-  struct Item {
-    Item() : ctx_(nullptr) {}
-    Item(Item&& b) {
-      payloads_ = std::move(b.payloads_);
-      ctx_ = std::move(b.ctx_);
-    }
-    Item& operator=(Item&& b) {
-      payloads_ = std::move(b.payloads_);
-      ctx_ = std::move(b.ctx_);
-      return *this;
-    }
-
-    std::vector<framework::LoDTensor> payloads_;
-    platform::DeviceContext* ctx_;
-  };
-
   explicit DoubleBufferReader(
       ReaderBase* reader, platform::Place target_place = platform::CPUPlace())
       : DecoratedReader(reader), place_(target_place) {
+    cpu_tensor_cache_.resize(kCacheSize);
+    gpu_tensor_cache_.resize(kCacheSize);
 #ifdef PADDLE_WITH_CUDA
-    for (size_t i = 0; i < kCacheSize; ++i) {
-      if (platform::is_gpu_place(place_)) {
+    if (platform::is_gpu_place(place_)) {
+      for (size_t i = 0; i < kCacheSize; ++i) {
         ctxs_.emplace_back(new platform::CUDADeviceContext(
             boost::get<platform::CUDAPlace>(place_)));
       }
@@ -72,7 +58,7 @@ class DoubleBufferReader : public framework::DecoratedReader {
   bool HasNext() const;

   void StartPrefetcher() {
-    channel_ = framework::MakeChannel<Item>(kChannelSize);
+    channel_ = framework::MakeChannel<size_t>(kChannelSize);
     prefetcher_ = std::thread([this] { PrefetchThreadFunc(); });
   }
@@ -88,8 +74,10 @@ class DoubleBufferReader : public framework::DecoratedReader {
   void PrefetchThreadFunc();

   std::thread prefetcher_;
-  framework::Channel<Item>* channel_;
+  framework::Channel<size_t>* channel_;
   platform::Place place_;
+  std::vector<std::vector<framework::LoDTensor>> cpu_tensor_cache_;
+  std::vector<std::vector<framework::LoDTensor>> gpu_tensor_cache_;
   std::vector<std::unique_ptr<platform::DeviceContext>> ctxs_;
 };
@@ -153,11 +141,14 @@ class CreateDoubleBufferReaderOpMaker : public DecoratedReaderMakerBase {
 void DoubleBufferReader::ReadNext(std::vector<framework::LoDTensor>* out) {
   out->clear();
   if (HasNext()) {
-    Item batch;
-    channel_->Receive(&batch);
-    *out = batch.payloads_;
-    if (batch.ctx_) {
-      batch.ctx_->Wait();
+    size_t cached_tensor_id;
+    channel_->Receive(&cached_tensor_id);
+    if (platform::is_gpu_place(place_)) {
+      *out = gpu_tensor_cache_[cached_tensor_id];
+      ctxs_[cached_tensor_id]->Wait();
+    } else {
+      // CPU place
+      *out = cpu_tensor_cache_[cached_tensor_id];
     }
   }
@@ -176,42 +167,33 @@ bool DoubleBufferReader::HasNext() const {
 void DoubleBufferReader::PrefetchThreadFunc() {
   VLOG(5) << "A new prefetch thread starts.";
-  std::vector<std::vector<framework::LoDTensor>> cpu_tensor_cache(kCacheSize);
-  std::vector<std::vector<framework::LoDTensor>> gpu_tensor_cache(kCacheSize);
   size_t cached_tensor_id = 0;
-
   while (true) {
-    Item batch;
-    auto& cpu_batch = cpu_tensor_cache[cached_tensor_id];
+    auto& cpu_batch = cpu_tensor_cache_[cached_tensor_id];
     reader_->ReadNext(&cpu_batch);
     if (cpu_batch.empty()) {
       // The underlying reader have no next data.
       break;
     }
     if (platform::is_gpu_place(place_)) {
-      auto& gpu_batch = gpu_tensor_cache[cached_tensor_id];
+      auto& gpu_batch = gpu_tensor_cache_[cached_tensor_id];
       auto* gpu_ctx = ctxs_[cached_tensor_id].get();
       gpu_batch.resize(cpu_batch.size());
       for (size_t i = 0; i < cpu_batch.size(); ++i) {
         framework::TensorCopy(cpu_batch[i], place_, *gpu_ctx, &gpu_batch[i]);
         gpu_batch[i].set_lod(cpu_batch[i].lod());
       }
-      batch.payloads_ = gpu_batch;
-      batch.ctx_ = gpu_ctx;
-    } else {
-      // CPUPlace
-      batch.payloads_ = cpu_batch;
     }
-    ++cached_tensor_id;
-    cached_tensor_id %= kCacheSize;
     try {
-      channel_->Send(&batch);
+      size_t tmp = cached_tensor_id;
+      channel_->Send(&tmp);
     } catch (paddle::platform::EnforceNotMet e) {
       VLOG(5) << "WARNING: The double buffer channel has been closed. The "
                  "prefetch thread will terminate.";
       break;
     }
+    ++cached_tensor_id;
+    cached_tensor_id %= kCacheSize;
   }
   channel_->Close();
   VLOG(5) << "Prefetch thread terminates.";
```
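The reader refactor stops shipping whole Item batches through the channel: batches stay in fixed per-slot member caches, and only the slot index travels. Reduced to standard C++ as a pattern sketch (this is not Paddle's Channel API; a bounded in-flight count stands in for the channel's capacity-based backpressure):

```cpp
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

int main() {
  constexpr size_t kCacheSize = 3;
  std::vector<std::vector<int>> cache(kCacheSize);  // per-slot batch storage
  std::queue<size_t> channel;                       // carries slot ids only
  std::mutex mu;
  std::condition_variable cv;
  bool done = false;

  std::thread prefetcher([&] {  // the PrefetchThreadFunc() side
    size_t id = 0;
    for (int batch = 0; batch < 5; ++batch) {
      std::unique_lock<std::mutex> l(mu);
      // Backpressure: keep fewer ids in flight than spare slots, so a slot
      // is only rewritten after the consumer has already taken it.
      cv.wait(l, [&] { return channel.size() < kCacheSize - 1; });
      cache[id].assign(4, batch);  // "prefetch" a batch into slot id
      channel.push(id);
      cv.notify_all();
      id = (id + 1) % kCacheSize;  // advance the ring index
    }
    std::lock_guard<std::mutex> l(mu);
    done = true;
    cv.notify_all();
  });

  while (true) {  // the ReadNext() side
    std::unique_lock<std::mutex> l(mu);
    cv.wait(l, [&] { return !channel.empty() || done; });
    if (channel.empty()) break;
    size_t id = channel.front();
    channel.pop();
    std::vector<int> out = cache[id];  // copy out, like *out = cache_[id]
    cv.notify_all();
    std::cout << "got batch of " << out.size() << " from slot " << id << "\n";
  }
  prefetcher.join();
  return 0;
}
```

The payoff is the same as in the operator: large tensors are written once into a cache slot and read once out of it, instead of being moved through the queue element itself.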
paddle/fluid/operators/reshape_op.h

```diff
@@ -147,6 +147,7 @@ class ReshapeKernel : public framework::OpKernel<T> {
     if (!inplace) {
       out->mutable_data<T>(ctx.GetPlace());
       framework::TensorCopy(*in, ctx.GetPlace(), ctx.device_context(), out);
+      ctx.device_context().Wait();
       // TensorCopy will resize to in_dims.
       out->Resize(out_dims);
     } else {
@@ -169,6 +170,7 @@ class ReshapeGradKernel : public framework::OpKernel<T> {
     auto in_dims = d_x->dims();
     if (!inplace) {
       framework::TensorCopy(*d_out, ctx.GetPlace(), ctx.device_context(), d_x);
+      ctx.device_context().Wait();
       d_x->Resize(in_dims);
     } else {
       d_x->ShareDataWith(*d_out);
```
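Both hunks insert a synchronization right after the copy. A hedged reading of why, assuming an asynchronous device context such as a CUDA stream where TensorCopy only enqueues the transfer:

```cpp
// Pattern sketch, not a complete kernel:
framework::TensorCopy(*in, ctx.GetPlace(), ctx.device_context(), out);
ctx.device_context().Wait();  // block until the enqueued copy has finished
out->Resize(out_dims);        // only now is it safe to resize/consume `out`
```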
paddle/fluid/operators/uniform_random_op.cc

```diff
@@ -24,7 +24,19 @@ template <typename T>
 class CPUUniformRandomKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* tensor = ctx.Output<framework::Tensor>("Out");
+    framework::Tensor* tensor = nullptr;
+    auto out_var = ctx.OutputVar("Out");
+    if (out_var->IsType<framework::LoDTensor>()) {
+      tensor = out_var->GetMutable<framework::LoDTensor>();
+    } else if (out_var->IsType<framework::SelectedRows>()) {
+      auto shape = ctx.Attr<std::vector<int>>("shape");
+      tensor = out_var->GetMutable<framework::SelectedRows>()->mutable_value();
+      tensor->Resize(framework::make_ddim(shape));
+    } else {
+      PADDLE_THROW(
+          "uniform_random_op's output only"
+          "supports SelectedRows and Tensor");
+    }
     T* data = tensor->mutable_data<T>(ctx.GetPlace());
     unsigned int seed = static_cast<unsigned int>(ctx.Attr<int>("seed"));
     std::minstd_rand engine;
```
paddle/fluid/operators/uniform_random_op.cu

```diff
@@ -43,7 +43,19 @@ template <typename T>
 class GPUUniformRandomKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* tensor = context.Output<framework::Tensor>("Out");
+    framework::Tensor* tensor = nullptr;
+    auto out_var = context.OutputVar("Out");
+    if (out_var->IsType<framework::LoDTensor>()) {
+      tensor = out_var->GetMutable<framework::LoDTensor>();
+    } else if (out_var->IsType<framework::SelectedRows>()) {
+      auto shape = context.Attr<std::vector<int>>("shape");
+      tensor = out_var->GetMutable<framework::SelectedRows>()->mutable_value();
+      tensor->Resize(framework::make_ddim(shape));
+    } else {
+      PADDLE_THROW(
+          "uniform_random_op's output only"
+          "supports SelectedRows and Tensor");
+    }
     T* data = tensor->mutable_data<T>(context.GetPlace());
     unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
     if (seed == 0) {
```
python/paddle/fluid/tests/book/test_recognize_digits.py

```diff
@@ -157,7 +157,6 @@ def train(nn_type,
         for ip in pserver_ips.split(","):
             eplist.append(':'.join([ip, port]))
         pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
-        pserver_endpoints = os.getenv("PSERVERS")
         trainers = int(os.getenv("TRAINERS"))
         current_endpoint = os.getenv("POD_IP") + ":" + port
         trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
```
python/paddle/fluid/tests/unittests/test_uniform_random_op.py

```diff
@@ -15,6 +15,16 @@
 import unittest
 import numpy as np
 from op_test import OpTest
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator
+
+
+def output_hist(out):
+    hist, _ = np.histogram(out, range=(-5, 10))
+    hist = hist.astype("float32")
+    hist /= float(out.size)
+    prob = 0.1 * np.ones((10))
+    return hist, prob


 class TestUniformRandomOp(OpTest):
@@ -33,11 +43,37 @@ class TestUniformRandomOp(OpTest):
         self.check_output_customized(self.verify_output)

     def verify_output(self, outs):
-        tensor = outs[0]
-        hist, _ = np.histogram(outs[0], range=(-5, 10))
-        hist = hist.astype("float32")
-        hist /= float(outs[0].size)
-        prob = 0.1 * np.ones((10))
+        hist, prob = output_hist(np.array(outs[0]))
         self.assertTrue(
             np.allclose(
                 hist, prob, rtol=0, atol=0.01), "hist: " + str(hist))
+
+
+class TestUniformRandomOpSelectedRows(unittest.TestCase):
+    def get_places(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
+        return places
+
+    def test_check_output(self):
+        for place in self.get_places():
+            self.check_with_place(place)
+
+    def check_with_place(self, place):
+        scope = core.Scope()
+        out = scope.var("X").get_selected_rows()
+
+        op = Operator(
+            "uniform_random",
+            Out="X",
+            shape=[4, 784],
+            min=-5.0,
+            max=10.0,
+            seed=10)
+        op.run(scope, place)
+        self.assertEqual(out.get_tensor().shape(), [4, 784])
+        hist, prob = output_hist(np.array(out.get_tensor()))
+        self.assertTrue(
+            np.allclose(
+                hist, prob, rtol=0, atol=0.01), "hist: " + str(hist))
```