Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
6c4d1f55
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6c4d1f55
编写于
10月 03, 2017
作者:
Q
qijun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine codes
上级
e946fc15
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
188 addition
and
86 deletion
+188
-86
paddle/framework/CMakeLists.txt
paddle/framework/CMakeLists.txt
+6
-2
paddle/framework/executor.cc
paddle/framework/executor.cc
+22
-22
paddle/framework/executor.h
paddle/framework/executor.h
+2
-2
paddle/framework/executor_test.cc
paddle/framework/executor_test.cc
+60
-43
paddle/platform/CMakeLists.txt
paddle/platform/CMakeLists.txt
+1
-1
paddle/platform/device_context_manager.cc
paddle/platform/device_context_manager.cc
+68
-0
paddle/platform/device_context_manager.h
paddle/platform/device_context_manager.h
+29
-16
未找到文件。
paddle/framework/CMakeLists.txt
浏览文件 @
6c4d1f55
...
...
@@ -44,8 +44,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library
(
backward SRCS backward.cc DEPS net_op
)
cc_test
(
backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device scope framework_proto
${
GLOB_OP_LIB
}
)
cc_test
(
executor_test SRCS executor_test.cc DEPS executor
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context_manager scope framework_proto
${
GLOB_OP_LIB
}
)
if
(
WITH_GPU
)
nv_test
(
executor_test SRCS executor_test.cc DEPS executor
)
else
()
cc_test
(
executor_test SRCS executor_test.cc DEPS executor
)
endif
()
cc_library
(
tensor_array SRCS tensor_array.cc DEPS lod_tensor
)
cc_test
(
tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place
)
paddle/framework/executor.cc
浏览文件 @
6c4d1f55
...
...
@@ -22,9 +22,21 @@ namespace paddle {
namespace
framework
{
Executor
::
Executor
(
const
std
::
vector
<
platform
::
Place
>&
places
)
{
devices_
.
resize
(
places
.
size
());
device
_context
s_
.
resize
(
places
.
size
());
for
(
size_t
i
=
0
;
i
<
places
.
size
();
i
++
)
{
devices_
[
i
]
=
platform
::
GetDevice
(
places
[
i
]);
if
(
platform
::
is_cpu_place
(
places
[
i
]))
{
device_contexts_
[
i
]
=
platform
::
DeviceContextManager
::
Get
()
->
GetDeviceContext
<
platform
::
CPUPlace
>
(
boost
::
get
<
platform
::
CPUPlace
>
(
places
[
i
]));
}
else
{
#ifndef PADDLE_ONLY_CPU
device_contexts_
[
i
]
=
platform
::
DeviceContextManager
::
Get
()
->
GetDeviceContext
<
platform
::
GPUPlace
>
(
boost
::
get
<
platform
::
GPUPlace
>
(
places
[
i
]));
#else
PADDLE_THROW
(
"'GPUPlace' is not supported in CPU only device."
);
#endif
}
}
}
...
...
@@ -34,37 +46,25 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope,
// TODO(tonyyang-svail):
// - only runs the first block
// - only runs on the first device
Scope
&
local_scope
=
scope
->
NewScope
();
auto
&
block
=
pdesc
.
blocks
(
0
);
auto
&
device
=
device
s_
[
0
];
auto
&
device
_context
=
device_context
s_
[
0
];
for
(
auto
&
var
:
block
.
vars
())
{
scope
->
NewVar
(
var
.
name
());
local_scope
.
NewVar
(
var
.
name
());
}
// std::vector<op_ptr> ops;
for
(
auto
&
op_desc
:
block
.
ops
())
{
auto
op
=
framework
::
OpRegistry
::
CreateOp
(
op_desc
);
//
op->InferShape(*scope);
op
->
Run
(
*
scope
,
*
device
->
cpu_
device_context
);
//
InferShape is now doing inside Run method.
op
->
Run
(
local_scope
,
*
device_context
);
}
// TODO(tonyyang-svail): need to test gpu device
// device_->cpu_device_context->Wait();
// #ifndef PADDLE_ONLY_CPU
// if (device_->cuda_device_context) {
// device_->cuda_device_context->Wait();
// }
// #endif
Scope
&
local_scope
=
scope
->
NewScope
();
local_scope
.
NewVar
();
for
(
auto
device
:
devices_
)
{
device
->
cpu_device_context
->
Wait
();
#ifndef PADDLE_ONLY_CPU
if
(
device
->
cuda_device_context
)
{
device
->
cuda_device_context
->
Wait
();
}
#endif
for
(
auto
device_context
:
device_contexts_
)
{
device_context
->
Wait
();
}
}
...
...
paddle/framework/executor.h
浏览文件 @
6c4d1f55
...
...
@@ -18,7 +18,7 @@ limitations under the License. */
#include "paddle/framework/op_info.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device.h"
#include "paddle/platform/device
_context_manager
.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -30,7 +30,7 @@ class Executor {
void
Run
(
const
ProgramDesc
&
,
Scope
*
,
std
::
vector
<
Tensor
>*
);
private:
std
::
vector
<
platform
::
Device
*>
device
s_
;
std
::
vector
<
platform
::
Device
Context
*>
device_context
s_
;
};
}
// namespace framework
...
...
paddle/framework/executor_test.cc
浏览文件 @
6c4d1f55
...
...
@@ -15,8 +15,6 @@ limitations under the License. */
#include "paddle/framework/executor.h"
#include "gtest/gtest.h"
#include "paddle/framework/attribute.h"
#include <gtest/gtest.h>
#include "paddle/framework/grad_op_builder.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
...
...
@@ -26,52 +24,71 @@ USE_OP(elementwise_add);
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
TEST
(
Executor
,
Init
)
{
ProgramDesc
pdesc
;
auto
root_block
=
pdesc
.
add_blocks
();
root_block
->
set_idx
(
0
);
root_block
->
set_parent_idx
(
-
1
);
auto
a
=
root_block
->
add_vars
();
a
->
set_name
(
"a"
);
auto
a_lt
=
a
->
mutable_lod_tensor
();
a_lt
->
set_data_type
(
paddle
::
framework
::
DataType
::
FP32
);
a_lt
->
add_dims
(
640
);
a_lt
->
add_dims
(
640
);
auto
b
=
root_block
->
add_vars
();
b
->
set_name
(
"b"
);
auto
b_lt
=
b
->
mutable_lod_tensor
();
b_lt
->
set_data_type
(
paddle
::
framework
::
DataType
::
FP32
);
b_lt
->
add_dims
(
640
);
b_lt
->
add_dims
(
640
);
auto
c
=
root_block
->
add_vars
();
c
->
set_name
(
"c"
);
auto
c_lt
=
c
->
mutable_lod_tensor
();
c_lt
->
set_data_type
(
paddle
::
framework
::
DataType
::
FP32
);
c_lt
->
add_dims
(
640
);
c_lt
->
add_dims
(
640
);
auto
op1
=
root_block
->
add_ops
();
op1
->
set_type
(
"elementwise_add"
);
auto
X
=
op1
->
add_inputs
();
X
->
set_parameter
(
"X"
);
X
->
add_arguments
(
"a"
);
auto
Y
=
op1
->
add_inputs
();
Y
->
set_parameter
(
"Y"
);
Y
->
add_arguments
(
"b"
);
CPUPlace
cpu_place1
,
cpu_place2
;
class
ExecutorTester
:
public
::
testing
::
Test
{
public:
virtual
void
SetUp
()
override
{
auto
root_block
=
pdesc_
.
add_blocks
();
root_block
->
set_idx
(
0
);
root_block
->
set_parent_idx
(
-
1
);
auto
a
=
root_block
->
add_vars
();
a
->
set_name
(
"a"
);
auto
a_lt
=
a
->
mutable_lod_tensor
();
a_lt
->
set_data_type
(
paddle
::
framework
::
DataType
::
FP32
);
a_lt
->
add_dims
(
640
);
a_lt
->
add_dims
(
640
);
auto
b
=
root_block
->
add_vars
();
b
->
set_name
(
"b"
);
auto
b_lt
=
b
->
mutable_lod_tensor
();
b_lt
->
set_data_type
(
paddle
::
framework
::
DataType
::
FP32
);
b_lt
->
add_dims
(
640
);
b_lt
->
add_dims
(
640
);
auto
c
=
root_block
->
add_vars
();
c
->
set_name
(
"c"
);
auto
c_lt
=
c
->
mutable_lod_tensor
();
c_lt
->
set_data_type
(
paddle
::
framework
::
DataType
::
FP32
);
c_lt
->
add_dims
(
640
);
c_lt
->
add_dims
(
640
);
auto
op1
=
root_block
->
add_ops
();
op1
->
set_type
(
"elementwise_add"
);
auto
X
=
op1
->
add_inputs
();
X
->
set_parameter
(
"X"
);
X
->
add_arguments
(
"a"
);
auto
Y
=
op1
->
add_inputs
();
Y
->
set_parameter
(
"Y"
);
Y
->
add_arguments
(
"b"
);
}
protected:
std
::
vector
<
Tensor
>*
outputs_
{
nullptr
};
ProgramDesc
pdesc_
;
Scope
scope_
;
};
TEST_F
(
ExecutorTester
,
InitCPU
)
{
std
::
vector
<
Place
>
places
;
CPUPlace
cpu_place1
,
cpu_place2
;
places
.
push_back
(
cpu_place1
);
places
.
push_back
(
cpu_place2
);
Executor
*
executor
=
new
Executor
(
places
);
Scope
s
;
std
::
vector
<
Tensor
>*
outputs
{
nullptr
};
executor
->
Run
(
pdesc
,
&
s
,
outputs
);
executor
->
Run
(
pdesc_
,
&
scope_
,
outputs_
);
delete
executor
;
}
#ifndef PADDLE_ONLY_CPU
TEST_F
(
ExecutorTester
,
InitGPU
)
{
std
::
vector
<
Place
>
places
;
GPUPlace
gpu_place0
(
0
);
GPUPlace
gpu_place1
(
1
);
places
.
push_back
(
gpu_place0
);
places
.
push_back
(
gpu_place1
);
Executor
*
executor
=
new
Executor
(
places
);
executor
->
Run
(
pdesc_
,
&
scope_
,
outputs_
);
delete
executor
;
}
#endif
paddle/platform/CMakeLists.txt
浏览文件 @
6c4d1f55
...
...
@@ -23,7 +23,7 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
system_allocator memory_block meta_data meta_cache place eigen3
${
GPU_CTX_DEPS
}
)
nv_test
(
device_context_test SRCS device_context_test.cc DEPS device_context gpu_info
)
cc_library
(
device
SRCS device
.cc DEPS device_context
)
cc_library
(
device
_context_manager SRCS device_context_manager
.cc DEPS device_context
)
nv_test
(
cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda
)
nv_test
(
transform_test SRCS transform_test.cu DEPS paddle_memory place device_context
)
paddle/platform/device.cc
→
paddle/platform/device
_context_manager
.cc
浏览文件 @
6c4d1f55
...
...
@@ -12,48 +12,57 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/device.h"
#include "paddle/platform/device
_context_manager
.h"
namespace
paddle
{
namespace
platform
{
template
<
typename
T
,
typename
...
Args
>
std
::
unique_ptr
<
T
>
make_unique
(
Args
&&
...
args
)
{
return
std
::
unique_ptr
<
T
>
(
new
T
(
std
::
forward
<
Args
>
(
args
)...));
DeviceContextManager
::
DeviceContextManager
()
{
#ifndef PADDLE_ONLY_CPU
device_count_
=
GetDeviceCount
();
cuda_contexts_
.
reserve
(
device_count_
);
for
(
int
i
=
0
;
i
<
device_count_
;
i
++
)
{
cuda_contexts_
[
i
]
=
nullptr
;
}
#endif
}
CPUDeviceContext
*
GetCPUDeviceContext
(
const
CPUPlace
&
place
)
{
static
std
::
unique_ptr
<
CPUDeviceContext
>
g_cpu_device_context
=
make_unique
<
CPUDeviceContext
>
(
place
);
return
g_cpu_device_context
.
get
();
template
<
>
CPUDeviceContext
*
DeviceContextManager
::
GetDeviceContext
<
CPUPlace
,
CPUDeviceContext
>
(
const
CPUPlace
&
place
)
{
if
(
!
cpu_context_
)
{
cpu_context_
=
new
CPUDeviceContext
(
place
);
}
return
cpu_context_
;
}
#ifndef PADDLE_ONLY_CPU
CUDADeviceContext
*
GetCUDADeviceContext
(
const
GPUPlace
&
place
)
{
static
std
::
unique_ptr
<
CUDADeviceContext
>
g_cuda_device_context
=
make_unique
<
CUDADeviceContext
>
(
place
);
return
g_cuda_device_context
.
get
();
template
<
>
CUDADeviceContext
*
DeviceContextManager
::
GetDeviceContext
<
GPUPlace
,
CUDADeviceContext
>
(
const
GPUPlace
&
place
)
{
int
gpu_id
=
place
.
device
;
PADDLE_ENFORCE
(
gpu_id
<
device_count_
,
"GPU device id must less than device count"
);
SetDeviceId
(
gpu_id
);
if
(
!
cuda_contexts_
[
gpu_id
])
{
cuda_contexts_
[
gpu_id
]
=
new
CUDADeviceContext
(
place
);
}
return
cuda_contexts_
[
gpu_id
];
}
#endif
Device
*
GetDevice
(
const
Place
&
place
)
{
CPUPlace
cpu_place
;
DeviceContextManager
::~
DeviceContextManager
()
{
if
(
cpu_context_
)
{
delete
cpu_context_
;
}
#ifndef PADDLE_ONLY_CPU
if
(
is_gpu_place
(
place
))
{
GPUPlace
gpu_place
=
boost
::
get
<
GPUPlace
>
(
place
);
static
std
::
unique_ptr
<
Device
>
g_device
=
make_unique
<
Device
>
(
GetCPUDeviceContext
(
cpu_place
),
GetCUDADeviceContext
(
gpu_place
));
return
g_device
.
get
();
}
else
{
static
std
::
unique_ptr
<
Device
>
g_device
=
make_unique
<
Device
>
(
GetCPUDeviceContext
(
cpu_place
),
nullptr
);
return
g_device
.
get
();
for
(
int
i
=
0
;
i
<
device_count_
;
i
++
)
{
if
(
cuda_contexts_
[
i
])
{
delete
cuda_contexts_
[
i
];
}
}
#else
static
std
::
unique_ptr
<
Device
>
g_device
=
make_unique
<
Device
>
(
GetCPUDeviceContext
(
cpu_place
));
return
g_device
.
get
();
#endif
}
}
// namespace platform
}
// namespace paddle
paddle/platform/device.h
→
paddle/platform/device
_context_manager
.h
浏览文件 @
6c4d1f55
...
...
@@ -13,33 +13,46 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/platform/device_context.h"
#include "paddle/platform/place.h"
namespace
paddle
{
namespace
platform
{
struct
Device
{
CPUDeviceContext
*
cpu_device_context
;
#ifndef PADDLE_ONLY_CPU
CUDADeviceContext
*
cuda_device_context
;
#endif
template
<
typename
T
>
struct
Converter
;
template
<
>
struct
Converter
<
CPUPlace
>
{
using
DeviceContextType
=
CPUDeviceContext
;
};
#ifndef PADDLE_ONLY_CPU
Device
(
CPUDeviceContext
*
cpu
,
CUDADeviceContext
*
gpu
)
:
cpu_device_context
(
cpu
),
cuda_device_context
(
gpu
)
{}
#else
explicit
Device
(
CPUDeviceContext
*
cpu
)
:
cpu_device_context
(
cpu
)
{}
#endif
template
<
>
struct
Converter
<
GPUPlace
>
{
using
DeviceContextType
=
CUDADeviceContext
;
};
#endif
class
DeviceContextManager
{
public:
DeviceContextManager
();
~
DeviceContextManager
();
template
<
typename
PlaceType
,
typename
DeviceType
=
typename
Converter
<
PlaceType
>
::
DeviceContextType
>
DeviceType
*
GetDeviceContext
(
const
PlaceType
&
place
);
CPUDeviceContext
*
GetCPUDeviceContext
(
const
platform
::
CPUPlace
&
place
);
static
DeviceContextManager
*
Get
()
{
static
DeviceContextManager
inst
;
return
&
inst
;
}
private:
CPUDeviceContext
*
cpu_context_
;
#ifndef PADDLE_ONLY_CPU
CUDADeviceContext
*
GetCUDADeviceContext
(
const
platform
::
GPUPlace
&
place
);
int
device_count_
;
std
::
vector
<
CUDADeviceContext
*>
cuda_contexts_
;
#endif
Device
*
GetDevice
(
const
platform
::
Place
&
place
);
};
}
// namespace platform
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录