Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
29ad9794
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
29ad9794
编写于
8月 10, 2018
作者:
X
xzl
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into add_tensorrt_softmax
上级
641f32da
772ceee3
变更
20
显示空白变更内容
内联
并排
Showing
20 changed file
with
365 addition
and
64 deletion
+365
-64
CMakeLists.txt
CMakeLists.txt
+1
-0
cmake/external/cub.cmake
cmake/external/cub.cmake
+35
-0
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-0
paddle/fluid/framework/details/multi_devices_graph_builder.cc
...le/fluid/framework/details/multi_devices_graph_builder.cc
+8
-6
paddle/fluid/framework/details/multi_devices_graph_builder.h
paddle/fluid/framework/details/multi_devices_graph_builder.h
+3
-1
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+0
-5
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+6
-4
paddle/fluid/inference/api/api.cc
paddle/fluid/inference/api/api.cc
+22
-4
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+2
-1
paddle/fluid/operators/elementwise_op_function.h
paddle/fluid/operators/elementwise_op_function.h
+6
-6
paddle/fluid/operators/parallel_do_op.cc
paddle/fluid/operators/parallel_do_op.cc
+10
-12
python/paddle/dataset/conll05.py
python/paddle/dataset/conll05.py
+4
-4
python/paddle/dataset/wmt14.py
python/paddle/dataset/wmt14.py
+1
-1
python/paddle/fluid/contrib/__init__.py
python/paddle/fluid/contrib/__init__.py
+3
-1
python/paddle/fluid/contrib/memory_usage_calc.py
python/paddle/fluid/contrib/memory_usage_calc.py
+102
-0
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+78
-6
python/paddle/fluid/tests/unittests/test_memory_usage.py
python/paddle/fluid/tests/unittests/test_memory_usage.py
+69
-0
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+9
-8
python/paddle/v2/dataset/conll05.py
python/paddle/v2/dataset/conll05.py
+4
-4
python/paddle/v2/dataset/wmt14.py
python/paddle/v2/dataset/wmt14.py
+1
-1
未找到文件。
CMakeLists.txt
浏览文件 @
29ad9794
...
...
@@ -175,6 +175,7 @@ include(external/any) # download libn::any
include
(
external/eigen
)
# download eigen3
include
(
external/pybind11
)
# download pybind11
include
(
external/cares
)
include
(
external/cub
)
if
(
WITH_DISTRIBUTE
)
if
(
WITH_GRPC
)
...
...
cmake/external/cub.cmake
0 → 100644
浏览文件 @
29ad9794
if
(
NOT WITH_GPU
)
return
()
endif
()
include
(
ExternalProject
)
set
(
CUB_SOURCE_DIR
${
THIRD_PARTY_PATH
}
/cub
)
set
(
CUB_INCLUDE_DIR
${
CUB_SOURCE_DIR
}
/src/extern_cub
)
include_directories
(
${
CUB_INCLUDE_DIR
}
)
ExternalProject_Add
(
extern_cub
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/NVlabs/cub.git"
GIT_TAG
"v1.8.0"
PREFIX
${
CUB_SOURCE_DIR
}
UPDATE_COMMAND
""
CONFIGURE_COMMAND
""
BUILD_COMMAND
""
INSTALL_COMMAND
""
TEST_COMMAND
""
)
if
(
${
CMAKE_VERSION
}
VERSION_LESS
"3.3.0"
)
set
(
dummyfile
${
CMAKE_CURRENT_BINARY_DIR
}
/cub_dummy.c
)
file
(
WRITE
${
dummyfile
}
"const char *dummy =
\"
${
dummyfile
}
\"
;"
)
add_library
(
cub STATIC
${
dummyfile
}
)
else
()
add_library
(
cub INTERFACE
)
endif
()
add_dependencies
(
cub extern_cub
)
LIST
(
APPEND externl_project_dependencies cub
)
paddle/fluid/API.spec
浏览文件 @
29ad9794
...
...
@@ -336,6 +336,7 @@ paddle.fluid.contrib.BeamSearchDecoder.decode ArgSpec(args=['self'], varargs=Non
paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False))
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.create_splited_vars ArgSpec(args=['self', 'source_var', 'block', 'tag'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/framework/details/multi_devices_graph_builder.cc
浏览文件 @
29ad9794
...
...
@@ -275,7 +275,8 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
if
(
strategy_
.
gradient_scale_
!=
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
)
{
// TODO(paddle-dev): Why is there no input for this op_handle?
CreateScaleLossGradOp
(
&
result
);
auto
loss_grad_name
=
node
->
Op
()
->
OutputArgumentNames
()[
0
];
CreateScaleLossGradOp
(
&
result
,
loss_grad_name
);
}
// This assumes the backward generating code will ensure IsScaleLossOp
// is true only for the op that scale the final scalar loss.
...
...
@@ -535,7 +536,8 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(const ir::Graph &graph,
return
got
==
sharded_var_device
.
end
()
?
-
1
:
got
->
second
;
}
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
ir
::
Graph
*
result
)
const
{
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
)
const
{
for
(
size_t
i
=
0
;
i
<
places_
.
size
();
++
i
)
{
// Insert ScaleCost OpHandle
#ifdef PADDLE_WITH_CUDA
...
...
@@ -558,9 +560,9 @@ void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(ir::Graph *result) const {
// loss->pending_ops_.emplace_back(op_handle);
// op_handle->inputs_.emplace_back(loss);
CreateOpOutput
(
result
,
op_handle
,
result
->
CreateEmptyNode
(
GradVarName
(
loss_var_name_
)
,
ir
::
Node
::
Type
::
kVariable
),
CreateOpOutput
(
result
,
op_handle
,
result
->
CreateEmptyNode
(
loss_grad_name
,
ir
::
Node
::
Type
::
kVariable
),
places_
[
i
],
i
);
}
}
...
...
paddle/fluid/framework/details/multi_devices_graph_builder.h
浏览文件 @
29ad9794
...
...
@@ -75,7 +75,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
void
CreateComputationalOps
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
size_t
num_places
)
const
;
void
CreateScaleLossGradOp
(
ir
::
Graph
*
result
)
const
;
void
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
)
const
;
VarHandle
*
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
int
dst_dev_id
)
const
;
void
CreateComputationalOp
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
29ad9794
...
...
@@ -330,12 +330,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
}
for
(
auto
&
op
:
ctx
->
ops_
)
{
VLOG
(
4
)
<<
place_
<<
" "
<<
op
->
DebugStringEx
(
local_scope
);
op
->
Run
(
*
local_scope
,
place_
);
// NOTE! Please do not delete this line, it's usefull because the debug
// string before and after op.run are different, after run the output
// will have right shape which is usefull for debug.
VLOG
(
3
)
<<
place_
<<
" "
<<
op
->
DebugStringEx
(
local_scope
);
if
(
FLAGS_benchmark
)
{
VLOG
(
2
)
<<
"Memory used after operator "
+
op
->
Type
()
+
" running: "
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
29ad9794
...
...
@@ -127,7 +127,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
VLOG
(
10
)
<<
"-
"
<<
DebugStringEx
(
&
scope
);
VLOG
(
4
)
<<
place
<<
"
"
<<
DebugStringEx
(
&
scope
);
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
...
...
@@ -139,7 +139,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
VLOG
(
10
)
<<
"+
"
<<
DebugStringEx
(
&
scope
);
VLOG
(
3
)
<<
place
<<
"
"
<<
DebugStringEx
(
&
scope
);
}
bool
OperatorBase
::
HasInputs
(
const
std
::
string
&
name
)
const
{
...
...
@@ -778,6 +778,7 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
const
ExecutionContext
&
ctx
)
const
{
auto
&
scope
=
ctx
.
scope
();
int
data_type
=
-
1
;
std
::
string
last_input_name
;
for
(
auto
&
input
:
this
->
inputs_
)
{
for
(
auto
&
ipt_name
:
input
.
second
)
{
auto
*
var
=
scope
.
FindVar
(
ipt_name
);
...
...
@@ -794,9 +795,10 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
int
tmp
=
static_cast
<
int
>
(
ToDataType
(
t
->
type
()));
PADDLE_ENFORCE
(
tmp
==
data_type
||
data_type
==
-
1
,
"DataType of Paddle Op %s must be the same. Get %
d != %d"
,
Type
()
,
data_typ
e
,
tmp
);
"DataType of Paddle Op %s must be the same. Get %
s(%d) != %s(%d)"
,
Type
(),
last_input_name
,
data_type
,
ipt_nam
e
,
tmp
);
data_type
=
tmp
;
last_input_name
=
ipt_name
;
}
}
}
...
...
paddle/fluid/inference/api/api.cc
浏览文件 @
29ad9794
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace
paddle
{
...
...
@@ -40,19 +41,36 @@ PaddleBuf::PaddleBuf(PaddleBuf&& other)
PaddleBuf
::
PaddleBuf
(
const
PaddleBuf
&
other
)
{
*
this
=
other
;
}
PaddleBuf
&
PaddleBuf
::
operator
=
(
const
PaddleBuf
&
other
)
{
if
(
!
other
.
memory_owned_
)
{
data_
=
other
.
data_
;
length_
=
other
.
length_
;
memory_owned_
=
other
.
memory_owned_
;
}
else
{
Resize
(
other
.
length
());
memcpy
(
data_
,
other
.
data
(),
other
.
length
());
length_
=
other
.
length
();
memory_owned_
=
true
;
}
return
*
this
;
}
PaddleBuf
&
PaddleBuf
::
operator
=
(
PaddleBuf
&&
other
)
{
// only the buffer with external memory can be copied
assert
(
!
other
.
memory_owned_
);
data_
=
other
.
data_
;
length_
=
other
.
length_
;
memory_owned_
=
other
.
memory_owned_
;
other
.
data_
=
nullptr
;
other
.
length_
=
0
;
other
.
memory_owned_
=
false
;
return
*
this
;
}
void
PaddleBuf
::
Resize
(
size_t
length
)
{
// Only the owned memory can be reset, the external memory can't be changed.
if
(
length_
==
length
)
return
;
assert
(
memory_owned_
);
if
(
memory_owned_
)
{
Free
();
}
data_
=
new
char
[
length
];
length_
=
length
;
memory_owned_
=
true
;
...
...
@@ -68,7 +86,7 @@ void PaddleBuf::Reset(void* data, size_t length) {
void
PaddleBuf
::
Free
()
{
if
(
memory_owned_
&&
data_
)
{
assert
(
length_
>
0
);
delete
static_cast
<
char
*>
(
data_
);
delete
[]
static_cast
<
char
*>
(
data_
);
data_
=
nullptr
;
length_
=
0
;
}
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
29ad9794
...
...
@@ -40,11 +40,12 @@ class PaddleBuf {
// Copy only available when memory is managed externally.
explicit
PaddleBuf
(
const
PaddleBuf
&
);
PaddleBuf
&
operator
=
(
const
PaddleBuf
&
);
PaddleBuf
&
operator
=
(
PaddleBuf
&&
);
// Do not own the memory.
PaddleBuf
(
void
*
data
,
size_t
length
)
:
data_
(
data
),
length_
(
length
),
memory_owned_
{
false
}
{}
// Own memory.
explicit
PaddleBuf
(
size_t
length
)
PaddleBuf
(
size_t
length
)
:
data_
(
new
char
[
length
]),
length_
(
length
),
memory_owned_
(
true
)
{}
// Resize to `length` bytes.
void
Resize
(
size_t
length
);
...
...
paddle/fluid/operators/elementwise_op_function.h
浏览文件 @
29ad9794
...
...
@@ -534,8 +534,8 @@ void ElemwiseGradCompute(const framework::ExecutionContext& ctx,
const
framework
::
Tensor
&
dout
,
int
axis
,
framework
::
Tensor
*
dx
,
framework
::
Tensor
*
dy
,
DX_OP
dx_op
,
DY_OP
dy_op
)
{
const
framework
::
DDim
x_dim
=
x
.
dims
();
const
framework
::
DDim
y_dim
=
y
.
dims
();
const
framework
::
DDim
&
x_dim
=
x
.
dims
();
const
framework
::
DDim
&
y_dim
=
y
.
dims
();
if
(
x
.
dims
()
==
y
.
dims
())
{
ElemwiseGradComputeNoBroadcast
<
DeviceContext
,
T
,
DX_OP
,
DY_OP
>
(
ctx
,
x_dim
,
y_dim
,
x
,
y
,
out
,
dout
,
axis
,
dx
,
dy
,
dx_op
,
dy_op
);
...
...
@@ -558,19 +558,19 @@ void ElemwiseExplicitGradCompute(const framework::ExecutionContext& ctx,
framework
::
Tensor
*
dx
,
framework
::
Tensor
*
dy
,
DX_OP
dx_op
,
DY_OP
dy_op
)
{
if
(
dy
==
nullptr
)
{
const
framework
::
DDim
dx_dims
=
dout
.
dims
();
const
framework
::
DDim
&
dx_dims
=
dout
.
dims
();
auto
dy_dims
=
dx_dims
;
ElemwiseGradComputeNoBroadcast
<
DeviceContext
,
T
,
DX_OP
,
DY_OP
>
(
ctx
,
dx_dims
,
dy_dims
,
x
,
y
,
out
,
dout
,
axis
,
dx
,
dy
,
dx_op
,
dy_op
);
}
else
{
if
(
dout
.
dims
()
==
dy
->
dims
())
{
const
framework
::
DDim
dx_dims
=
dout
.
dims
();
const
framework
::
DDim
dy_dims
=
dy
->
dims
();
const
framework
::
DDim
&
dx_dims
=
dout
.
dims
();
const
framework
::
DDim
&
dy_dims
=
dy
->
dims
();
ElemwiseGradComputeNoBroadcast
<
DeviceContext
,
T
,
DX_OP
,
DY_OP
>
(
ctx
,
dx_dims
,
dy_dims
,
x
,
y
,
out
,
dout
,
axis
,
dx
,
dy
,
dx_op
,
dy_op
);
}
else
{
// Y is a scalar
auto
dx_dims
=
dout
.
dims
();
const
framework
::
DDim
dy_dims
=
dy
->
dims
();
const
framework
::
DDim
&
dy_dims
=
dy
->
dims
();
ElemwiseGradComputeWithBroadcast
<
DeviceContext
,
T
,
DX_OP
,
DY_OP
>
(
ctx
,
dx_dims
,
dy_dims
,
x
,
y
,
out
,
dout
,
axis
,
dx
,
dy
,
dx_op
,
dy_op
);
}
...
...
paddle/fluid/operators/parallel_do_op.cc
浏览文件 @
29ad9794
...
...
@@ -163,8 +163,7 @@ class ParallelDoOp : public framework::OperatorBase {
auto
&
place
=
places
[
place_idx
];
auto
*
cur_scope
=
sub_scopes
[
place_idx
];
workers
.
emplace_back
(
framework
::
Async
([
program
,
cur_scope
,
place
,
block
,
place_idx
]
{
workers
.
emplace_back
(
framework
::
Async
([
program
,
cur_scope
,
place
,
block
]
{
framework
::
Executor
executor
(
place
);
executor
.
Run
(
*
program
,
cur_scope
,
block
->
ID
(),
false
/*create_local_scope*/
);
...
...
@@ -239,8 +238,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto
*
cur_scope
=
sub_scopes
[
i
];
// execute
workers
.
emplace_back
(
framework
::
Async
([
program
,
cur_scope
,
place
,
block
,
i
]
{
workers
.
emplace_back
(
framework
::
Async
([
program
,
cur_scope
,
place
,
block
]
{
framework
::
Executor
executor
(
place
);
executor
.
Run
(
*
program
,
cur_scope
,
block
->
ID
(),
false
/*create_local_scope*/
);
...
...
python/paddle/dataset/conll05.py
浏览文件 @
29ad9794
...
...
@@ -29,13 +29,13 @@ __all__ = ['test, get_dict', 'get_embedding', 'convert']
DATA_URL
=
'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5
=
'387719152ae52d60422c016e92a742fc'
WORDDICT_URL
=
'http://paddle
paddle.bj.bcebos.com/demo/srl_dict_and_embedding/
wordDict.txt'
WORDDICT_URL
=
'http://paddle
models.bj.bcebos.com/conll05st%2F
wordDict.txt'
WORDDICT_MD5
=
'ea7fb7d4c75cc6254716f0177a506baa'
VERBDICT_URL
=
'http://paddle
paddle.bj.bcebos.com/demo/srl_dict_and_embedding/
verbDict.txt'
VERBDICT_URL
=
'http://paddle
models.bj.bcebos.com/conll05st%2F
verbDict.txt'
VERBDICT_MD5
=
'0d2977293bbb6cbefab5b0f97db1e77c'
TRGDICT_URL
=
'http://paddle
paddle.bj.bcebos.com/demo/srl_dict_and_embedding/
targetDict.txt'
TRGDICT_URL
=
'http://paddle
models.bj.bcebos.com/conll05st%2F
targetDict.txt'
TRGDICT_MD5
=
'd8c7f03ceb5fc2e5a0fa7503a4353751'
EMB_URL
=
'http://paddle
paddle.bj.bcebos.com/demo/srl_dict_and_embedding/
emb'
EMB_URL
=
'http://paddle
models.bj.bcebos.com/conll05st%2F
emb'
EMB_MD5
=
'bf436eb0faa1f6f9103017f8be57cdb7'
UNK_IDX
=
0
...
...
python/paddle/dataset/wmt14.py
浏览文件 @
29ad9794
...
...
@@ -40,7 +40,7 @@ URL_TRAIN = ('http://paddlepaddle.cdn.bcebos.com/demo/'
'wmt_shrinked_data/wmt14.tgz'
)
MD5_TRAIN
=
'0791583d57d5beb693b9414c5b36798c'
# BLEU of this trained model is 26.92
URL_MODEL
=
'http://paddle
paddle.bj.bcebos.com/demo/wmt_14/wmt14_model.tar.
gz'
URL_MODEL
=
'http://paddle
models.bj.bcebos.com/wmt%2Fwmt14.t
gz'
MD5_MODEL
=
'0cb4a5366189b6acba876491c8724fa3'
START
=
"<s>"
...
...
python/paddle/fluid/contrib/__init__.py
浏览文件 @
29ad9794
...
...
@@ -14,5 +14,7 @@
import
decoder
from
decoder
import
*
import
memory_usage_calc
from
memory_usage_calc
import
*
__all__
=
decoder
.
__all__
__all__
=
decoder
.
__all__
+
memory_usage_calc
.
__all__
python/paddle/fluid/contrib/memory_usage_calc.py
0 → 100644
浏览文件 @
29ad9794
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module privides a memory usage calculate function for user.
The purpose of this API is to allow users to estimate memory usage of
a program under a special batch size, then user can set appropriate
batch size to fully utilize a GPU.
This API is still under active development and may change drastically.
"""
from
..
import
core
from
..framework
import
Program
,
Variable
__all__
=
[
'memory_usage'
]
dtype_to_size
=
{
core
.
VarDesc
.
VarType
.
FP16
:
2
,
core
.
VarDesc
.
VarType
.
FP32
:
4
,
core
.
VarDesc
.
VarType
.
FP64
:
8
,
core
.
VarDesc
.
VarType
.
INT16
:
2
,
core
.
VarDesc
.
VarType
.
INT32
:
4
,
core
.
VarDesc
.
VarType
.
INT64
:
8
,
core
.
VarDesc
.
VarType
.
BOOL
:
1
,
core
.
VarDesc
.
VarType
.
UINT8
:
1
,
}
DEBUG
=
False
def
memory_usage
(
program
,
batch_size
):
"""
Get the estimate memory usage of program with input batch size.
Args:
program(Program): The current Program.
batch_size(int): The current input data batch_size.
Returns:
min_total_memory(float): the estimate memory usage lower bound.
max_total_memory(float): the estimate memory usage upper bound.
unit_str(string): the unit of estimate usage result.
Examples:
>>> import paddle.fluid as fluid
>>> lower_usage, upper_usage, unit = fluid.contrib.memory_usage(
fluid.default_main_program(), batch_size=10)
>>> print "memory usage is about %.3f - %.3f %s" %
\
(lower_usage, upper_usage, unit)
"""
# Parameters check
if
not
isinstance
(
program
,
Program
):
raise
TypeError
(
"Calculating Memory Usage requires Program as its Parameter."
"But you passed in %s"
%
(
type
(
prgram
)))
if
batch_size
<=
0
:
raise
ValueError
(
"The batch size need to be positive."
)
# Get the var_name list of first block and calculate
total_memory
=
0.0
for
var
in
program
.
global_block
().
vars
.
itervalues
():
data_count
=
1
for
x
in
var
.
shape
:
if
x
==
-
1
:
data_count
*=
batch_size
else
:
data_count
*=
x
var_memory
=
data_count
*
dtype_to_size
[
var
.
dtype
]
if
DEBUG
:
print
"%s memory usage: %d"
%
(
var
.
name
,
var_memory
)
total_memory
+=
var_memory
if
DEBUG
:
print
"total memory usage: %.2f"
%
(
total_memory
)
# Convert appropriate unit
unit_str
=
"B"
if
total_memory
>
1024
:
total_memory
/=
1024
unit_str
=
"KB"
if
total_memory
>
1024
:
total_memory
/=
1024
unit_str
=
"MB"
# Append extra memory consumption (5% - 10%)
min_total_memory
=
total_memory
*
1.05
max_total_memory
=
total_memory
*
1.1
return
min_total_memory
,
max_total_memory
,
unit_str
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
浏览文件 @
29ad9794
...
...
@@ -51,17 +51,17 @@ class TranspilerTest(unittest.TestCase):
self
.
origin_prog
=
main
.
clone
()
return
main
def
get_trainer
(
self
,
config
=
None
):
t
=
self
.
_transpiler_instance
(
config
)
def
get_trainer
(
self
,
config
=
None
,
sync_mode
=
True
):
t
=
self
.
_transpiler_instance
(
config
,
sync_mode
)
return
t
.
get_trainer_program
()
def
get_pserver
(
self
,
ep
,
config
=
None
):
t
=
self
.
_transpiler_instance
(
config
)
def
get_pserver
(
self
,
ep
,
config
=
None
,
sync_mode
=
True
):
t
=
self
.
_transpiler_instance
(
config
,
sync_mode
)
pserver
=
t
.
get_pserver_program
(
ep
)
startup
=
t
.
get_startup_program
(
ep
,
pserver
)
return
pserver
,
startup
def
_transpiler_instance
(
self
,
config
=
None
):
def
_transpiler_instance
(
self
,
config
=
None
,
sync_mode
=
True
):
if
not
self
.
transpiler
:
main
=
self
.
get_main_program
()
self
.
transpiler
=
fluid
.
DistributeTranspiler
(
config
=
config
)
...
...
@@ -69,7 +69,8 @@ class TranspilerTest(unittest.TestCase):
self
.
trainer_id
,
program
=
main
,
pservers
=
self
.
pserver_eps
,
trainers
=
self
.
trainers
)
trainers
=
self
.
trainers
,
sync_mode
=
sync_mode
)
return
self
.
transpiler
...
...
@@ -464,5 +465,76 @@ class TestDistLookupTable(TestDistLookupTableBase):
self
.
assertEqual
([
op
.
type
for
op
in
trainer
.
blocks
[
0
].
ops
],
ops
)
class
TestAsyncLocalLookupTable
(
TestDistLookupTableBase
):
def
net_conf
(
self
):
self
.
network_with_table
(
is_sparse
=
True
,
is_distributed
=
False
)
def
transpiler_test_impl
(
self
):
config
=
fluid
.
DistributeTranspilerConfig
()
pserver1
,
startup1
=
self
.
get_pserver
(
self
.
pserver1_ep
,
config
,
False
)
self
.
assertEqual
(
len
(
pserver1
.
blocks
),
3
)
# 0 listen_and_serv
# 1 optimize for fc_w or fc_b adam
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
1
].
ops
],
[
"adam"
,
"scale"
,
"scale"
])
# 2 optimize for table adam
# NOTE: if param is not selected rows, the grad will scaled to grad / trainer_num
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
2
].
ops
],
[
"adam"
,
"scale"
,
"scale"
])
trainer
=
self
.
get_trainer
(
config
)
self
.
assertEqual
(
len
(
trainer
.
blocks
),
1
)
ops
=
[
'lookup_table'
,
'sequence_pool'
,
'lookup_table'
,
'sequence_pool'
,
'concat'
,
'mul'
,
'elementwise_add'
,
'cross_entropy'
,
'mean'
,
'fill_constant'
,
'mean_grad'
,
'cross_entropy_grad'
,
'elementwise_add_grad'
,
'send'
,
'mul_grad'
,
'send'
,
'concat_grad'
,
'sequence_pool_grad'
,
'lookup_table_grad'
,
'sequence_pool_grad'
,
'lookup_table_grad'
,
'sum'
,
'split_selected_rows'
,
'send'
,
'recv'
,
'recv'
,
'recv'
,
'concat'
]
self
.
assertEqual
([
op
.
type
for
op
in
trainer
.
blocks
[
0
].
ops
],
ops
)
class
TestAsyncDistLookupTable
(
TestDistLookupTableBase
):
def
net_conf
(
self
):
self
.
network_with_table
(
is_sparse
=
True
,
is_distributed
=
True
)
def
transpiler_test_impl
(
self
):
config
=
fluid
.
DistributeTranspilerConfig
()
pserver1
,
startup1
=
self
.
get_pserver
(
self
.
pserver1_ep
,
config
,
False
)
self
.
assertEqual
(
len
(
pserver1
.
blocks
),
6
)
# 0 listen_and_serv
# 1 optimize for fc_w or fc_b adam
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
1
].
ops
],
[
"adam"
,
"scale"
,
"scale"
])
# 2 optimize for table sgd
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
2
].
ops
],
[
"sgd"
])
# 3 prefetch -> lookup_sparse_table for data0
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
3
].
ops
],
[
"lookup_sparse_table"
])
# 4 prefetch -> lookup_sparse_table for data1
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
4
].
ops
],
[
"lookup_sparse_table"
])
# 5 save table
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
5
].
ops
],
[
"save"
])
trainer
=
self
.
get_trainer
(
config
)
self
.
assertEqual
(
len
(
trainer
.
blocks
),
1
)
ops
=
[
'split_ids'
,
'prefetch'
,
'merge_ids'
,
'sequence_pool'
,
'split_ids'
,
'prefetch'
,
'merge_ids'
,
'sequence_pool'
,
'concat'
,
'mul'
,
'elementwise_add'
,
'cross_entropy'
,
'mean'
,
'fill_constant'
,
'mean_grad'
,
'cross_entropy_grad'
,
'elementwise_add_grad'
,
'send'
,
'mul_grad'
,
'send'
,
'concat_grad'
,
'sequence_pool_grad'
,
'lookup_table_grad'
,
'sequence_pool_grad'
,
'lookup_table_grad'
,
'sum'
,
'split_ids'
,
'send'
,
'recv'
,
'recv'
]
self
.
assertEqual
([
op
.
type
for
op
in
trainer
.
blocks
[
0
].
ops
],
ops
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_memory_usage.py
0 → 100644
浏览文件 @
29ad9794
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
paddle
import
paddle.fluid
as
fluid
import
contextlib
import
unittest
def
train_simulator
(
test_batch_size
=
10
):
if
test_batch_size
<=
0
:
raise
ValueError
(
"batch_size should be a positive integeral value, "
"but got batch_size={}"
.
format
(
test_batch_size
))
x
=
fluid
.
layers
.
data
(
name
=
'x'
,
shape
=
[
13
],
dtype
=
'float32'
)
y_predict
=
fluid
.
layers
.
fc
(
input
=
x
,
size
=
1
,
act
=
None
)
y
=
fluid
.
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
dtype
=
'float32'
)
cost
=
fluid
.
layers
.
square_error_cost
(
input
=
y_predict
,
label
=
y
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.001
)
sgd_optimizer
.
minimize
(
avg_cost
)
# Calculate memory usage in current network config
lower_usage
,
upper_usage
,
unit
=
fluid
.
contrib
.
memory_usage
(
fluid
.
default_main_program
(),
batch_size
=
test_batch_size
)
print
(
"memory usage is about %.3f - %.3f %s"
%
(
lower_usage
,
upper_usage
,
unit
))
class
TestMemoryUsage
(
unittest
.
TestCase
):
def
test_with_unit_B
(
self
):
with
self
.
program_scope_guard
():
train_simulator
()
def
test_with_unit_KB
(
self
):
with
self
.
program_scope_guard
():
train_simulator
(
test_batch_size
=
1000
)
def
test_with_unit_MB
(
self
):
with
self
.
program_scope_guard
():
train_simulator
(
test_batch_size
=
100000
)
@
contextlib
.
contextmanager
def
program_scope_guard
(
self
):
prog
=
fluid
.
Program
()
startup_prog
=
fluid
.
Program
()
scope
=
fluid
.
core
.
Scope
()
with
fluid
.
scope_guard
(
scope
):
with
fluid
.
program_guard
(
prog
,
startup_prog
):
yield
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
29ad9794
...
...
@@ -293,6 +293,7 @@ class DistributeTranspiler(object):
RPC_OP_ROLE_ATTR_NAME
:
RPC_OP_ROLE_ATTR_VALUE
})
if
self
.
sync_mode
:
program
.
global_block
().
append_op
(
type
=
"fetch_barrier"
,
inputs
=
{},
...
...
python/paddle/v2/dataset/conll05.py
浏览文件 @
29ad9794
...
...
@@ -29,13 +29,13 @@ __all__ = ['test, get_dict', 'get_embedding', 'convert']
DATA_URL
=
'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5
=
'387719152ae52d60422c016e92a742fc'
WORDDICT_URL
=
'http://paddle
paddle.bj.bcebos.com/demo/srl_dict_and_embedding/
wordDict.txt'
WORDDICT_URL
=
'http://paddle
models.bj.bcebos.com/conll05st%2F
wordDict.txt'
WORDDICT_MD5
=
'ea7fb7d4c75cc6254716f0177a506baa'
VERBDICT_URL
=
'http://paddle
paddle.bj.bcebos.com/demo/srl_dict_and_embedding/
verbDict.txt'
VERBDICT_URL
=
'http://paddle
models.bj.bcebos.com/conll05st%2F
verbDict.txt'
VERBDICT_MD5
=
'0d2977293bbb6cbefab5b0f97db1e77c'
TRGDICT_URL
=
'http://paddle
paddle.bj.bcebos.com/demo/srl_dict_and_embedding/
targetDict.txt'
TRGDICT_URL
=
'http://paddle
models.bj.bcebos.com/conll05st%2F
targetDict.txt'
TRGDICT_MD5
=
'd8c7f03ceb5fc2e5a0fa7503a4353751'
EMB_URL
=
'http://paddle
paddle.bj.bcebos.com/demo/srl_dict_and_embedding/
emb'
EMB_URL
=
'http://paddle
models.bj.bcebos.com/conll05st%2F
emb'
EMB_MD5
=
'bf436eb0faa1f6f9103017f8be57cdb7'
UNK_IDX
=
0
...
...
python/paddle/v2/dataset/wmt14.py
浏览文件 @
29ad9794
...
...
@@ -41,7 +41,7 @@ URL_TRAIN = ('http://paddlepaddle.cdn.bcebos.com/demo/'
'wmt_shrinked_data/wmt14.tgz'
)
MD5_TRAIN
=
'0791583d57d5beb693b9414c5b36798c'
# BLEU of this trained model is 26.92
URL_MODEL
=
'http://paddle
paddle.bj.bcebos.com/demo/wmt_14/wmt14_model.tar.
gz'
URL_MODEL
=
'http://paddle
models.bj.bcebos.com/wmt%2Fwmt14.t
gz'
MD5_MODEL
=
'0cb4a5366189b6acba876491c8724fa3'
START
=
"<s>"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录