Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
e79ad2ea
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e79ad2ea
编写于
9月 28, 2018
作者:
D
Dang Qingqing
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into quantize_transpiler_update
上级
209f799f
643b6faa
变更
38
显示空白变更内容
内联
并排
Showing
38 changed file
with
1422 addition
and
598 deletion
+1422
-598
paddle/fluid/API.spec
paddle/fluid/API.spec
+7
-7
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+4
-5
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+5
-0
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+126
-0
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+35
-0
paddle/fluid/framework/details/cow_ptr.h
paddle/fluid/framework/details/cow_ptr.h
+19
-61
paddle/fluid/framework/details/cow_ptr_test.cc
paddle/fluid/framework/details/cow_ptr_test.cc
+8
-0
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+2
-0
paddle/fluid/framework/ir/graph_helper.cc
paddle/fluid/framework/ir/graph_helper.cc
+70
-2
paddle/fluid/framework/ir/graph_helper.h
paddle/fluid/framework/ir/graph_helper.h
+2
-0
paddle/fluid/framework/ir/graph_helper_test.cc
paddle/fluid/framework/ir/graph_helper_test.cc
+91
-0
paddle/fluid/framework/ir/pass.cc
paddle/fluid/framework/ir/pass.cc
+0
-1
paddle/fluid/framework/ir/pass.h
paddle/fluid/framework/ir/pass.h
+27
-4
paddle/fluid/framework/ir/pass_builder.cc
paddle/fluid/framework/ir/pass_builder.cc
+43
-0
paddle/fluid/framework/ir/pass_builder.h
paddle/fluid/framework/ir/pass_builder.h
+49
-0
paddle/fluid/framework/ir/pass_test.cc
paddle/fluid/framework/ir/pass_test.cc
+4
-6
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+362
-241
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+13
-90
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+2
-2
paddle/fluid/operators/detection_map_op.h
paddle/fluid/operators/detection_map_op.h
+15
-13
paddle/fluid/operators/extract_rows_op.cc
paddle/fluid/operators/extract_rows_op.cc
+1
-1
paddle/fluid/operators/lookup_table_op.cu
paddle/fluid/operators/lookup_table_op.cu
+2
-4
paddle/fluid/operators/math/selected_rows_functor.cu
paddle/fluid/operators/math/selected_rows_functor.cu
+4
-6
paddle/fluid/operators/sampling_id_op.cc
paddle/fluid/operators/sampling_id_op.cc
+8
-7
paddle/fluid/operators/sgd_op.cu
paddle/fluid/operators/sgd_op.cu
+22
-21
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+0
-1
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+29
-1
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+17
-2
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+3
-3
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+2
-4
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+263
-104
python/paddle/fluid/layers/ops.py
python/paddle/fluid/layers/ops.py
+0
-7
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+1
-1
python/paddle/fluid/tests/unittests/test_detection_map_op.py
python/paddle/fluid/tests/unittests/test_detection_map_op.py
+3
-2
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+60
-1
python/paddle/fluid/tests/unittests/test_pass_builder.py
python/paddle/fluid/tests/unittests/test_pass_builder.py
+121
-0
python/paddle/fluid/tests/unittests/transformer_model.py
python/paddle/fluid/tests/unittests/transformer_model.py
+1
-0
未找到文件。
paddle/fluid/API.spec
浏览文件 @
e79ad2ea
...
@@ -153,6 +153,13 @@ paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_
...
@@ -153,6 +153,13 @@ paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype', 'use_mkldnn'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32', False))
paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.sum ArgSpec(args=['x', 'use_mkldnn'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
...
@@ -224,13 +231,6 @@ paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwarg
...
@@ -224,13 +231,6 @@ paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwarg
paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.gaussian_random ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sampling_id ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
e79ad2ea
...
@@ -150,11 +150,10 @@ else()
...
@@ -150,11 +150,10 @@ else()
endif
()
endif
()
if
(
NOT WIN32
)
if
(
NOT WIN32
)
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
graph graph_viz_pass multi_devices_graph_pass
graph build_strategy
multi_devices_graph_print_pass multi_devices_graph_check_pass
fast_threaded_ssa_graph_executor
)
fast_threaded_ssa_graph_executor fuse_elewise_add_act_pass
)
endif
()
# NOT WIN32
endif
()
# NOT WIN32
cc_library
(
prune SRCS prune.cc DEPS framework_proto
)
cc_library
(
prune SRCS prune.cc DEPS framework_proto
)
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
e79ad2ea
...
@@ -54,3 +54,8 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu
...
@@ -54,3 +54,8 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu
# device_context reduce_op_handle )
# device_context reduce_op_handle )
cc_library
(
fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc
cc_library
(
fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc
DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context
)
DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context
)
cc_library
(
build_strategy SRCS build_strategy.cc DEPS
graph_viz_pass multi_devices_graph_pass
multi_devices_graph_print_pass multi_devices_graph_check_pass
fuse_elewise_add_act_pass
)
paddle/fluid/framework/details/build_strategy.cc
0 → 100644
浏览文件 @
e79ad2ea
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
class
ParallelExecutorPassBuilder
:
public
ir
::
PassBuilder
{
public:
explicit
ParallelExecutorPassBuilder
(
const
BuildStrategy
&
strategy
)
:
ir
::
PassBuilder
(),
strategy_
(
strategy
)
{
// Add a graph viz pass to record a graph.
if
(
!
strategy_
.
debug_graphviz_path_
.
empty
())
{
auto
viz_pass
=
AppendPass
(
"graph_viz_pass"
);
const
std
::
string
graph_path
=
string
::
Sprintf
(
"%s%s"
,
strategy_
.
debug_graphviz_path_
.
c_str
(),
"_original_graph"
);
viz_pass
->
Set
<
std
::
string
>
(
"graph_viz_path"
,
new
std
::
string
(
graph_path
));
}
// Add op fusion.
if
(
strategy
.
fuse_elewise_add_act_ops_
)
{
auto
fuse_elewise_add_act_pass
=
AppendPass
(
"fuse_elewise_add_act_pass"
);
// Add a graph viz pass to record a graph.
if
(
!
strategy
.
debug_graphviz_path_
.
empty
())
{
auto
viz_pass
=
AppendPass
(
"graph_viz_pass"
);
const
std
::
string
graph_path
=
string
::
Sprintf
(
"%s%s"
,
strategy
.
debug_graphviz_path_
.
c_str
(),
"_fused_graph"
);
viz_pass
->
Set
<
std
::
string
>
(
"graph_viz_path"
,
new
std
::
string
(
graph_path
));
}
}
// Convert graph to run on multi-devices.
auto
multi_devices_pass
=
AppendPass
(
"multi_devices_pass"
);
multi_devices_pass
->
SetNotOwned
<
const
BuildStrategy
>
(
"strategy"
,
&
strategy_
);
// Add a graph print pass to record a graph with device info.
if
(
!
strategy_
.
debug_graphviz_path_
.
empty
())
{
auto
multi_devices_print_pass
=
AppendPass
(
"multi_devices_print_pass"
);
multi_devices_print_pass
->
SetNotOwned
<
const
std
::
string
>
(
"debug_graphviz_path"
,
&
strategy_
.
debug_graphviz_path_
);
multi_devices_print_pass
->
Set
<
details
::
GraphvizSSAGraphPrinter
>
(
"graph_printer"
,
new
details
::
GraphvizSSAGraphPrinter
);
}
// Verify that the graph is correct for multi-device executor.
AppendPass
(
"multi_devices_check_pass"
);
}
private:
BuildStrategy
strategy_
;
};
std
::
shared_ptr
<
ir
::
PassBuilder
>
BuildStrategy
::
CreatePassesFromStrategy
()
const
{
pass_builder_
.
reset
(
new
ParallelExecutorPassBuilder
(
*
this
));
return
pass_builder_
;
}
std
::
unique_ptr
<
ir
::
Graph
>
BuildStrategy
::
Apply
(
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#ifdef PADDLE_WITH_CUDA
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
{
#else
const
bool
use_cuda
)
const
{
#endif
// Create a default one if not initialized by user.
if
(
!
pass_builder_
)
{
CreatePassesFromStrategy
();
}
std
::
unique_ptr
<
ir
::
Graph
>
graph
(
new
ir
::
Graph
(
main_program
));
for
(
std
::
shared_ptr
<
ir
::
Pass
>
&
pass
:
pass_builder_
->
AllPasses
())
{
if
(
pass
->
Type
()
==
"multi_devices_pass"
)
{
pass
->
Erase
(
"places"
);
pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
"places"
,
&
places
);
pass
->
Erase
(
"loss_var_name"
);
pass
->
SetNotOwned
<
const
std
::
string
>
(
"loss_var_name"
,
&
loss_var_name
);
pass
->
Erase
(
"params"
);
pass
->
SetNotOwned
<
const
std
::
unordered_set
<
std
::
string
>>
(
"params"
,
&
param_names
);
pass
->
Erase
(
"local_scopes"
);
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
&
local_scopes
);
#ifdef PADDLE_WITH_CUDA
platform
::
NCCLContextMap
*
nctx
=
use_cuda
?
nccl_ctxs
:
nullptr
;
pass
->
Erase
(
"nccl_ctxs"
);
pass
->
SetNotOwned
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
,
nctx
);
#endif
}
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
}
return
graph
;
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
USE_PASS
(
fuse_elewise_add_act_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
multi_devices_pass
);
USE_PASS
(
multi_devices_check_pass
);
USE_PASS
(
multi_devices_print_pass
);
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
e79ad2ea
...
@@ -15,6 +15,17 @@
...
@@ -15,6 +15,17 @@
#pragma once
#pragma once
#include <string>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/nccl_helper.h"
#endif
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -57,6 +68,30 @@ struct BuildStrategy {
...
@@ -57,6 +68,30 @@ struct BuildStrategy {
bool
fuse_elewise_add_act_ops_
{
false
};
bool
fuse_elewise_add_act_ops_
{
false
};
bool
enable_data_balance_
{
false
};
bool
enable_data_balance_
{
false
};
// User normally doesn't need to call this API.
// The PassBuilder allows for more customized insert, remove of passes
// from python side.
// A new PassBuilder is created based on configs defined above and
// passes are owned by the PassBuilder.
std
::
shared_ptr
<
ir
::
PassBuilder
>
CreatePassesFromStrategy
()
const
;
// Apply the passes built by the pass_builder_. The passes will be
// applied to the Program and output an ir::Graph.
std
::
unique_ptr
<
ir
::
Graph
>
Apply
(
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#ifdef PADDLE_WITH_CUDA
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
#else
const
bool
use_cuda
)
const
;
#endif
private:
mutable
std
::
shared_ptr
<
ir
::
PassBuilder
>
pass_builder_
;
};
};
}
// namespace details
}
// namespace details
...
...
paddle/fluid/framework/details/cow_ptr.h
浏览文件 @
e79ad2ea
...
@@ -20,79 +20,37 @@ namespace paddle {
...
@@ -20,79 +20,37 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
details
{
namespace
details
{
// Change it to thread safe flags if needed.
template
<
class
T
>
class
ThreadUnsafeOwnershipFlags
{
class
COWPtr
{
public:
public:
explicit
ThreadUnsafeOwnershipFlags
(
bool
flag
)
:
flag_
(
flag
)
{}
typedef
std
::
shared_ptr
<
T
>
RefPtr
;
ThreadUnsafeOwnershipFlags
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
&
operator
=
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
(
ThreadUnsafeOwnershipFlags
&&
other
)
=
default
;
void
SetOwnership
(
bool
flag
)
{
flag_
=
flag
;
}
// Invoke the callback if it is not owned.
template
<
typename
Callback
>
void
AcquireOwnershipOnce
(
Callback
acquire
)
{
if
(
!
flag_
)
{
acquire
();
flag_
=
true
;
}
}
private:
private:
bool
flag_
;
RefPtr
m_sp
;
};
// Copy-On-Write pointer.
// It will hold a T* pointer, and only copy once when `MutableData` is invoked.
//
// The template parameter OwnershipFlags should have:
// * a constructor takes a bool. True if own.
// * SetOwnership(bool flag).
// * AcquireOwnershipOnce(Callback). It will invoke the callback if it is not
// owned.
//
// https://en.wikipedia.org/wiki/Copy-on-write
template
<
typename
T
,
typename
OwnershipFlags
=
ThreadUnsafeOwnershipFlags
>
class
COWPtr
{
public:
public:
// Ctor from raw pointer.
COWPtr
()
:
m_sp
(
nullptr
)
{}
explicit
COWPtr
(
T
*
ptr
)
:
payload_
(
ptr
),
ownership_
{
true
}
{}
explicit
COWPtr
(
T
*
t
)
:
m_sp
(
t
)
{}
// Move methods. Steal ownership from origin
const
T
&
Data
()
const
{
return
*
m_sp
;
}
COWPtr
(
COWPtr
&&
other
)
:
payload_
(
other
.
payload_
),
ownership_
{
std
::
move
(
other
.
ownership_
)}
{}
COWPtr
&
operator
=
(
COWPtr
&&
origin
)
=
default
;
// Copy methods. Not own payload
COWPtr
(
const
COWPtr
&
other
)
:
payload_
(
other
.
payload_
),
ownership_
{
false
}
{}
COWPtr
&
operator
=
(
const
COWPtr
&
other
)
{
payload_
=
other
.
payload_
;
ownership_
.
SetOwnership
(
false
);
return
*
this
;
}
// Access read only data.
const
T
&
Data
()
const
{
return
*
payload_
;
}
// Access mutable data. If the data is not owned, the data will be copied
// before.
T
*
MutableData
()
{
T
*
MutableData
()
{
ownership_
.
AcquireOwnershipOnce
(
DetachIfNotUnique
();
[
this
]
{
payload_
.
reset
(
new
T
(
*
payload_
));
});
return
m_sp
.
get
();
return
payload_
.
get
();
}
}
private:
void
DetachIfNotUnique
()
{
// Actual data pointer.
T
*
tmp
=
m_sp
.
get
();
std
::
shared_ptr
<
T
>
payload_
;
if
(
!
(
tmp
==
nullptr
||
m_sp
.
unique
()))
{
Detach
();
}
}
// Ownership flag.
void
Detach
()
{
OwnershipFlags
ownership_
;
T
*
tmp
=
m_sp
.
get
();
m_sp
=
RefPtr
(
new
T
(
*
tmp
));
}
};
};
}
// namespace details
}
// namespace details
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/details/cow_ptr_test.cc
浏览文件 @
e79ad2ea
...
@@ -30,6 +30,14 @@ TEST(COWPtr, all) {
...
@@ -30,6 +30,14 @@ TEST(COWPtr, all) {
ASSERT_EQ
(
ptr2
.
Data
(),
10
);
ASSERT_EQ
(
ptr2
.
Data
(),
10
);
}
}
TEST
(
COWPtr
,
change_old
)
{
COWPtr
<
int
>
ptr
(
new
int
{
0
});
COWPtr
<
int
>
ptr2
=
ptr
;
*
ptr
.
MutableData
()
=
10
;
ASSERT_EQ
(
ptr2
.
Data
(),
0
);
ASSERT_EQ
(
ptr
.
Data
(),
10
);
}
}
// namespace details
}
// namespace details
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
e79ad2ea
...
@@ -41,6 +41,8 @@ cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass
...
@@ -41,6 +41,8 @@ cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
cc_library
(
pass_builder SRCS pass_builder.cc DEPS pass
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
...
...
paddle/fluid/framework/ir/graph_helper.cc
浏览文件 @
e79ad2ea
...
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/framework/ir/graph_helper.h"
#include <algorithm>
#include <algorithm>
#include <deque>
#include <unordered_set>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_helper.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
...
@@ -113,6 +113,74 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
...
@@ -113,6 +113,74 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
return
adj_list
;
return
adj_list
;
}
}
size_t
GraphNum
(
const
Graph
&
graph
)
{
std
::
unordered_set
<
ir
::
Node
*>
nodes
=
graph
.
Nodes
();
std
::
unordered_set
<
ir
::
Node
*>
visited_nodes
;
visited_nodes
.
reserve
(
nodes
.
size
());
std
::
deque
<
ir
::
Node
*>
q_nodes
;
std
::
vector
<
std
::
unordered_set
<
ir
::
Node
*>>
graph_nodes
;
std
::
unordered_set
<
ir
::
Node
*>
g_nodes
;
size_t
graph_count
=
0
;
auto
traverse_nodes
=
[
&
visited_nodes
,
&
q_nodes
](
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
{
std
::
copy_if
(
nodes
.
begin
(),
nodes
.
end
(),
std
::
back_inserter
(
q_nodes
),
[
&
visited_nodes
](
Node
*
node
)
{
return
!
visited_nodes
.
count
(
node
);
});
};
while
(
visited_nodes
.
size
()
!=
nodes
.
size
())
{
if
(
!
q_nodes
.
empty
())
{
auto
cur_node
=
q_nodes
.
front
();
q_nodes
.
pop_front
();
visited_nodes
.
insert
(
cur_node
);
g_nodes
.
insert
(
cur_node
);
traverse_nodes
(
cur_node
->
inputs
);
traverse_nodes
(
cur_node
->
outputs
);
}
else
{
++
graph_count
;
if
(
g_nodes
.
size
())
{
graph_nodes
.
emplace_back
(
g_nodes
);
}
g_nodes
.
clear
();
for
(
auto
&
n
:
nodes
)
{
if
(
visited_nodes
.
count
(
n
)
==
0
)
{
q_nodes
.
push_back
(
n
);
break
;
}
}
}
}
if
(
g_nodes
.
size
())
{
graph_nodes
.
emplace_back
(
g_nodes
);
}
if
(
VLOG_IS_ON
(
10
))
{
VLOG
(
10
)
<<
"graph_num: "
<<
graph_nodes
.
size
();
for
(
auto
&
g_n
:
graph_nodes
)
{
VLOG
(
10
)
<<
"graph_nodes: "
<<
g_n
.
size
();
if
(
g_n
.
size
()
<
10
)
{
std
::
stringstream
out
;
for
(
auto
&
node
:
g_n
)
{
out
<<
"
\n
Node: "
<<
node
->
Name
()
<<
" in ["
;
for
(
auto
&
n
:
node
->
inputs
)
{
out
<<
n
->
Name
()
<<
", "
;
}
out
<<
"], out["
;
for
(
auto
&
n
:
node
->
outputs
)
{
out
<<
n
->
Name
()
<<
", "
;
}
out
<<
"]"
;
}
VLOG
(
10
)
<<
out
.
str
();
}
}
}
return
graph_count
;
}
}
// namespace ir
}
// namespace ir
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/graph_helper.h
浏览文件 @
e79ad2ea
...
@@ -27,6 +27,8 @@ namespace ir {
...
@@ -27,6 +27,8 @@ namespace ir {
// Test if the graph contains circle.
// Test if the graph contains circle.
bool
HasCircle
(
const
Graph
&
graph
);
bool
HasCircle
(
const
Graph
&
graph
);
size_t
GraphNum
(
const
Graph
&
graph
);
// Topology Sort the operations in the graph from inputs to outputs.
// Topology Sort the operations in the graph from inputs to outputs.
// `graph` cannot contain circle.
// `graph` cannot contain circle.
std
::
vector
<
ir
::
Node
*>
TopologySortOperations
(
const
Graph
&
graph
);
std
::
vector
<
ir
::
Node
*>
TopologySortOperations
(
const
Graph
&
graph
);
...
...
paddle/fluid/framework/ir/graph_helper_test.cc
浏览文件 @
e79ad2ea
...
@@ -120,6 +120,97 @@ TEST(GraphHelperTest, Basic) {
...
@@ -120,6 +120,97 @@ TEST(GraphHelperTest, Basic) {
ASSERT_EQ
(
node_map
.
at
(
"op2"
),
1UL
);
ASSERT_EQ
(
node_map
.
at
(
"op2"
),
1UL
);
ASSERT_TRUE
(
node_map
.
at
(
"op3"
)
<
node_map
.
at
(
"op5"
));
ASSERT_TRUE
(
node_map
.
at
(
"op3"
)
<
node_map
.
at
(
"op5"
));
}
}
void
BuildZeroGraph
(
Graph
*
g
)
{}
void
BuildOneGraph
(
Graph
*
g
)
{
ir
::
Node
*
o1
=
g
->
CreateEmptyNode
(
"op1"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o2
=
g
->
CreateEmptyNode
(
"op2"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o3
=
g
->
CreateEmptyNode
(
"op3"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o4
=
g
->
CreateEmptyNode
(
"op4"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o5
=
g
->
CreateEmptyNode
(
"op5"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
v1
=
g
->
CreateEmptyNode
(
"var1"
,
Node
::
Type
::
kVariable
);
ir
::
Node
*
v2
=
g
->
CreateEmptyNode
(
"var2"
,
Node
::
Type
::
kVariable
);
ir
::
Node
*
v3
=
g
->
CreateEmptyNode
(
"var3"
,
Node
::
Type
::
kVariable
);
ir
::
Node
*
v4
=
g
->
CreateEmptyNode
(
"var4"
,
Node
::
Type
::
kVariable
);
// o1->v1->o2
o1
->
outputs
.
push_back
(
v1
);
o2
->
inputs
.
push_back
(
v1
);
v1
->
inputs
.
push_back
(
o1
);
v1
->
outputs
.
push_back
(
o2
);
// o2->v2->o3
// o2->v2->o4
o2
->
outputs
.
push_back
(
v2
);
o3
->
inputs
.
push_back
(
v2
);
o4
->
inputs
.
push_back
(
v2
);
v2
->
inputs
.
push_back
(
o2
);
v2
->
outputs
.
push_back
(
o3
);
v2
->
outputs
.
push_back
(
o4
);
// o2->v3->o5
o2
->
outputs
.
push_back
(
v3
);
o5
->
inputs
.
push_back
(
v3
);
v3
->
inputs
.
push_back
(
o2
);
v3
->
outputs
.
push_back
(
o5
);
// o3-v4->o5
o3
->
outputs
.
push_back
(
v4
);
o5
->
inputs
.
push_back
(
v4
);
v4
->
inputs
.
push_back
(
o3
);
v4
->
outputs
.
push_back
(
o5
);
}
void
BuildTwoGraphs
(
Graph
*
g
)
{
ir
::
Node
*
o1
=
g
->
CreateEmptyNode
(
"op1"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o2
=
g
->
CreateEmptyNode
(
"op2"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o3
=
g
->
CreateEmptyNode
(
"op3"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o4
=
g
->
CreateEmptyNode
(
"op4"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o5
=
g
->
CreateEmptyNode
(
"op5"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
v1
=
g
->
CreateEmptyNode
(
"var1"
,
Node
::
Type
::
kVariable
);
ir
::
Node
*
v2
=
g
->
CreateEmptyNode
(
"var2"
,
Node
::
Type
::
kVariable
);
ir
::
Node
*
v3
=
g
->
CreateEmptyNode
(
"var3"
,
Node
::
Type
::
kVariable
);
ir
::
Node
*
v4
=
g
->
CreateEmptyNode
(
"var4"
,
Node
::
Type
::
kVariable
);
// o1->v1->o2
o1
->
outputs
.
push_back
(
v1
);
o2
->
inputs
.
push_back
(
v1
);
v1
->
inputs
.
push_back
(
o1
);
v1
->
outputs
.
push_back
(
o2
);
// o2->v2->o3
// o2->v2->o4
o2
->
outputs
.
push_back
(
v2
);
o3
->
inputs
.
push_back
(
v2
);
o4
->
inputs
.
push_back
(
v2
);
v2
->
inputs
.
push_back
(
o2
);
v2
->
outputs
.
push_back
(
o3
);
v2
->
outputs
.
push_back
(
o4
);
// o2->v3->o5
// o2->outputs.push_back(v3);
o5
->
inputs
.
push_back
(
v3
);
// v3->inputs.push_back(o2);
v3
->
outputs
.
push_back
(
o5
);
// o3-v4->o5
o3
->
outputs
.
push_back
(
v4
);
// o5->inputs.push_back(v4);
v4
->
inputs
.
push_back
(
o3
);
// v4->outputs.push_back(o5);
}
TEST
(
GraphHelperTest
,
GraphNum
)
{
ProgramDesc
prog
;
Graph
g
(
prog
);
BuildZeroGraph
(
&
g
);
ASSERT_EQ
(
GraphNum
(
g
),
0
);
Graph
g2
(
prog
);
BuildOneGraph
(
&
g2
);
ASSERT_EQ
(
GraphNum
(
g2
),
1
);
Graph
g3
(
prog
);
BuildTwoGraphs
(
&
g3
);
ASSERT_EQ
(
GraphNum
(
g3
),
2
);
}
}
// namespace ir
}
// namespace ir
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/pass.cc
浏览文件 @
e79ad2ea
...
@@ -19,7 +19,6 @@ namespace paddle {
...
@@ -19,7 +19,6 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
std
::
unique_ptr
<
Graph
>
Pass
::
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
const
{
std
::
unique_ptr
<
Graph
>
Pass
::
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
const
{
PADDLE_ENFORCE
(
!
applied_
,
"Pass can only Apply() once."
);
PADDLE_ENFORCE
(
graph
.
get
(),
"graph passed to Pass::Apply() cannot be empty."
);
PADDLE_ENFORCE
(
graph
.
get
(),
"graph passed to Pass::Apply() cannot be empty."
);
for
(
const
std
::
string
&
attr
:
required_pass_attrs_
)
{
for
(
const
std
::
string
&
attr
:
required_pass_attrs_
)
{
PADDLE_ENFORCE
(
attrs_
.
find
(
attr
)
!=
attrs_
.
end
(),
PADDLE_ENFORCE
(
attrs_
.
find
(
attr
)
!=
attrs_
.
end
(),
...
...
paddle/fluid/framework/ir/pass.h
浏览文件 @
e79ad2ea
...
@@ -42,6 +42,8 @@ class Pass {
...
@@ -42,6 +42,8 @@ class Pass {
attr_dels_
.
clear
();
attr_dels_
.
clear
();
}
}
std
::
string
Type
()
const
{
return
type_
;
}
std
::
unique_ptr
<
Graph
>
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
const
;
std
::
unique_ptr
<
Graph
>
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
const
;
// Get a reference to the attributed previously set.
// Get a reference to the attributed previously set.
...
@@ -52,6 +54,21 @@ class Pass {
...
@@ -52,6 +54,21 @@ class Pass {
return
*
boost
::
any_cast
<
AttrType
*>
(
attrs_
.
at
(
attr_name
));
return
*
boost
::
any_cast
<
AttrType
*>
(
attrs_
.
at
(
attr_name
));
}
}
bool
Has
(
const
std
::
string
&
attr_name
)
const
{
return
attrs_
.
find
(
attr_name
)
!=
attrs_
.
end
();
}
void
Erase
(
const
std
::
string
&
attr_name
)
{
if
(
!
Has
(
attr_name
))
{
return
;
}
if
(
attr_dels_
.
find
(
attr_name
)
!=
attr_dels_
.
end
())
{
attr_dels_
[
attr_name
]();
attr_dels_
.
erase
(
attr_name
);
}
attrs_
.
erase
(
attr_name
);
}
// Set a pointer to the attribute. Pass takes ownership of the attribute.
// Set a pointer to the attribute. Pass takes ownership of the attribute.
template
<
typename
AttrType
>
template
<
typename
AttrType
>
void
Set
(
const
std
::
string
&
attr_name
,
AttrType
*
attr
)
{
void
Set
(
const
std
::
string
&
attr_name
,
AttrType
*
attr
)
{
...
@@ -68,13 +85,15 @@ class Pass {
...
@@ -68,13 +85,15 @@ class Pass {
// should delete the attribute.
// should delete the attribute.
template
<
typename
AttrType
>
template
<
typename
AttrType
>
void
SetNotOwned
(
const
std
::
string
&
attr_name
,
AttrType
*
attr
)
{
void
SetNotOwned
(
const
std
::
string
&
attr_name
,
AttrType
*
attr
)
{
PADDLE_ENFORCE
(
attrs_
.
count
(
attr_name
)
==
0
);
PADDLE_ENFORCE
(
attrs_
.
count
(
attr_name
)
==
0
,
"%s already set in the pass"
,
attr_name
);
attrs_
[
attr_name
]
=
attr
;
attrs_
[
attr_name
]
=
attr
;
}
}
protected:
protected:
virtual
std
::
unique_ptr
<
Graph
>
ApplyImpl
(
virtual
std
::
unique_ptr
<
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
Graph
>
graph
)
const
{
std
::
unique_ptr
<
Graph
>
graph
)
const
=
0
;
LOG
(
FATAL
)
<<
"Calling virtual Pass not implemented."
;
}
private:
private:
template
<
typename
PassType
>
template
<
typename
PassType
>
...
@@ -89,7 +108,10 @@ class Pass {
...
@@ -89,7 +108,10 @@ class Pass {
required_graph_attrs_
.
insert
(
attrs
.
begin
(),
attrs
.
end
());
required_graph_attrs_
.
insert
(
attrs
.
begin
(),
attrs
.
end
());
}
}
void
RegisterType
(
const
std
::
string
&
type
)
{
type_
=
type
;
}
mutable
bool
applied_
{
false
};
mutable
bool
applied_
{
false
};
std
::
string
type_
;
std
::
unordered_set
<
std
::
string
>
required_pass_attrs_
;
std
::
unordered_set
<
std
::
string
>
required_pass_attrs_
;
std
::
unordered_set
<
std
::
string
>
required_graph_attrs_
;
std
::
unordered_set
<
std
::
string
>
required_graph_attrs_
;
std
::
map
<
std
::
string
,
boost
::
any
>
attrs_
;
std
::
map
<
std
::
string
,
boost
::
any
>
attrs_
;
...
@@ -143,10 +165,11 @@ struct PassRegistrar : public Registrar {
...
@@ -143,10 +165,11 @@ struct PassRegistrar : public Registrar {
PADDLE_ENFORCE
(
!
PassRegistry
::
Instance
().
Has
(
pass_type
),
PADDLE_ENFORCE
(
!
PassRegistry
::
Instance
().
Has
(
pass_type
),
"'%s' is registered more than once."
,
pass_type
);
"'%s' is registered more than once."
,
pass_type
);
PassRegistry
::
Instance
().
Insert
(
PassRegistry
::
Instance
().
Insert
(
pass_type
,
[
this
]()
->
std
::
unique_ptr
<
Pass
>
{
pass_type
,
[
this
,
pass_type
]()
->
std
::
unique_ptr
<
Pass
>
{
std
::
unique_ptr
<
Pass
>
pass
(
new
PassType
());
std
::
unique_ptr
<
Pass
>
pass
(
new
PassType
());
pass
->
RegisterRequiredPassAttrs
(
this
->
required_pass_attrs_
);
pass
->
RegisterRequiredPassAttrs
(
this
->
required_pass_attrs_
);
pass
->
RegisterRequiredGraphAttrs
(
this
->
required_graph_attrs_
);
pass
->
RegisterRequiredGraphAttrs
(
this
->
required_graph_attrs_
);
pass
->
RegisterType
(
pass_type
);
return
pass
;
return
pass
;
});
});
}
}
...
...
paddle/fluid/framework/ir/pass_builder.cc
0 → 100644
浏览文件 @
e79ad2ea
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/pass_builder.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
std
::
shared_ptr
<
Pass
>
PassBuilder
::
AppendPass
(
const
std
::
string
&
pass_type
)
{
auto
pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
pass_type
);
passes_
.
emplace_back
(
pass
.
release
());
return
passes_
.
back
();
}
void
PassBuilder
::
RemovePass
(
size_t
idx
)
{
PADDLE_ENFORCE
(
passes_
.
size
()
>
idx
);
passes_
.
erase
(
passes_
.
begin
()
+
idx
);
}
std
::
shared_ptr
<
Pass
>
PassBuilder
::
InsertPass
(
size_t
idx
,
const
std
::
string
&
pass_type
)
{
PADDLE_ENFORCE
(
passes_
.
size
()
>=
idx
);
std
::
shared_ptr
<
Pass
>
pass
(
ir
::
PassRegistry
::
Instance
().
Get
(
pass_type
).
release
());
passes_
.
insert
(
passes_
.
begin
()
+
idx
,
std
::
move
(
pass
));
return
passes_
[
idx
];
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/pass_builder.h
0 → 100644
浏览文件 @
e79ad2ea
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
class
PassBuilder
{
public:
PassBuilder
()
{}
virtual
~
PassBuilder
()
{}
// Append a new pass to the end.
std
::
shared_ptr
<
Pass
>
AppendPass
(
const
std
::
string
&
pass_type
);
// Insert a new pass after `idx`.
std
::
shared_ptr
<
Pass
>
InsertPass
(
size_t
idx
,
const
std
::
string
&
pass_type
);
// Remove a new pass at `idx`.
void
RemovePass
(
size_t
idx
);
// Returns a list of all passes.
std
::
vector
<
std
::
shared_ptr
<
Pass
>>
AllPasses
()
const
{
return
passes_
;
}
protected:
std
::
vector
<
std
::
shared_ptr
<
Pass
>>
passes_
;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/pass_test.cc
浏览文件 @
e79ad2ea
...
@@ -82,12 +82,10 @@ TEST(PassTest, TestPassAttrCheck) {
...
@@ -82,12 +82,10 @@ TEST(PassTest, TestPassAttrCheck) {
ASSERT_EQ
(
graph
->
Get
<
int
>
(
"copy_test_pass_attr"
),
2
);
ASSERT_EQ
(
graph
->
Get
<
int
>
(
"copy_test_pass_attr"
),
2
);
ASSERT_EQ
(
graph
->
Get
<
int
>
(
"copy_test_graph_attr"
),
2
);
ASSERT_EQ
(
graph
->
Get
<
int
>
(
"copy_test_graph_attr"
),
2
);
try
{
// Allow apply more than once.
graph
.
reset
(
new
Graph
(
prog
));
graph
->
Set
<
int
>
(
"test_graph_attr"
,
new
int
);
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
}
catch
(
paddle
::
platform
::
EnforceNotMet
e
)
{
exception
=
std
::
string
(
e
.
what
());
}
ASSERT_TRUE
(
exception
.
find
(
"Pass can only Apply() once"
)
!=
exception
.
npos
);
pass
=
PassRegistry
::
Instance
().
Get
(
"test_pass"
);
pass
=
PassRegistry
::
Instance
().
Get
(
"test_pass"
);
pass
->
SetNotOwned
<
int
>
(
"test_pass_attr"
,
&
val
);
pass
->
SetNotOwned
<
int
>
(
"test_pass_attr"
,
&
val
);
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
e79ad2ea
...
@@ -17,10 +17,13 @@
...
@@ -17,10 +17,13 @@
#include <algorithm>
#include <algorithm>
#include <initializer_list>
#include <initializer_list>
#include <memory>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/memcpy.h"
#include "glog/logging.h"
#include "glog/logging.h"
...
@@ -28,173 +31,167 @@ namespace paddle {
...
@@ -28,173 +31,167 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
#if defined(PADDLE_WITH_CUDA)
#if defined(PADDLE_WITH_CUDA)
// Vector<T> implements the std::vector interface, and can get Data or
namespace
details
{
// MutableData from any place. The data will be synced implicitly inside.
struct
CUDABuffer
{
template
<
typename
T
>
void
*
data_
{
nullptr
};
class
Vector
{
size_t
size_
{
0
};
public:
platform
::
CUDAPlace
place_
;
using
value_type
=
T
;
// Default ctor. Create empty Vector
CUDABuffer
()
{}
Vector
()
{
InitEmpty
();
}
CUDABuffer
(
platform
::
Place
place
,
size_t
size
)
:
size_
(
size
),
place_
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
// Fill vector with value. The vector size is `count`.
data_
=
memory
::
Alloc
(
place_
,
size
);
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
InitEmpty
();
if
(
count
!=
0
)
{
resize
(
count
);
T
*
ptr
=
begin
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
ptr
[
i
]
=
value
;
}
}
}
}
// Ctor with init_list
~
CUDABuffer
()
{
ClearMemory
();
}
Vector
(
std
::
initializer_list
<
T
>
init
)
{
if
(
init
.
size
()
==
0
)
{
CUDABuffer
(
const
CUDABuffer
&
o
)
=
delete
;
InitEmpty
();
CUDABuffer
&
operator
=
(
const
CUDABuffer
&
o
)
=
delete
;
}
else
{
InitByIter
(
init
.
size
(),
init
.
begin
(),
init
.
end
());
void
Resize
(
platform
::
Place
place
,
size_t
size
)
{
ClearMemory
();
place_
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
);
data_
=
memory
::
Alloc
(
place_
,
size
);
PADDLE_ENFORCE_NOT_NULL
(
data_
);
size_
=
size
;
}
}
void
Swap
(
CUDABuffer
&
o
)
{
std
::
swap
(
data_
,
o
.
data_
);
std
::
swap
(
place_
,
o
.
place_
);
std
::
swap
(
size_
,
o
.
size_
);
}
}
// implicit cast from std::vector.
private:
template
<
typename
U
>
void
ClearMemory
()
const
{
Vector
(
const
std
::
vector
<
U
>
&
dat
)
{
// NOLINT
if
(
data_
!=
nullptr
)
{
if
(
dat
.
size
()
==
0
)
{
memory
::
Free
(
place_
,
data_
);
InitEmpty
();
}
else
{
InitByIter
(
dat
.
size
(),
dat
.
begin
(),
dat
.
end
());
}
}
}
}
};
}
// namespace details
// Copy ctor
// Vector<T> implements the std::vector interface, and can get Data or
Vector
(
const
Vector
<
T
>
&
other
)
{
this
->
operator
=
(
other
);
}
// MutableData from any place. The data will be synced implicitly inside.
template
<
typename
T
>
class
Vector
{
public:
using
value_type
=
T
;
using
iterator
=
typename
std
::
vector
<
T
>::
iterator
;
using
const_iterator
=
typename
std
::
vector
<
T
>::
const_iterator
;
// Copy operator
private:
Vector
<
T
>
&
operator
=
(
const
Vector
<
T
>
&
other
)
{
// The actual class to implement vector logic
if
(
other
.
size
()
!=
0
)
{
class
VectorData
{
this
->
InitByIter
(
other
.
size
(),
other
.
begin
(),
other
.
end
());
public:
}
else
{
VectorData
()
:
flag_
(
kDataInCPU
)
{}
InitEmpty
();
VectorData
(
size_t
count
,
const
T
&
value
)
}
:
cpu_
(
count
,
value
),
flag_
(
kDataInCPU
)
{}
return
*
this
;
VectorData
(
std
::
initializer_list
<
T
>
init
)
:
cpu_
(
init
),
flag_
(
kDataInCPU
)
{}
}
template
<
typename
U
>
explicit
VectorData
(
const
std
::
vector
<
U
>
&
dat
)
:
cpu_
(
dat
),
flag_
(
kDataInCPU
)
{}
~
VectorData
()
{}
// Move ctor
VectorData
(
const
VectorData
&
o
)
{
Vector
(
Vector
<
T
>
&&
other
)
{
o
.
ImmutableCPU
();
this
->
size_
=
other
.
size_
;
cpu_
=
o
.
cpu_
;
this
->
flag_
=
other
.
flag_
;
flag_
=
kDataInCPU
;
if
(
other
.
cuda_vec_
.
memory_size
())
{
this
->
cuda_vec_
.
ShareDataWith
(
other
.
cuda_vec_
);
}
if
(
other
.
cpu_vec_
.
memory_size
())
{
this
->
cpu_vec_
.
ShareDataWith
(
other
.
cpu_vec_
);
}
}
VectorData
&
operator
=
(
const
VectorData
&
o
)
{
o
.
ImmutableCPU
();
cpu_
=
o
.
cpu_
;
flag_
=
kDataInCPU
;
details
::
CUDABuffer
null
;
gpu_
.
Swap
(
null
);
return
*
this
;
}
}
// CPU data access method. Mutable.
T
&
operator
[](
size_t
i
)
{
T
&
operator
[](
size_t
i
)
{
MutableCPU
();
MutableCPU
();
return
const_cast
<
T
*>
(
cpu_vec_
.
data
<
T
>
())
[
i
];
return
cpu_
[
i
];
}
}
// CPU data access method. Immutable.
const
T
&
operator
[](
size_t
i
)
const
{
const
T
&
operator
[](
size_t
i
)
const
{
ImmutableCPU
();
ImmutableCPU
();
return
cpu_vec_
.
data
<
T
>
()
[
i
];
return
cpu_
[
i
];
}
}
// std::vector iterator methods. Based on CPU data access method
size_t
size
()
const
{
return
cpu_
.
size
();
}
size_t
size
()
const
{
return
size_
;
}
T
*
begin
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
}
iterator
begin
()
{
MutableCPU
();
return
cpu_
.
begin
();
}
T
*
end
()
{
iterator
end
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
MutableCPU
();
return
cpu_
.
end
();
}
}
T
&
front
()
{
return
*
begin
();
}
T
&
front
()
{
MutableCPU
();
return
cpu_
.
front
();
}
T
&
back
()
{
T
&
back
()
{
auto
it
=
end
();
MutableCPU
();
--
it
;
return
cpu_
.
back
();
return
*
it
;
}
}
const
T
*
begin
()
const
{
const_iterator
begin
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
ImmutableCPU
();
return
cpu_
.
begin
();
}
}
const
T
*
end
()
const
{
const_iterator
end
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
ImmutableCPU
();
return
cpu_
.
end
();
}
}
const
T
*
cbegin
()
const
{
return
begin
();
}
const
T
*
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
const
T
&
back
()
const
{
auto
it
=
end
();
ImmutableCPU
();
--
it
;
return
cpu_
.
back
();
return
*
it
;
}
}
T
*
data
()
{
return
begin
()
;
}
T
*
data
()
{
return
&
(
*
this
)[
0
]
;
}
const
T
*
data
()
const
{
return
begin
()
;
}
const
T
*
data
()
const
{
return
&
(
*
this
)[
0
]
;
}
const
T
&
front
()
const
{
return
*
begin
();
}
const
T
&
front
()
const
{
// end of std::vector iterator methods
ImmutableCPU
();
return
cpu_
.
front
();
}
// assign this from iterator.
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
// NOTE: the iterator must support `end-begin`
template
<
typename
Iter
>
template
<
typename
Iter
>
void
assign
(
Iter
begin
,
Iter
end
)
{
void
assign
(
Iter
begin
,
Iter
end
)
{
InitByIter
(
end
-
begin
,
begin
,
end
);
MutableCPU
();
cpu_
.
assign
(
begin
,
end
);
}
}
// push_back. If the previous capacity is not enough, the memory will
// push_back. If the previous capacity is not enough, the memory will
// double.
// double.
void
push_back
(
T
elem
)
{
void
push_back
(
T
elem
)
{
if
(
size_
+
1
>
capacity
())
{
MutableCPU
();
reserve
((
size_
+
1
)
<<
1
);
cpu_
.
push_back
(
elem
);
}
*
end
()
=
elem
;
++
size_
;
}
}
// extend a vector by iterator.
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
// NOTE: the iterator must support end-begin
template
<
typename
It
>
template
<
typename
It
>
void
Extend
(
It
begin
,
It
end
)
{
void
Extend
(
It
begin
,
It
end
)
{
size_t
pre_size
=
size_
;
MutableCPU
();
resize
(
pre_size
+
(
end
-
begin
));
auto
out_it
=
std
::
back_inserter
<
std
::
vector
<
T
>>
(
this
->
cpu_
);
T
*
ptr
=
this
->
begin
()
+
pre_size
;
std
::
copy
(
begin
,
end
,
out_it
);
for
(;
begin
<
end
;
++
begin
,
++
ptr
)
{
*
ptr
=
*
begin
;
}
}
}
// resize the vector
// resize the vector
void
resize
(
size_t
size
)
{
void
resize
(
size_t
size
)
{
if
(
size
+
1
<=
capacity
())
{
size_
=
size
;
}
else
{
MutableCPU
();
MutableCPU
();
Tensor
cpu_tensor
;
cpu_
.
resize
(
size
);
platform
::
Place
cpu
=
platform
::
CPUPlace
();
T
*
ptr
=
cpu_tensor
.
mutable_data
<
T
>
(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
const
T
*
old_ptr
=
cpu_vec_
.
memory_size
()
==
0
?
nullptr
:
cpu_vec_
.
data
<
T
>
();
if
(
old_ptr
!=
nullptr
)
{
std
::
copy
(
old_ptr
,
old_ptr
+
size_
,
ptr
);
}
size_
=
size
;
cpu_vec_
.
ShareDataWith
(
cpu_tensor
);
}
}
}
// get cuda ptr. immutable
// get cuda ptr. immutable
...
@@ -202,7 +199,7 @@ class Vector {
...
@@ -202,7 +199,7 @@ class Vector {
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
place
),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
place
),
"CUDA Data must on CUDA place"
);
"CUDA Data must on CUDA place"
);
ImmutableCUDA
(
place
);
ImmutableCUDA
(
place
);
return
cuda_vec_
.
data
<
T
>
(
);
return
reinterpret_cast
<
T
*>
(
gpu_
.
data_
);
}
}
// get cuda ptr. mutable
// get cuda ptr. mutable
...
@@ -214,77 +211,39 @@ class Vector {
...
@@ -214,77 +211,39 @@ class Vector {
// clear
// clear
void
clear
()
{
void
clear
()
{
size_
=
0
;
cpu_
.
clear
()
;
flag_
=
kDirty
|
kDataInCPU
;
flag_
=
kDirty
|
kDataInCPU
;
}
}
size_t
capacity
()
const
{
size_t
capacity
()
const
{
return
cpu_
.
capacity
();
}
return
cpu_vec_
.
memory_size
()
/
SizeOfType
(
typeid
(
T
));
}
// reserve data
// reserve data
void
reserve
(
size_t
size
)
{
void
reserve
(
size_t
size
)
const
{
cpu_
.
reserve
(
size
);
}
size_t
pre_size
=
size_
;
resize
(
size
);
resize
(
pre_size
);
}
// the unify method to access CPU or CUDA data. immutable.
const
T
*
Data
(
platform
::
Place
place
)
const
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAData
(
place
);
}
else
{
return
data
();
}
}
// the unify method to access CPU or CUDA data. mutable.
T
*
MutableData
(
platform
::
Place
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAMutableData
(
place
);
}
else
{
return
data
();
}
}
// implicit cast operator. Vector can be cast to std::vector implicitly.
// implicit cast operator. Vector can be cast to std::vector implicitly.
operator
std
::
vector
<
T
>
()
const
{
operator
std
::
vector
<
T
>
()
const
{
std
::
vector
<
T
>
result
;
ImmutableCPU
();
result
.
resize
(
size
());
return
cpu_
;
std
::
copy
(
begin
(),
end
(),
result
.
begin
());
return
result
;
}
}
bool
operator
==
(
const
Vector
<
T
>
&
other
)
const
{
bool
operator
==
(
const
VectorData
&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
ImmutableCPU
();
auto
it1
=
cbegin
();
other
.
ImmutableCPU
();
auto
it2
=
other
.
cbegin
();
return
cpu_
==
other
.
cpu_
;
for
(;
it1
<
cend
();
++
it1
,
++
it2
)
{
if
(
*
it1
!=
*
it2
)
{
return
false
;
}
}
return
true
;
}
}
private:
std
::
mutex
&
Mutex
()
const
{
return
mtx_
;
}
void
InitEmpty
()
{
size_
=
0
;
flag_
=
kDataInCPU
;
}
template
<
typename
Iter
>
std
::
unique_ptr
<
platform
::
CUDAPlace
>
CUDAPlace
()
const
{
void
InitByIter
(
size_t
size
,
Iter
begin
,
Iter
end
)
{
if
(
gpu_
.
data_
==
nullptr
)
{
platform
::
Place
cpu
=
platform
::
CPUPlace
();
return
nullptr
;
T
*
ptr
=
this
->
cpu_vec_
.
template
mutable_data
<
T
>(
}
else
{
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
return
std
::
unique_ptr
<
platform
::
CUDAPlace
>
(
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
new
platform
::
CUDAPlace
(
gpu_
.
place_
));
*
ptr
++
=
*
begin
++
;
}
}
flag_
=
kDataInCPU
|
kDirty
;
size_
=
size
;
}
}
private:
enum
DataFlag
{
enum
DataFlag
{
kDataInCPU
=
0x01
,
kDataInCPU
=
0x01
,
kDataInCUDA
=
0x02
,
kDataInCUDA
=
0x02
,
...
@@ -294,8 +253,15 @@ class Vector {
...
@@ -294,8 +253,15 @@ class Vector {
void
CopyToCPU
()
const
{
void
CopyToCPU
()
const
{
// COPY GPU Data To CPU
// COPY GPU Data To CPU
TensorCopy
(
cuda_vec_
,
platform
::
CPUPlace
(),
&
cpu_vec_
);
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
WaitPlace
(
cuda_vec_
.
place
());
platform
::
DeviceContextPool
::
Instance
().
Get
(
platform
::
Place
(
gpu_
.
place_
)));
auto
stream
=
dev_ctx
->
stream
();
void
*
src
=
gpu_
.
data_
;
void
*
dst
=
cpu_
.
data
();
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
gpu_
.
place_
,
src
,
gpu_
.
size_
,
stream
);
dev_ctx
->
Wait
();
}
}
void
MutableCPU
()
{
void
MutableCPU
()
{
...
@@ -308,16 +274,12 @@ class Vector {
...
@@ -308,16 +274,12 @@ class Vector {
void
ImmutableCUDA
(
platform
::
Place
place
)
const
{
void
ImmutableCUDA
(
platform
::
Place
place
)
const
{
if
(
IsDirty
())
{
if
(
IsDirty
())
{
if
(
IsInCPU
())
{
if
(
IsInCPU
())
{
TensorCopy
(
cpu_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
CopyCPUDataToCUDA
(
place
);
&
cuda_vec_
);
WaitPlace
(
place
);
UnsetFlag
(
kDirty
);
UnsetFlag
(
kDirty
);
SetFlag
(
kDataInCUDA
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
IsInCUDA
()
&&
!
(
place
==
cuda_vec_
.
place
()))
{
}
else
if
(
IsInCUDA
()
&&
framework
::
Tensor
tmp
;
!
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
)
==
gpu_
.
place_
))
{
TensorCopy
(
cuda_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
tmp
);
PADDLE_THROW
(
"This situation should not happen"
);
WaitPlace
(
cuda_vec_
.
place
());
cuda_vec_
.
ShareDataWith
(
tmp
);
// Still dirty
// Still dirty
}
else
{
}
else
{
// Dirty && DataInCUDA && Device is same
// Dirty && DataInCUDA && Device is same
...
@@ -326,17 +288,10 @@ class Vector {
...
@@ -326,17 +288,10 @@ class Vector {
}
else
{
}
else
{
if
(
!
IsInCUDA
())
{
if
(
!
IsInCUDA
())
{
// Even data is not dirty. However, data is not in CUDA. Copy data.
// Even data is not dirty. However, data is not in CUDA. Copy data.
TensorCopy
(
cpu_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
CopyCPUDataToCUDA
(
place
);
&
cuda_vec_
);
WaitPlace
(
place
);
SetFlag
(
kDataInCUDA
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
!
(
place
==
cuda_vec_
.
place
()))
{
}
else
if
(
!
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
)
==
gpu_
.
place_
))
{
framework
::
Tensor
tmp
;
PADDLE_THROW
(
"This situation should not happen."
);
WaitPlace
(
cuda_vec_
.
place
());
TensorCopy
(
cuda_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
tmp
);
WaitPlace
(
cuda_vec_
.
place
());
WaitPlace
(
place
);
cuda_vec_
.
ShareDataWith
(
tmp
);
}
else
{
}
else
{
// Not Dirty && DataInCUDA && Device is same
// Not Dirty && DataInCUDA && Device is same
// Do nothing.
// Do nothing.
...
@@ -344,9 +299,20 @@ class Vector {
...
@@ -344,9 +299,20 @@ class Vector {
}
}
}
}
void
CopyCPUDataToCUDA
(
const
platform
::
Place
&
place
)
const
{
void
*
src
=
cpu_
.
data
();
gpu_
.
Resize
(
place
,
cpu_
.
size
()
*
sizeof
(
T
));
void
*
dst
=
gpu_
.
data_
;
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
stream
=
dev_ctx
->
stream
();
memory
::
Copy
(
gpu_
.
place_
,
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
.
size_
,
stream
);
}
void
ImmutableCPU
()
const
{
void
ImmutableCPU
()
const
{
if
(
IsDirty
()
&&
if
(
IsDirty
()
&&
!
IsInCPU
())
{
// If data has been changed in CUDA, or
!
IsInCPU
())
{
// If data has been changed in CUDA, or
CPU has no data.
//
CPU has no data.
CopyToCPU
();
CopyToCPU
();
UnsetFlag
(
kDirty
);
UnsetFlag
(
kDirty
);
}
}
...
@@ -362,23 +328,178 @@ class Vector {
...
@@ -362,23 +328,178 @@ class Vector {
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
static
void
WaitPlace
(
const
platform
::
Place
place
)
{
mutable
std
::
vector
<
T
>
cpu_
;
mutable
details
::
CUDABuffer
gpu_
;
mutable
int
flag_
;
mutable
std
::
mutex
mtx_
;
};
public:
// Default ctor. Create empty Vector
Vector
()
:
m_
(
new
VectorData
())
{}
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
:
m_
(
new
VectorData
(
count
,
value
))
{}
// Ctor with init_list
Vector
(
std
::
initializer_list
<
T
>
init
)
:
m_
(
new
VectorData
(
init
))
{}
// implicit cast from std::vector.
template
<
typename
U
>
Vector
(
const
std
::
vector
<
U
>
&
dat
)
:
m_
(
new
VectorData
(
dat
))
{
// NOLINT
}
// Copy ctor
Vector
(
const
Vector
<
T
>
&
other
)
{
m_
=
other
.
m_
;
}
// Copy operator
Vector
<
T
>
&
operator
=
(
const
Vector
<
T
>
&
other
)
{
m_
=
other
.
m_
;
return
*
this
;
}
// Move ctor
Vector
(
Vector
<
T
>
&&
other
)
{
m_
=
std
::
move
(
other
.
m_
);
}
// CPU data access method. Mutable.
T
&
operator
[](
size_t
i
)
{
return
(
*
m_
.
MutableData
())[
i
];
}
// CPU data access method. Immutable.
const
T
&
operator
[](
size_t
i
)
const
{
return
m_
.
Data
()[
i
];
}
// std::vector iterator methods. Based on CPU data access method
size_t
size
()
const
{
return
m_
.
Data
().
size
();
}
iterator
begin
()
{
return
m_
.
MutableData
()
->
begin
();
}
iterator
end
()
{
return
m_
.
MutableData
()
->
end
();
}
T
&
front
()
{
return
m_
.
MutableData
()
->
front
();
}
T
&
back
()
{
return
m_
.
MutableData
()
->
back
();
}
const_iterator
begin
()
const
{
return
m_
.
Data
().
begin
();
}
const_iterator
end
()
const
{
return
m_
.
Data
().
end
();
}
const_iterator
cbegin
()
const
{
return
begin
();
}
const_iterator
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
return
m_
.
Data
().
back
();
}
T
*
data
()
{
return
m_
.
MutableData
()
->
data
();
}
const
T
*
data
()
const
{
return
m_
.
Data
().
data
();
}
const
T
&
front
()
const
{
return
m_
.
Data
().
front
();
}
// end of std::vector iterator methods
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
template
<
typename
Iter
>
void
assign
(
Iter
begin
,
Iter
end
)
{
m_
.
MutableData
()
->
assign
(
begin
,
end
);
}
// push_back. If the previous capacity is not enough, the memory will
// double.
void
push_back
(
T
elem
)
{
m_
.
MutableData
()
->
push_back
(
elem
);
}
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
template
<
typename
It
>
void
Extend
(
It
begin
,
It
end
)
{
m_
.
MutableData
()
->
Extend
(
begin
,
end
);
}
// resize the vector
void
resize
(
size_t
size
)
{
if
(
m_
.
Data
().
size
()
!=
size
)
{
m_
.
MutableData
()
->
resize
(
size
);
}
}
// get cuda ptr. immutable
const
T
*
CUDAData
(
platform
::
Place
place
)
const
{
{
auto
&
mtx
=
m_
.
Data
().
Mutex
();
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx
);
auto
cuda_place
=
m_
.
Data
().
CUDAPlace
();
if
(
cuda_place
==
nullptr
||
*
cuda_place
==
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
return
m_
.
Data
().
CUDAData
(
place
);
}
}
// If m_ contains CUDAData in a different place. Detach manually.
m_
.
Detach
();
return
CUDAData
(
place
);
}
// get cuda ptr. mutable
T
*
CUDAMutableData
(
platform
::
Place
place
)
{
{
auto
&
mtx
=
m_
.
Data
().
Mutex
();
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx
);
auto
cuda_place
=
m_
.
Data
().
CUDAPlace
();
if
(
cuda_place
==
nullptr
||
*
cuda_place
==
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
return
m_
.
MutableData
()
->
CUDAMutableData
(
place
);
}
}
// If m_ contains CUDAData in a different place. Detach manually.
m_
.
Detach
();
return
CUDAMutableData
(
place
);
}
// clear
void
clear
()
{
m_
.
MutableData
()
->
clear
();
}
size_t
capacity
()
const
{
return
m_
.
Data
().
capacity
();
}
// reserve data
void
reserve
(
size_t
size
)
{
m_
.
Data
().
reserve
(
size
);
}
// the unify method to access CPU or CUDA data. immutable.
const
T
*
Data
(
platform
::
Place
place
)
const
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAData
(
place
);
}
else
{
return
data
();
}
}
// the unify method to access CPU or CUDA data. mutable.
T
*
MutableData
(
platform
::
Place
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
if
(
platform
::
is_gpu_place
(
place
))
{
platform
::
DeviceContextPool
::
Instance
()
return
CUDAMutableData
(
place
);
.
Get
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
}
else
{
->
Wait
();
return
data
();
}
}
}
}
static
T
&
EmptyDummy
()
{
// implicit cast operator. Vector can be cast to std::vector implicitly.
static
T
dummy
=
T
();
operator
std
::
vector
<
T
>
()
const
{
return
m_
.
Data
();
}
return
dummy
;
bool
operator
==
(
const
Vector
<
T
>
&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
auto
it1
=
cbegin
();
auto
it2
=
other
.
cbegin
();
for
(;
it1
<
cend
();
++
it1
,
++
it2
)
{
if
(
*
it1
!=
*
it2
)
{
return
false
;
}
}
return
true
;
}
}
mutable
int
flag_
;
const
void
*
Handle
()
const
{
return
&
m_
.
Data
();
}
mutable
Tensor
cpu_vec_
;
mutable
Tensor
cuda_vec_
;
private:
size_t
size_
;
// Vector is an COW object.
mutable
details
::
COWPtr
<
VectorData
>
m_
;
};
};
#else // PADDLE_WITH_CUDA
#else // PADDLE_WITH_CUDA
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
e79ad2ea
...
@@ -13,21 +13,19 @@ See the License for the specific language governing permissions and
...
@@ -13,21 +13,19 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/parallel_executor.h"
#include <string>
#include <string>
#include <tuple>
#include <tuple>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/nccl_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#endif
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -35,80 +33,6 @@ limitations under the License. */
...
@@ -35,80 +33,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
std
::
unique_ptr
<
ir
::
Graph
>
ApplyParallelExecutorPass
(
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
bool
use_cuda
,
#ifdef PADDLE_WITH_CUDA
const
BuildStrategy
&
strategy
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
{
#else
const
BuildStrategy
&
strategy
)
{
#endif
// Convert the program to graph.
std
::
unique_ptr
<
ir
::
Graph
>
graph
(
new
ir
::
Graph
(
main_program
));
// Apply a graph viz pass to record a graph.
if
(
!
strategy
.
debug_graphviz_path_
.
empty
())
{
auto
viz_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"graph_viz_pass"
);
const
std
::
string
graph_path
=
string
::
Sprintf
(
"%s%s"
,
strategy
.
debug_graphviz_path_
.
c_str
(),
"_original_graph"
);
viz_pass
->
Set
<
std
::
string
>
(
"graph_viz_path"
,
new
std
::
string
(
graph_path
));
graph
=
viz_pass
->
Apply
(
std
::
move
(
graph
));
}
// Apply op fusion.
if
(
strategy
.
fuse_elewise_add_act_ops_
)
{
auto
fuse_elewise_add_act_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"fuse_elewise_add_act_pass"
);
graph
=
fuse_elewise_add_act_pass
->
Apply
(
std
::
move
(
graph
));
// Apply a graph viz pass to record a graph.
if
(
!
strategy
.
debug_graphviz_path_
.
empty
())
{
auto
viz_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"graph_viz_pass"
);
const
std
::
string
graph_path
=
string
::
Sprintf
(
"%s%s"
,
strategy
.
debug_graphviz_path_
.
c_str
(),
"_fused_graph"
);
viz_pass
->
Set
<
std
::
string
>
(
"graph_viz_path"
,
new
std
::
string
(
graph_path
));
graph
=
viz_pass
->
Apply
(
std
::
move
(
graph
));
}
}
// Convert graph to run on multi-devices.
auto
multi_devices_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"multi_devices_pass"
);
multi_devices_pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
"places"
,
&
places
);
multi_devices_pass
->
SetNotOwned
<
const
std
::
string
>
(
"loss_var_name"
,
&
loss_var_name
);
multi_devices_pass
->
SetNotOwned
<
const
std
::
unordered_set
<
std
::
string
>>
(
"params"
,
&
param_names
);
multi_devices_pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
&
local_scopes
);
multi_devices_pass
->
SetNotOwned
<
const
BuildStrategy
>
(
"strategy"
,
&
strategy
);
#ifdef PADDLE_WITH_CUDA
platform
::
NCCLContextMap
*
nctx
=
use_cuda
?
nccl_ctxs
:
nullptr
;
multi_devices_pass
->
SetNotOwned
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
,
nctx
);
#endif
graph
=
multi_devices_pass
->
Apply
(
std
::
move
(
graph
));
// Apply a graph print pass to record a graph with device info.
if
(
!
strategy
.
debug_graphviz_path_
.
empty
())
{
auto
multi_devices_print_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"multi_devices_print_pass"
);
multi_devices_print_pass
->
SetNotOwned
<
const
std
::
string
>
(
"debug_graphviz_path"
,
&
strategy
.
debug_graphviz_path_
);
multi_devices_print_pass
->
Set
<
details
::
GraphvizSSAGraphPrinter
>
(
"graph_printer"
,
new
details
::
GraphvizSSAGraphPrinter
);
graph
=
multi_devices_print_pass
->
Apply
(
std
::
move
(
graph
));
}
// Verify that the graph is correct for multi-device executor.
auto
multi_devices_check_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"multi_devices_check_pass"
);
graph
=
multi_devices_check_pass
->
Apply
(
std
::
move
(
graph
));
return
graph
;
}
class
ParallelExecutorPrivate
{
class
ParallelExecutorPrivate
{
public:
public:
explicit
ParallelExecutorPrivate
(
const
std
::
vector
<
platform
::
Place
>
&
places
)
explicit
ParallelExecutorPrivate
(
const
std
::
vector
<
platform
::
Place
>
&
places
)
...
@@ -199,10 +123,9 @@ ParallelExecutor::ParallelExecutor(
...
@@ -199,10 +123,9 @@ ParallelExecutor::ParallelExecutor(
// Step 3. Convert main_program to SSA form and dependency graph. Also, insert
// Step 3. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp
// ncclOp
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
ApplyParallelExecutorPass
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
build_strategy
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
member_
->
nccl_ctxs_
.
get
());
auto
max_memory_size
=
GetEagerDeletionThreshold
();
auto
max_memory_size
=
GetEagerDeletionThreshold
();
if
(
max_memory_size
>=
0
)
{
if
(
max_memory_size
>=
0
)
{
...
@@ -228,11 +151,17 @@ ParallelExecutor::ParallelExecutor(
...
@@ -228,11 +151,17 @@ ParallelExecutor::ParallelExecutor(
}
}
}
}
#else
#else
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
ApplyParallelExecutorPass
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
build_strategy
);
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
);
#endif
#endif
// If the loss_var_name is given, the number of graph should be only one.
if
(
loss_var_name
.
size
())
{
PADDLE_ENFORCE_EQ
(
ir
::
GraphNum
(
*
graph
),
1
,
"The number of graph should be only one"
);
}
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kDefault
)
{
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kDefault
)
{
member_
->
executor_
.
reset
(
new
details
::
ThreadedSSAGraphExecutor
(
member_
->
executor_
.
reset
(
new
details
::
ThreadedSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
places
,
std
::
move
(
graph
)));
exec_strategy
,
member_
->
local_scopes_
,
places
,
std
::
move
(
graph
)));
...
@@ -373,12 +302,6 @@ ParallelExecutor::~ParallelExecutor() {
...
@@ -373,12 +302,6 @@ ParallelExecutor::~ParallelExecutor() {
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
fuse_elewise_add_act_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
multi_devices_pass
);
USE_PASS
(
multi_devices_check_pass
);
USE_PASS
(
multi_devices_print_pass
);
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
USE_PASS
(
reference_count_pass
);
USE_PASS
(
reference_count_pass
);
#endif
#endif
paddle/fluid/framework/parallel_executor.h
浏览文件 @
e79ad2ea
...
@@ -14,14 +14,14 @@ limitations under the License. */
...
@@ -14,14 +14,14 @@ limitations under the License. */
#pragma once
#pragma once
#include <paddle/fluid/framework/details/build_strategy.h>
#include <atomic>
#include <atomic>
#include <string>
#include <string>
#include <unordered_map>
#include <unordered_map>
#include <unordered_set>
#include <unordered_set>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_graph_pass.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/program_desc.h"
...
...
paddle/fluid/operators/detection_map_op.h
浏览文件 @
e79ad2ea
...
@@ -76,8 +76,8 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -76,8 +76,8 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
ap_type
=
GetAPType
(
ctx
.
Attr
<
std
::
string
>
(
"ap_type"
));
auto
ap_type
=
GetAPType
(
ctx
.
Attr
<
std
::
string
>
(
"ap_type"
));
int
class_num
=
ctx
.
Attr
<
int
>
(
"class_num"
);
int
class_num
=
ctx
.
Attr
<
int
>
(
"class_num"
);
auto
label_lod
=
in_label
->
lod
();
auto
&
label_lod
=
in_label
->
lod
();
auto
detect_lod
=
in_detect
->
lod
();
auto
&
detect_lod
=
in_detect
->
lod
();
PADDLE_ENFORCE_EQ
(
label_lod
.
size
(),
1UL
,
PADDLE_ENFORCE_EQ
(
label_lod
.
size
(),
1UL
,
"Only support one level sequence now."
);
"Only support one level sequence now."
);
PADDLE_ENFORCE_EQ
(
label_lod
[
0
].
size
(),
detect_lod
[
0
].
size
(),
PADDLE_ENFORCE_EQ
(
label_lod
[
0
].
size
(),
detect_lod
[
0
].
size
(),
...
@@ -166,11 +166,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -166,11 +166,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
labels
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_label
);
auto
labels
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_label
);
auto
detect
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_detect
);
auto
detect
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_detect
);
auto
label_lod
=
input_label
.
lod
();
auto
&
label_lod
=
input_label
.
lod
();
auto
detect_lod
=
input_detect
.
lod
();
auto
&
detect_lod
=
input_detect
.
lod
();
int
batch_size
=
label_lod
[
0
].
size
()
-
1
;
int
batch_size
=
label_lod
[
0
].
size
()
-
1
;
auto
label_index
=
label_lod
[
0
];
auto
&
label_index
=
label_lod
[
0
];
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
std
::
map
<
int
,
std
::
vector
<
Box
>>
boxes
;
std
::
map
<
int
,
std
::
vector
<
Box
>>
boxes
;
...
@@ -274,7 +274,6 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -274,7 +274,6 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
output_true_pos
->
set_lod
(
true_pos_lod
);
output_true_pos
->
set_lod
(
true_pos_lod
);
output_false_pos
->
set_lod
(
false_pos_lod
);
output_false_pos
->
set_lod
(
false_pos_lod
);
return
;
}
}
void
GetInputPos
(
const
framework
::
Tensor
&
input_pos_count
,
void
GetInputPos
(
const
framework
::
Tensor
&
input_pos_count
,
...
@@ -292,7 +291,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -292,7 +291,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
SetData
=
[](
const
framework
::
LoDTensor
&
pos_tensor
,
auto
SetData
=
[](
const
framework
::
LoDTensor
&
pos_tensor
,
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>&
pos
)
{
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>&
pos
)
{
const
T
*
pos_data
=
pos_tensor
.
data
<
T
>
();
const
T
*
pos_data
=
pos_tensor
.
data
<
T
>
();
auto
pos_data_lod
=
pos_tensor
.
lod
()[
0
];
auto
&
pos_data_lod
=
pos_tensor
.
lod
()[
0
];
for
(
size_t
i
=
0
;
i
<
pos_data_lod
.
size
()
-
1
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
pos_data_lod
.
size
()
-
1
;
++
i
)
{
for
(
size_t
j
=
pos_data_lod
[
i
];
j
<
pos_data_lod
[
i
+
1
];
++
j
)
{
for
(
size_t
j
=
pos_data_lod
[
i
];
j
<
pos_data_lod
[
i
+
1
];
++
j
)
{
T
score
=
pos_data
[
j
*
2
];
T
score
=
pos_data
[
j
*
2
];
...
@@ -317,20 +316,23 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -317,20 +316,23 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>*
false_pos
)
const
{
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>*
false_pos
)
const
{
int
batch_size
=
gt_boxes
.
size
();
int
batch_size
=
gt_boxes
.
size
();
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
auto
image_gt_boxes
=
gt_boxes
[
n
];
auto
&
image_gt_boxes
=
gt_boxes
[
n
];
for
(
auto
it
=
image_gt_boxes
.
begin
();
it
!=
image_gt_boxes
.
end
();
++
it
)
{
for
(
auto
&
image_gt_box
:
image_gt_boxes
)
{
size_t
count
=
0
;
size_t
count
=
0
;
auto
labeled_bboxes
=
it
->
second
;
auto
&
labeled_bboxes
=
image_gt_box
.
second
;
if
(
evaluate_difficult
)
{
if
(
evaluate_difficult
)
{
count
=
labeled_bboxes
.
size
();
count
=
labeled_bboxes
.
size
();
}
else
{
}
else
{
for
(
size_t
i
=
0
;
i
<
labeled_bboxes
.
size
();
++
i
)
for
(
auto
&
box
:
labeled_bboxes
)
{
if
(
!
(
labeled_bboxes
[
i
].
is_difficult
))
++
count
;
if
(
!
box
.
is_difficult
)
{
++
count
;
}
}
}
}
if
(
count
==
0
)
{
if
(
count
==
0
)
{
continue
;
continue
;
}
}
int
label
=
i
t
->
first
;
int
label
=
i
mage_gt_box
.
first
;
if
(
label_pos_count
->
find
(
label
)
==
label_pos_count
->
end
())
{
if
(
label_pos_count
->
find
(
label
)
==
label_pos_count
->
end
())
{
(
*
label_pos_count
)[
label
]
=
count
;
(
*
label_pos_count
)[
label
]
=
count
;
}
else
{
}
else
{
...
...
paddle/fluid/operators/extract_rows_op.cc
浏览文件 @
e79ad2ea
...
@@ -50,7 +50,7 @@ class ExtractRowsOp : public framework::OperatorBase {
...
@@ -50,7 +50,7 @@ class ExtractRowsOp : public framework::OperatorBase {
auto
&
in
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
SelectedRows
>
();
auto
&
in
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
SelectedRows
>
();
auto
out
=
scope
.
FindVar
(
Output
(
"Out"
))
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
out
=
scope
.
FindVar
(
Output
(
"Out"
))
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
in_rows
=
in
.
rows
();
auto
&
in_rows
=
in
.
rows
();
auto
out_dim
=
framework
::
make_ddim
(
auto
out_dim
=
framework
::
make_ddim
(
std
::
vector
<
int64_t
>
{
static_cast
<
int64_t
>
(
in_rows
.
size
()),
1
});
std
::
vector
<
int64_t
>
{
static_cast
<
int64_t
>
(
in_rows
.
size
()),
1
});
auto
dst_ptr
=
out
->
mutable_data
<
int64_t
>
(
out_dim
,
in
.
place
());
auto
dst_ptr
=
out
->
mutable_data
<
int64_t
>
(
out_dim
,
in
.
place
());
...
...
paddle/fluid/operators/lookup_table_op.cu
浏览文件 @
e79ad2ea
...
@@ -127,10 +127,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
...
@@ -127,10 +127,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
auto
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
context
.
GetPlace
());
auto
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
context
.
GetPlace
());
// TODO(yuyang18): Strange code here.
// TODO(yuyang18): Strange code here.
memory
::
Copy
(
platform
::
CPUPlace
(),
memory
::
Copy
(
gpu_place
,
new_rows
.
CUDAMutableData
(
context
.
GetPlace
()),
new_rows
.
CUDAMutableData
(
context
.
GetPlace
()),
gpu_place
,
gpu_place
,
ids_data
,
ids_num
*
sizeof
(
int64_t
),
stream
);
ids_data
,
ids_num
*
sizeof
(
int64_t
),
stream
);
d_table
->
set_rows
(
new_rows
);
d_table
->
set_rows
(
new_rows
);
auto
*
d_table_value
=
d_table
->
mutable_value
();
auto
*
d_table_value
=
d_table
->
mutable_value
();
...
...
paddle/fluid/operators/math/selected_rows_functor.cu
浏览文件 @
e79ad2ea
...
@@ -60,11 +60,9 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
...
@@ -60,11 +60,9 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
auto
out_place
=
context
.
GetPlace
();
auto
out_place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
out_place
));
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
out_place
));
memory
::
Copy
(
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
in1_place
),
in1_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
in1_place
),
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
).
stream
());
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
...
@@ -148,7 +146,7 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
...
@@ -148,7 +146,7 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
auto
in1_height
=
input1
.
height
();
auto
in1_height
=
input1
.
height
();
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
());
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
());
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
()
);
auto
&
in1_rows
=
input1
.
rows
(
);
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
auto
&
in1_value
=
input1
.
value
();
auto
&
in1_value
=
input1
.
value
();
...
...
paddle/fluid/operators/sampling_id_op.cc
浏览文件 @
e79ad2ea
...
@@ -53,15 +53,16 @@ class SamplingIdOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -53,15 +53,16 @@ class SamplingIdOpMaker : public framework::OpProtoAndCheckerMaker {
SamplingId Operator.
SamplingId Operator.
A layer for sampling id from multinomial distribution from the
A layer for sampling id from multinomial distribution from the
input. Sampling one id for one sample.)DOC"
);
input. Sampling one id for one sample.)DOC"
);
AddAttr
<
float
>
(
"min"
,
"Minimum value of random.
[default 0.0]
."
)
AddAttr
<
float
>
(
"min"
,
"Minimum value of random.
(float, default 0.0)
."
)
.
SetDefault
(
0.0
f
);
.
SetDefault
(
0.0
f
);
AddAttr
<
float
>
(
"max"
,
"Maximun value of random.
[default 1.0]
."
)
AddAttr
<
float
>
(
"max"
,
"Maximun value of random.
(float, default 1.0)
."
)
.
SetDefault
(
1.0
f
);
.
SetDefault
(
1.0
f
);
AddAttr
<
int
>
(
"seed"
,
AddAttr
<
int
>
(
"seed"
,
"Random seed used for the random number engine. "
"Random seed used for the random number engine. "
"0 means use a seed generated by the system."
"0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always "
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time. [default 0]
."
)
"generate the same random numbers every time. (int, default 0)
."
)
.
SetDefault
(
0
);
.
SetDefault
(
0
);
}
}
};
};
...
...
paddle/fluid/operators/sgd_op.cu
浏览文件 @
e79ad2ea
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#
define EIGEN_USE_GPU
#
include <algorithm>
#include "paddle/fluid/operators/sgd_op.h"
#include "paddle/fluid/operators/sgd_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/cuda_primitives.h"
...
@@ -33,22 +33,21 @@ __global__ void SGDKernel(const T* g, const T* p, const T* learning_rate,
...
@@ -33,22 +33,21 @@ __global__ void SGDKernel(const T* g, const T* p, const T* learning_rate,
}
}
}
}
template
<
typename
T
,
int
block_size
>
template
<
typename
T
>
__global__
void
SparseSGDFunctorKernel
(
const
T
*
selected_rows
,
__global__
void
SparseSGDFunctorKernel
(
const
T
*
selected_rows
,
const
int64_t
*
rows
,
const
int64_t
*
rows
,
const
T
*
learning_rate
,
T
*
tensor_out
,
const
T
*
learning_rate
,
T
*
tensor_out
,
int64_t
row_numel
)
{
int64_t
row_numel
,
int64_t
limit
)
{
const
int
ty
=
blockIdx
.
y
;
for
(
int64_t
i
=
blockIdx
.
x
;
i
<
limit
;
i
+=
gridDim
.
x
)
{
int
tid
=
threadIdx
.
x
;
const
T
*
selected_rows_ptr
=
selected_rows
+
i
*
row_numel
;
T
*
tensor_out_ptr
=
tensor_out
+
rows
[
i
]
*
row_numel
;
selected_rows
+=
ty
*
row_numel
;
for
(
int64_t
index
=
threadIdx
.
x
;
index
<
row_numel
;
index
+=
blockDim
.
x
)
{
tensor_out
+=
rows
[
ty
]
*
row_numel
;
for
(
int
index
=
tid
;
index
<
row_numel
;
index
+=
block_size
)
{
// Since index in rows of SelectedRows can be duplicate, we have to use
// Since index in rows of SelectedRows can be duplicate, we have to use
// Atomic Operation to avoid concurrent write error.
// Atomic Operation to avoid concurrent write error.
paddle
::
platform
::
CudaAtomicAdd
(
paddle
::
platform
::
CudaAtomicAdd
(
tensor_out
+
index
,
-
1.0
*
learning_rate
[
0
]
*
selected_rows
[
index
]);
tensor_out_ptr
+
index
,
-
1.0
*
learning_rate
[
0
]
*
selected_rows_ptr
[
index
]);
}
}
}
}
}
}
// namespace
}
// namespace
...
@@ -89,7 +88,7 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -89,7 +88,7 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
in_height
,
out_dims
[
0
]);
PADDLE_ENFORCE_EQ
(
in_height
,
out_dims
[
0
]);
auto
&
in_value
=
grad
->
value
();
auto
&
in_value
=
grad
->
value
();
framework
::
Vector
<
int64_t
>
in_rows
(
grad
->
rows
()
);
auto
&
in_rows
=
grad
->
rows
(
);
int64_t
in_row_numel
=
in_value
.
numel
()
/
in_rows
.
size
();
int64_t
in_row_numel
=
in_value
.
numel
()
/
in_rows
.
size
();
PADDLE_ENFORCE_EQ
(
in_row_numel
,
param_out
->
numel
()
/
in_height
);
PADDLE_ENFORCE_EQ
(
in_row_numel
,
param_out
->
numel
()
/
in_height
);
...
@@ -97,13 +96,15 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -97,13 +96,15 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
auto
*
in_data
=
in_value
.
data
<
T
>
();
auto
*
in_data
=
in_value
.
data
<
T
>
();
auto
*
out_data
=
param_out
->
data
<
T
>
();
auto
*
out_data
=
param_out
->
data
<
T
>
();
const
int
block_size
=
256
;
const
int
kThreadsPerBlock
=
256
;
dim3
threads
(
block_size
,
1
);
int
thread_x
=
kThreadsPerBlock
;
dim3
grid
(
1
,
in_rows
.
size
());
int
max_threads
=
ctx
.
cuda_device_context
().
GetMaxPhysicalThreadCount
();
SparseSGDFunctorKernel
<
int
max_blocks
=
std
::
max
(
max_threads
/
kThreadsPerBlock
,
1
);
T
,
256
><<<
grid
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
SparseSGDFunctorKernel
<<<
max_blocks
,
thread_x
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
in_data
,
in_rows
.
CUDAData
(
ctx
.
GetPlace
()),
learning_rate
->
data
<
T
>
(),
in_data
,
in_rows
.
CUDAData
(
ctx
.
GetPlace
()),
learning_rate
->
data
<
T
>
(),
out_data
,
in_row_numel
);
out_data
,
in_row_numel
,
in_rows
.
size
()
);
}
else
{
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
...
...
paddle/fluid/operators/sum_op.h
浏览文件 @
e79ad2ea
...
@@ -124,7 +124,6 @@ class SumKernel : public framework::OpKernel<T> {
...
@@ -124,7 +124,6 @@ class SumKernel : public framework::OpKernel<T> {
out_value
->
Resize
(
framework
::
make_ddim
(
in_dim
));
out_value
->
Resize
(
framework
::
make_ddim
(
in_dim
));
out_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
out_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
// if all the input sparse vars are empty, no need to
// if all the input sparse vars are empty, no need to
// merge these vars.
// merge these vars.
if
(
first_dim
==
0UL
)
{
if
(
first_dim
==
0UL
)
{
...
...
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
e79ad2ea
set
(
PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method
)
set
(
PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method
pass_builder
)
set
(
PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc
)
set
(
PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
list
(
APPEND PYBIND_DEPS parallel_executor profiler
)
list
(
APPEND PYBIND_DEPS parallel_executor profiler
)
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
e79ad2ea
...
@@ -25,6 +25,7 @@ limitations under the License. */
...
@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
...
@@ -595,6 +596,29 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -595,6 +596,29 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
py
::
class_
<
ir
::
Pass
,
std
::
shared_ptr
<
ir
::
Pass
>>
pass
(
m
,
"Pass"
);
pass
.
def
(
py
::
init
())
.
def
(
"set_str"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
const
std
::
string
&
attr
)
{
self
.
Set
<
std
::
string
>
(
name
,
new
std
::
string
(
attr
));
});
py
::
class_
<
ir
::
PassBuilder
,
std
::
shared_ptr
<
ir
::
PassBuilder
>>
pb
(
m
,
"PassBuilder"
);
pb
.
def
(
py
::
init
())
.
def
(
"append_pass"
,
[](
ir
::
PassBuilder
&
self
,
const
std
::
string
&
pass_type
)
->
std
::
shared_ptr
<
ir
::
Pass
>
{
return
self
.
AppendPass
(
pass_type
);
})
.
def
(
"all_passes"
,
[](
ir
::
PassBuilder
&
self
)
{
return
self
.
AllPasses
();
})
.
def
(
"insert_pass"
,
[](
ir
::
PassBuilder
&
self
,
size_t
idx
,
const
std
::
string
&
pass_type
)
{
return
self
.
InsertPass
(
idx
,
pass_type
);
})
.
def
(
"remove_pass"
,
[](
ir
::
PassBuilder
&
self
,
size_t
idx
)
{
self
.
RemovePass
(
idx
);
});
// -- python binds for parallel executor.
// -- python binds for parallel executor.
py
::
class_
<
ParallelExecutor
>
pe
(
m
,
"ParallelExecutor"
);
py
::
class_
<
ParallelExecutor
>
pe
(
m
,
"ParallelExecutor"
);
py
::
class_
<
ExecutionStrategy
>
exec_strategy
(
pe
,
"ExecutionStrategy"
);
py
::
class_
<
ExecutionStrategy
>
exec_strategy
(
pe
,
"ExecutionStrategy"
);
...
@@ -677,6 +701,10 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -677,6 +701,10 @@ All parameter, weight, gradient are variables in Paddle.
},
},
[](
BuildStrategy
&
self
,
bool
b
)
{
[](
BuildStrategy
&
self
,
bool
b
)
{
self
.
fuse_elewise_add_act_ops_
=
b
;
self
.
fuse_elewise_add_act_ops_
=
b
;
})
.
def
(
"_create_passes_from_strategy"
,
[](
BuildStrategy
&
self
)
->
std
::
shared_ptr
<
ir
::
PassBuilder
>
{
return
self
.
CreatePassesFromStrategy
();
});
});
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
e79ad2ea
...
@@ -70,8 +70,8 @@ function cmake_gen() {
...
@@ -70,8 +70,8 @@ function cmake_gen() {
PYTHON_FLAGS
=
""
PYTHON_FLAGS
=
""
SYSTEM
=
`
uname
-s
`
SYSTEM
=
`
uname
-s
`
if
[
"
$SYSTEM
"
==
"Darwin"
]
;
then
if
[
"
$SYSTEM
"
==
"Darwin"
]
;
then
echo
"Using python abi:
$1
"
if
[[
"
$1
"
==
"cp27-cp27m"
]]
||
[[
"
$1
"
==
""
]]
;
then
if
[[
"
$1
"
==
"cp27-cp27m"
]]
||
[[
"
$1
"
==
""
]]
;
then
echo
"using python abi:
$1
"
if
[
-d
"/Library/Frameworks/Python.framework/Versions/2.7"
]
;
then
if
[
-d
"/Library/Frameworks/Python.framework/Versions/2.7"
]
;
then
export
LD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/2.7
export
LD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/2.7
export
DYLD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/2.7
export
DYLD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/2.7
...
@@ -82,7 +82,18 @@ function cmake_gen() {
...
@@ -82,7 +82,18 @@ function cmake_gen() {
else
else
exit
1
exit
1
fi
fi
# TODO: qiyang add python3 part here
elif
[
"
$1
"
==
"cp35-cp35m"
]
;
then
if
[
-d
"/Library/Frameworks/Python.framework/Versions/3.5"
]
;
then
export
LD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/3.5/lib/
export
DYLD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/3.5/lib/
export
PATH
=
/Library/Frameworks/Python.framework/Versions/3.5/bin/:
${
PATH
}
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/bin/python3
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib"
WITH_FLUID_ONLY
=
${
WITH_FLUID_ONLY
:-
ON
}
else
exit
1
fi
fi
fi
else
else
if
[
"
$1
"
!=
""
]
;
then
if
[
"
$1
"
!=
""
]
;
then
...
@@ -731,6 +742,10 @@ function main() {
...
@@ -731,6 +742,10 @@ function main() {
build_mac
build_mac
run_mac_test
${
PROC_RUN
:-
1
}
run_mac_test
${
PROC_RUN
:-
1
}
;;
;;
macbuild
)
cmake_gen
${
PYTHON_ABI
:-
""
}
build_mac
;;
cicheck_py35
)
cicheck_py35
)
cmake_gen
${
PYTHON_ABI
:-
""
}
cmake_gen
${
PYTHON_ABI
:-
""
}
build
build
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
e79ad2ea
...
@@ -284,7 +284,7 @@ def detection_output(loc,
...
@@ -284,7 +284,7 @@ def detection_output(loc,
target_box
=
loc
,
target_box
=
loc
,
code_type
=
'decode_center_size'
)
code_type
=
'decode_center_size'
)
compile_shape
=
scores
.
shape
compile_shape
=
scores
.
shape
run_shape
=
ops
.
shape
(
scores
)
run_shape
=
nn
.
shape
(
scores
)
scores
=
nn
.
flatten
(
x
=
scores
,
axis
=
2
)
scores
=
nn
.
flatten
(
x
=
scores
,
axis
=
2
)
scores
=
nn
.
softmax
(
input
=
scores
)
scores
=
nn
.
softmax
(
input
=
scores
)
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
compile_shape
,
actual_shape
=
run_shape
)
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
compile_shape
,
actual_shape
=
run_shape
)
...
@@ -697,7 +697,7 @@ def ssd_loss(location,
...
@@ -697,7 +697,7 @@ def ssd_loss(location,
raise
ValueError
(
"Only support mining_type == max_negative now."
)
raise
ValueError
(
"Only support mining_type == max_negative now."
)
num
,
num_prior
,
num_class
=
confidence
.
shape
num
,
num_prior
,
num_class
=
confidence
.
shape
conf_shape
=
ops
.
shape
(
confidence
)
conf_shape
=
nn
.
shape
(
confidence
)
def
__reshape_to_2d
(
var
):
def
__reshape_to_2d
(
var
):
return
nn
.
flatten
(
x
=
var
,
axis
=
2
)
return
nn
.
flatten
(
x
=
var
,
axis
=
2
)
...
@@ -724,7 +724,7 @@ def ssd_loss(location,
...
@@ -724,7 +724,7 @@ def ssd_loss(location,
target_label
.
stop_gradient
=
True
target_label
.
stop_gradient
=
True
conf_loss
=
nn
.
softmax_with_cross_entropy
(
confidence
,
target_label
)
conf_loss
=
nn
.
softmax_with_cross_entropy
(
confidence
,
target_label
)
# 3. Mining hard examples
# 3. Mining hard examples
actual_shape
=
ops
.
slice
(
conf_shape
,
axes
=
[
0
],
starts
=
[
0
],
ends
=
[
2
])
actual_shape
=
nn
.
slice
(
conf_shape
,
axes
=
[
0
],
starts
=
[
0
],
ends
=
[
2
])
actual_shape
.
stop_gradient
=
True
actual_shape
.
stop_gradient
=
True
conf_loss
=
nn
.
reshape
(
conf_loss
=
nn
.
reshape
(
x
=
conf_loss
,
shape
=
(
num
,
num_prior
),
actual_shape
=
actual_shape
)
x
=
conf_loss
,
shape
=
(
num
,
num_prior
),
actual_shape
=
actual_shape
)
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
e79ad2ea
...
@@ -507,7 +507,6 @@ def py_reader(capacity,
...
@@ -507,7 +507,6 @@ def py_reader(capacity,
1. The basic usage of :code:`py_reader` is as follows:
1. The basic usage of :code:`py_reader` is as follows:
>>> import paddle.v2
>>> import paddle.fluid as fluid
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>> import paddle.dataset.mnist as mnist
>>>
>>>
...
@@ -515,7 +514,7 @@ def py_reader(capacity,
...
@@ -515,7 +514,7 @@ def py_reader(capacity,
>>> shapes=[(-1,3,224,224), (-1,1)],
>>> shapes=[(-1,3,224,224), (-1,1)],
>>> dtypes=['float32', 'int64'])
>>> dtypes=['float32', 'int64'])
>>> reader.decorate_paddle_reader(
>>> reader.decorate_paddle_reader(
>>> paddle.
v2.
reader.shuffle(paddle.batch(mnist.train())
>>> paddle.reader.shuffle(paddle.batch(mnist.train())
>>>
>>>
>>> img, label = fluid.layers.read_file(reader)
>>> img, label = fluid.layers.read_file(reader)
>>> loss = network(img, label) # some network definition
>>> loss = network(img, label) # some network definition
...
@@ -534,7 +533,6 @@ def py_reader(capacity,
...
@@ -534,7 +533,6 @@ def py_reader(capacity,
2. When training and testing are both performed, two different
2. When training and testing are both performed, two different
:code:`py_reader` should be created with different names, e.g.:
:code:`py_reader` should be created with different names, e.g.:
>>> import paddle.v2
>>> import paddle.fluid as fluid
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>> import paddle.dataset.mnist as mnist
>>>
>>>
...
@@ -548,7 +546,7 @@ def py_reader(capacity,
...
@@ -548,7 +546,7 @@ def py_reader(capacity,
>>> dtypes=['float32', 'int64'],
>>> dtypes=['float32', 'int64'],
>>> name='train_reader')
>>> name='train_reader')
>>> train_reader.decorate_paddle_reader(
>>> train_reader.decorate_paddle_reader(
>>> paddle.
v2.
reader.shuffle(paddle.batch(mnist.train())
>>> paddle.reader.shuffle(paddle.batch(mnist.train())
>>>
>>>
>>> test_reader = fluid.layers.py_reader(capacity=32,
>>> test_reader = fluid.layers.py_reader(capacity=32,
>>> shapes=[(-1,3,224,224), (-1,1)],
>>> shapes=[(-1,3,224,224), (-1,1)],
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
e79ad2ea
...
@@ -29,110 +29,29 @@ from .. import unique_name
...
@@ -29,110 +29,29 @@ from .. import unique_name
from
functools
import
reduce
from
functools
import
reduce
__all__
=
[
__all__
=
[
'fc'
,
'fc'
,
'embedding'
,
'dynamic_lstm'
,
'dynamic_lstmp'
,
'dynamic_gru'
,
'embedding'
,
'gru_unit'
,
'linear_chain_crf'
,
'crf_decoding'
,
'cos_sim'
,
'cross_entropy'
,
'dynamic_lstm'
,
'square_error_cost'
,
'chunk_eval'
,
'sequence_conv'
,
'conv2d'
,
'conv3d'
,
'dynamic_lstmp'
,
'sequence_pool'
,
'sequence_softmax'
,
'softmax'
,
'pool2d'
,
'pool3d'
,
'dynamic_gru'
,
'batch_norm'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'conv3d_transpose'
,
'gru_unit'
,
'sequence_expand'
,
'sequence_expand_as'
,
'sequence_pad'
,
'lstm_unit'
,
'linear_chain_crf'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'reduce_prod'
,
'crf_decoding'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
,
'split'
,
'cos_sim'
,
'ctc_greedy_decoder'
,
'edit_distance'
,
'l2_normalize'
,
'matmul'
,
'topk'
,
'cross_entropy'
,
'warpctc'
,
'sequence_reshape'
,
'transpose'
,
'im2sequence'
,
'nce'
,
'square_error_cost'
,
'hsigmoid'
,
'beam_search'
,
'row_conv'
,
'multiplex'
,
'layer_norm'
,
'chunk_eval'
,
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'one_hot'
,
'sequence_conv'
,
'autoincreased_step_counter'
,
'reshape'
,
'squeeze'
,
'unsqueeze'
,
'conv2d'
,
'lod_reset'
,
'lrn'
,
'pad'
,
'pad_constant_like'
,
'label_smooth'
,
'roi_pool'
,
'conv3d'
,
'dice_loss'
,
'image_resize'
,
'image_resize_short'
,
'resize_bilinear'
,
'sequence_pool'
,
'gather'
,
'scatter'
,
'sequence_scatter'
,
'random_crop'
,
'mean_iou'
,
'relu'
,
'sequence_softmax'
,
'log'
,
'crop'
,
'rank_loss'
,
'elu'
,
'relu6'
,
'pow'
,
'stanh'
,
'hard_sigmoid'
,
'softmax'
,
'swish'
,
'prelu'
,
'brelu'
,
'leaky_relu'
,
'soft_relu'
,
'flatten'
,
'pool2d'
,
'sequence_mask'
,
'stack'
,
'pad2d'
,
'unstack'
,
'sequence_enumerate'
,
'pool3d'
,
'expand'
,
'sequence_concat'
,
'scale'
,
'elementwise_add'
,
'elementwise_div'
,
'batch_norm'
,
'elementwise_sub'
,
'elementwise_mul'
,
'elementwise_max'
,
'elementwise_min'
,
'beam_search_decode'
,
'elementwise_pow'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'conv2d_transpose'
,
'sampling_id'
,
'gaussian_random_batch_size_like'
,
'sum'
,
'slice'
,
'shape'
'conv3d_transpose'
,
'sequence_expand'
,
'sequence_expand_as'
,
'sequence_pad'
,
'lstm_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'reduce_prod'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
,
'split'
,
'ctc_greedy_decoder'
,
'edit_distance'
,
'l2_normalize'
,
'matmul'
,
'topk'
,
'warpctc'
,
'sequence_reshape'
,
'transpose'
,
'im2sequence'
,
'nce'
,
'hsigmoid'
,
'beam_search'
,
'row_conv'
,
'multiplex'
,
'layer_norm'
,
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'one_hot'
,
'autoincreased_step_counter'
,
'reshape'
,
'squeeze'
,
'unsqueeze'
,
'lod_reset'
,
'lrn'
,
'pad'
,
'pad_constant_like'
,
'label_smooth'
,
'roi_pool'
,
'dice_loss'
,
'image_resize'
,
'image_resize_short'
,
'resize_bilinear'
,
'gather'
,
'scatter'
,
'sequence_scatter'
,
'random_crop'
,
'mean_iou'
,
'relu'
,
'log'
,
'crop'
,
'rank_loss'
,
'elu'
,
'relu6'
,
'pow'
,
'stanh'
,
'hard_sigmoid'
,
'swish'
,
'prelu'
,
'brelu'
,
'leaky_relu'
,
'soft_relu'
,
'flatten'
,
'sequence_mask'
,
'stack'
,
'pad2d'
,
'unstack'
,
'sequence_enumerate'
,
'expand'
,
'sequence_concat'
,
'scale'
,
'elementwise_add'
,
'elementwise_div'
,
'elementwise_sub'
,
'elementwise_mul'
,
'elementwise_max'
,
'elementwise_min'
,
'elementwise_pow'
,
]
]
...
@@ -6463,6 +6382,246 @@ def expand(x, expand_times, name=None):
...
@@ -6463,6 +6382,246 @@ def expand(x, expand_times, name=None):
return
out
return
out
from
paddle.fluid.framework
import
convert_np_dtype_to_dtype_
@
templatedoc
()
def
uniform_random_batch_size_like
(
input
,
shape
,
dtype
=
'float32'
,
input_dim_idx
=
0
,
output_dim_idx
=
0
,
min
=-
1.0
,
max
=
1.0
,
seed
=
0
):
"""
${comment}
Args:
input (Variable): ${input_comment}
shape (tuple|list): ${shape_comment}
input_dim_idx (Int): ${input_dim_idx_comment}
output_dim_idx (Int): ${output_dim_idx_comment}
min (Float): ${min_comment}
max (Float): ${max_comment}
seed (Int): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float_16, int etc
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'uniform_random_batch_size_like'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
helper
.
append_op
(
type
=
'uniform_random_batch_size_like'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'shape'
:
shape
,
'input_dim_idx'
:
input_dim_idx
,
'output_dim_idx'
:
output_dim_idx
,
'min'
:
min
,
'max'
:
max
,
'seed'
:
seed
,
'dtype'
:
c_dtype
})
return
out
@
templatedoc
()
def
gaussian_random
(
shape
,
mean
=
0.0
,
std
=
1.0
,
seed
=
0
,
dtype
=
'float32'
,
use_mkldnn
=
False
):
"""
${comment}
Args:
shape (tuple|list): ${shape_comment}
mean (Float): ${mean_comment}
std (Float): ${std_comment}
seed (Int): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): Output data type.
use_mkldnn (Bool): Only used in mkldnn kernel.
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'gaussian_random'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
helper
.
append_op
(
type
=
'gaussian_random'
,
outputs
=
{
'Out'
:
out
},
attrs
=
{
'shape'
:
shape
,
'mean'
:
mean
,
'std'
:
std
,
'seed'
:
seed
,
'dtype'
:
c_dtype
,
'use_mkldnn'
:
use_mkldnn
})
return
out
@
templatedoc
()
def
sampling_id
(
x
,
min
=
0.0
,
max
=
1.0
,
seed
=
0
,
dtype
=
'float32'
):
"""
${comment}
Args:
x (Variable): ${x_comment}
min (Float): ${min_comment}
max (Float): ${max_comment}
seed (Float): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): The type of output data : float32, float_16, int etc
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'sampling_id'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
type
=
'sampling_id'
,
inputs
=
{
'X'
:
x
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'min'
:
min
,
'max'
:
max
,
'seed'
:
seed
})
return
out
@
templatedoc
()
def
gaussian_random_batch_size_like
(
input
,
shape
,
input_dim_idx
=
0
,
output_dim_idx
=
0
,
mean
=
0.0
,
std
=
1.0
,
seed
=
0
,
dtype
=
'float32'
):
"""
${comment}
Args:
input (Variable): ${input_comment}
shape (tuple|list): ${shape_comment}
input_dim_idx (Int): ${input_dim_idx_comment}
output_dim_idx (Int): ${output_dim_idx_comment}
mean (Float): ${mean_comment}
std (Float): ${std_comment}
seed (Int): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): The type of output data : float32, float_16, int etc
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'gaussian_random_batch_size_like'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
helper
.
append_op
(
type
=
'gaussian_random_batch_size_like'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'shape'
:
shape
,
'input_dim_idx'
:
input_dim_idx
,
'output_dim_idx'
:
output_dim_idx
,
'mean'
:
mean
,
'std'
:
std
,
'seed'
:
seed
,
'dtype'
:
c_dtype
})
return
out
@
templatedoc
()
def
sum
(
x
,
use_mkldnn
=
False
):
"""
${comment}
Args:
x (Variable): ${x_comment}
use_mkldnn (Bool): ${use_mkldnn_comment}
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'sum'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
(
'x'
))
helper
.
append_op
(
type
=
'sum'
,
inputs
=
{
'X'
:
x
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'use_mkldnn'
:
use_mkldnn
})
return
out
@
templatedoc
()
def
slice
(
input
,
axes
,
starts
,
ends
):
"""
${comment}
Args:
input (Variable): ${input_comment}.
axes (List): ${axes_comment}
starts (List): ${starts_comment}
ends (List): ${ends_comment}
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'slice'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
(
'input'
))
helper
.
append_op
(
type
=
'slice'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'axes'
:
axes
,
'starts'
:
starts
,
'ends'
:
ends
})
return
out
@
templatedoc
()
def
shape
(
input
):
"""
${comment}
Args:
input (Variable): ${input_comment}
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'shape'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
(
'input'
))
helper
.
append_op
(
type
=
'shape'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
})
return
out
def
_elementwise_op
(
helper
):
def
_elementwise_op
(
helper
):
op_type
=
helper
.
layer_type
op_type
=
helper
.
layer_type
x
=
helper
.
kwargs
.
get
(
'x'
,
None
)
x
=
helper
.
kwargs
.
get
(
'x'
,
None
)
...
...
python/paddle/fluid/layers/ops.py
浏览文件 @
e79ad2ea
...
@@ -45,13 +45,6 @@ __all__ = [
...
@@ -45,13 +45,6 @@ __all__ = [
'logical_or'
,
'logical_or'
,
'logical_xor'
,
'logical_xor'
,
'logical_not'
,
'logical_not'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'sampling_id'
,
'gaussian_random_batch_size_like'
,
'sum'
,
'slice'
,
'shape'
,
'maxout'
,
'maxout'
,
]
]
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
e79ad2ea
...
@@ -345,7 +345,7 @@ class OpTest(unittest.TestCase):
...
@@ -345,7 +345,7 @@ class OpTest(unittest.TestCase):
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
))
str
(
actual_t
)
+
" in class "
+
self
.
__class__
.
__name__
)
if
isinstance
(
expect
,
tuple
):
if
isinstance
(
expect
,
tuple
):
self
.
assertListEqual
(
actual
.
recursive_sequence_lengths
(),
self
.
assertListEqual
(
actual
.
recursive_sequence_lengths
(),
expect
[
1
],
"Output ("
+
out_name
+
expect
[
1
],
"Output ("
+
out_name
+
...
...
python/paddle/fluid/tests/unittests/test_detection_map_op.py
浏览文件 @
e79ad2ea
...
@@ -20,6 +20,7 @@ import six
...
@@ -20,6 +20,7 @@ import six
import
sys
import
sys
import
collections
import
collections
import
math
import
math
import
paddle.fluid
as
fluid
from
op_test
import
OpTest
from
op_test
import
OpTest
...
@@ -32,7 +33,7 @@ class TestDetectionMAPOp(OpTest):
...
@@ -32,7 +33,7 @@ class TestDetectionMAPOp(OpTest):
self
.
detect
=
np
.
array
(
self
.
detect
).
astype
(
'float32'
)
self
.
detect
=
np
.
array
(
self
.
detect
).
astype
(
'float32'
)
self
.
mAP
=
np
.
array
(
self
.
mAP
).
astype
(
'float32'
)
self
.
mAP
=
np
.
array
(
self
.
mAP
).
astype
(
'float32'
)
if
(
len
(
self
.
class_pos_count
)
>
0
)
:
if
len
(
self
.
class_pos_count
)
>
0
:
self
.
class_pos_count
=
np
.
array
(
self
.
class_pos_count
).
astype
(
self
.
class_pos_count
=
np
.
array
(
self
.
class_pos_count
).
astype
(
'int32'
)
'int32'
)
self
.
true_pos
=
np
.
array
(
self
.
true_pos
).
astype
(
'float32'
)
self
.
true_pos
=
np
.
array
(
self
.
true_pos
).
astype
(
'float32'
)
...
@@ -273,7 +274,7 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp):
...
@@ -273,7 +274,7 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp):
class
TestDetectionMAPOpMultiBatch
(
TestDetectionMAPOp
):
class
TestDetectionMAPOpMultiBatch
(
TestDetectionMAPOp
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
super
(
TestDetectionMAPOpMultiBatch
,
self
).
init_test_case
()
super
(
TestDetectionMAPOpMultiBatch
,
self
).
init_test_case
()
self
.
class_pos_count
=
[
0
,
2
,
1
]
self
.
class_pos_count
=
[
0
,
2
,
1
,
0
]
self
.
true_pos_lod
=
[[
0
,
3
,
2
]]
self
.
true_pos_lod
=
[[
0
,
3
,
2
]]
self
.
true_pos
=
[[
0.7
,
1.
],
[
0.3
,
0.
],
[
0.2
,
1.
],
[
0.8
,
0.
],
[
0.1
,
1.
]]
self
.
true_pos
=
[[
0.7
,
1.
],
[
0.3
,
0.
],
[
0.2
,
1.
],
[
0.8
,
0.
],
[
0.1
,
1.
]]
self
.
false_pos_lod
=
[[
0
,
3
,
2
]]
self
.
false_pos_lod
=
[[
0
,
3
,
2
]]
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
e79ad2ea
...
@@ -541,7 +541,7 @@ class TestBook(unittest.TestCase):
...
@@ -541,7 +541,7 @@ class TestBook(unittest.TestCase):
with
program_guard
(
program
):
with
program_guard
(
program
):
input
=
layers
.
data
(
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
3
,
100
,
100
],
dtype
=
"float32"
)
name
=
"input"
,
shape
=
[
3
,
100
,
100
],
dtype
=
"float32"
)
out
=
layers
.
shape
(
input
,
name
=
"shape"
)
out
=
layers
.
shape
(
input
)
self
.
assertIsNotNone
(
out
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
print
(
str
(
program
))
...
@@ -758,6 +758,65 @@ class TestBook(unittest.TestCase):
...
@@ -758,6 +758,65 @@ class TestBook(unittest.TestCase):
out
=
layers
.
expand
(
x
,
[
1
,
2
])
out
=
layers
.
expand
(
x
,
[
1
,
2
])
print
(
str
(
program
))
print
(
str
(
program
))
def
test_uniform_random_batch_size_like
(
self
):
program
=
Program
()
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
13
,
11
],
dtype
=
'float32'
)
out
=
layers
.
uniform_random_batch_size_like
(
input
,
[
-
1
,
11
])
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_gaussian_random
(
self
):
program
=
Program
()
with
program_guard
(
program
):
out
=
layers
.
gaussian_random
(
shape
=
[
20
,
30
])
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_sampling_id
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
"X"
,
shape
=
[
13
,
11
],
dtype
=
'float32'
,
append_batch_size
=
False
)
out
=
layers
.
sampling_id
(
x
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_gaussian_random_batch_size_like
(
self
):
program
=
Program
()
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
13
,
11
],
dtype
=
'float32'
)
out
=
layers
.
gaussian_random_batch_size_like
(
input
,
shape
=
[
-
1
,
11
],
mean
=
1.0
,
std
=
2.0
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_sum
(
self
):
program
=
Program
()
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
13
,
11
],
dtype
=
'float32'
)
out
=
layers
.
sum
(
input
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_slice
(
self
):
starts
=
[
1
,
0
,
2
]
ends
=
[
3
,
3
,
4
]
axes
=
[
0
,
1
,
2
]
program
=
Program
()
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
3
,
4
,
5
,
6
],
dtype
=
'float32'
)
out
=
layers
.
slice
(
input
,
axes
=
axes
,
starts
=
starts
,
ends
=
ends
)
def
test_softshrink
(
self
):
def
test_softshrink
(
self
):
program
=
Program
()
program
=
Program
()
with
program_guard
(
program
):
with
program_guard
(
program
):
...
...
python/paddle/fluid/tests/unittests/test_pass_builder.py
0 → 100644
浏览文件 @
e79ad2ea
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
numpy
as
np
import
unittest
import
os
import
sys
import
math
def
simple_fc_net
():
img
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
784
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
hidden
=
img
for
_
in
range
(
4
):
hidden
=
fluid
.
layers
.
fc
(
hidden
,
size
=
200
,
act
=
'tanh'
,
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
1.0
)))
prediction
=
fluid
.
layers
.
fc
(
hidden
,
size
=
10
,
act
=
'softmax'
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
loss
=
fluid
.
layers
.
mean
(
loss
)
return
loss
class
TestPassBuilder
(
unittest
.
TestCase
):
def
check_network_convergence
(
self
,
use_cuda
,
build_strategy
=
None
):
os
.
environ
[
'CPU_NUM'
]
=
str
(
4
)
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
loss
=
simple_fc_net
()
test_program
=
main
.
clone
(
for_test
=
True
)
opt
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.001
)
opt
.
minimize
(
loss
)
batch_size
=
32
image
=
np
.
random
.
normal
(
size
=
(
batch_size
,
784
)).
astype
(
'float32'
)
label
=
np
.
random
.
randint
(
0
,
10
,
(
batch_size
,
1
),
dtype
=
"int64"
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
startup
)
feed_dict
=
{
'image'
:
image
,
'label'
:
label
}
train_exe
=
fluid
.
ParallelExecutor
(
use_cuda
=
use_cuda
,
loss_name
=
loss
.
name
,
main_program
=
main
,
build_strategy
=
build_strategy
)
test_exe
=
fluid
.
ParallelExecutor
(
use_cuda
=
use_cuda
,
main_program
=
test_program
,
share_vars_from
=
train_exe
,
build_strategy
=
build_strategy
)
for
i
in
range
(
5
):
test_loss
,
=
test_exe
.
run
([
loss
.
name
],
feed
=
feed_dict
)
train_loss
,
=
train_exe
.
run
([
loss
.
name
],
feed
=
feed_dict
)
avg_test_loss_val
=
np
.
array
(
test_loss
).
mean
()
if
math
.
isnan
(
float
(
avg_test_loss_val
)):
sys
.
exit
(
"got NaN loss, testing failed."
)
avg_train_loss_val
=
np
.
array
(
train_loss
).
mean
()
if
math
.
isnan
(
float
(
avg_train_loss_val
)):
sys
.
exit
(
"got NaN loss, training failed."
)
self
.
assertTrue
(
np
.
allclose
(
train_loss
,
test_loss
,
atol
=
1e-8
),
"Train loss: "
+
str
(
train_loss
)
+
"
\n
Test loss:"
+
str
(
test_loss
))
def
test_parallel_testing_with_new_strategy
(
self
):
build_strategy
=
fluid
.
BuildStrategy
()
pass_builder
=
build_strategy
.
_create_passes_from_strategy
()
origin_len
=
len
(
pass_builder
.
all_passes
())
viz_pass
=
pass_builder
.
append_pass
(
"graph_viz_pass"
)
self
.
assertEqual
(
origin_len
+
1
,
len
(
pass_builder
.
all_passes
()))
pass_builder
.
insert_pass
(
len
(
pass_builder
.
all_passes
()),
"graph_viz_pass"
)
self
.
assertEqual
(
origin_len
+
2
,
len
(
pass_builder
.
all_passes
()))
pass_builder
.
remove_pass
(
len
(
pass_builder
.
all_passes
())
-
1
)
self
.
assertEqual
(
origin_len
+
1
,
len
(
pass_builder
.
all_passes
()))
viz_pass
.
set_str
(
"graph_viz_path"
,
"/tmp/test_viz_pass"
)
self
.
check_network_convergence
(
use_cuda
=
core
.
is_compiled_with_cuda
(),
build_strategy
=
build_strategy
)
try
:
os
.
stat
(
"/tmp/test_viz_pass"
)
except
os
.
error
:
self
.
assertFalse
(
True
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/transformer_model.py
浏览文件 @
e79ad2ea
...
@@ -246,6 +246,7 @@ def prepare_encoder(src_word,
...
@@ -246,6 +246,7 @@ def prepare_encoder(src_word,
padding_idx
=
pos_pad_idx
,
padding_idx
=
pos_pad_idx
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
pos_enc_param_name
,
trainable
=
False
))
name
=
pos_enc_param_name
,
trainable
=
False
))
src_pos_enc
.
stop_gradient
=
True
enc_input
=
src_word_emb
+
src_pos_enc
enc_input
=
src_word_emb
+
src_pos_enc
# FIXME(guosheng): Decouple the program desc with batch_size.
# FIXME(guosheng): Decouple the program desc with batch_size.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录