PaddlePaddle / PaddleDetection
Commit a318a490
Authored Dec 27, 2018 by minqiyang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into accelerate_ddpg

test=develop

Parents: 8ed02339, c0bcff00
Showing 73 changed files with 1,508 additions and 1,021 deletions (+1508 −1021).
cmake/simd.cmake  +35 −38
paddle/fluid/framework/CMakeLists.txt  +20 −25
paddle/fluid/framework/data_device_transform_test.cu  +1 −0
paddle/fluid/framework/details/eager_deletion_op_handle.cc  +1 −1
paddle/fluid/framework/details/execution_strategy.h  +1 −1
paddle/fluid/framework/details/multi_devices_graph_pass.cc  +198 −207
paddle/fluid/framework/details/multi_devices_graph_pass.h  +11 −8
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc  +0 −1
paddle/fluid/framework/details/variable_visitor.cc  +2 −2
paddle/fluid/framework/executor.cc  +1 −1
paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc  +14 −11
paddle/fluid/framework/ir/graph.cc  +0 −91
paddle/fluid/framework/ir/graph_pattern_detector.cc  +5 −7
paddle/fluid/framework/mixed_vector.h  +5 −5
paddle/fluid/framework/op_proto_maker.cc  +0 −4
paddle/fluid/framework/op_proto_maker.h  +0 −1
paddle/fluid/framework/op_registry.h  +2 −1
paddle/fluid/framework/operator.cc  +24 −61
paddle/fluid/framework/operator.h  +9 −3
paddle/fluid/framework/parallel_executor.cc  +2 −3
paddle/fluid/framework/scope.cc  +21 −34
paddle/fluid/framework/scope.h  +2 −10
paddle/fluid/framework/var_type.h  +20 −22
paddle/fluid/framework/var_type_inference_test.cc  +1 −1
paddle/fluid/framework/var_type_traits.cc  +119 −0
paddle/fluid/framework/var_type_traits.h  +195 −0
paddle/fluid/framework/var_type_traits_test.cc  +120 −0
paddle/fluid/framework/variable.h  +35 −31
paddle/fluid/framework/variable_test.cc  +12 −11
paddle/fluid/inference/analysis/analyzer_tester.cc  +4 −4
paddle/fluid/inference/api/details/reset_tensor_array.cc  +1 −1
paddle/fluid/inference/api/details/reset_tensor_array.h  +6 −3
paddle/fluid/inference/api/helper.h  +10 −0
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc  +1 −3
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc  +9 −30
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc  +11 −27
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc  +15 −61
paddle/fluid/inference/tests/api/tester_helper.h  +12 −0
paddle/fluid/inference/tests/test.cmake  +5 −3
paddle/fluid/operators/CMakeLists.txt  +1 −1
paddle/fluid/operators/clip_by_norm_op.h  +1 −1
paddle/fluid/operators/controlflow/while_op.cc  +3 −4
paddle/fluid/operators/conv_fusion_op.cu.cc  +2 −2
paddle/fluid/operators/cudnn_lstm_op.cu.cc  +4 −237
paddle/fluid/operators/cudnn_rnn_cache.h  +255 −0
paddle/fluid/operators/cum_op.h  +2 −0
paddle/fluid/operators/detail/safe_ref.h  +1 −1
paddle/fluid/operators/distributed/proto_encoder_helper.h  +3 −1
paddle/fluid/operators/distributed_ops/CMakeLists.txt  +1 −1
paddle/fluid/operators/distributed_ops/split_ids_op.h  +1 −1
paddle/fluid/operators/elementwise/elementwise_mul_op.h  +1 −1
paddle/fluid/operators/lrn_mkldnn_op.cc  +4 −4
paddle/fluid/operators/optimizers/adadelta_op.h  +4 −2
paddle/fluid/operators/optimizers/adagrad_op.h  +2 −1
paddle/fluid/operators/optimizers/adam_op.h  +2 −1
paddle/fluid/operators/optimizers/adamax_op.h  +4 −2
paddle/fluid/operators/optimizers/decayed_adagrad_op.h  +4 −2
paddle/fluid/operators/optimizers/ftrl_op.h  +4 −2
paddle/fluid/operators/optimizers/momentum_op.h  +1 −1
paddle/fluid/operators/optimizers/sgd_op.cu  +2 −1
paddle/fluid/operators/sum_mkldnn_op.cc  +1 −1
paddle/fluid/operators/sum_op.cc  +1 −1
paddle/fluid/operators/sum_op.h  +1 −1
paddle/fluid/platform/enforce.h  +60 −28
paddle/fluid/platform/enforce_test.cc  +19 −0
paddle/fluid/platform/float16_test.cc  +1 −0
paddle/fluid/platform/float16_test.cu  +1 −0
paddle/fluid/pybind/const_value.cc  +0 −3
paddle/fluid/string/printf.h  +1 −1
python/paddle/fluid/framework.py  +0 −5
python/paddle/fluid/parallel_executor.py  +2 −2
python/paddle/fluid/tests/unittests/test_operator_desc.py  +1 −1
python/paddle/fluid/tests/unittests/test_weight_decay.py  +188 −0
cmake/simd.cmake

@@ -57,46 +57,43 @@ int main()
   return 0;
 }"
 SSE3_FOUND)

-# Check AVX
-set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
-set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
-CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
-int main()
-{
-  __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
-  __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
-  __m256 result = _mm256_add_ps (a, b);
-  return 0;
-}"
-AVX_FOUND)
+# disable AVX by default on windows
+if (NOT WIN32)
+  # Check AVX
+  set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
+  set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
+  CHECK_CXX_SOURCE_RUNS("
+  #include <immintrin.h>
+  int main()
+  {
+    __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
+    __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
+    __m256 result = _mm256_add_ps (a, b);
+    return 0;
+  }"
+  AVX_FOUND)

-# Check AVX 2
-set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
-set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
-CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
-int main()
-{
-  __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
-  __m256i result = _mm256_abs_epi32 (a);
-  return 0;
-}"
-AVX2_FOUND)
+  # Check AVX 2
+  set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
+  set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
+  CHECK_CXX_SOURCE_RUNS("
+  #include <immintrin.h>
+  int main()
+  {
+    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    __m256i result = _mm256_abs_epi32 (a);
+    return 0;
+  }"
+  AVX2_FOUND)

-# Check AVX512F
-set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
-set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
-CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
-int main()
-{
-  __m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
-                                13, -5, 6, -7, 9, 2, -6, 3);
-  __m512i result = _mm512_abs_epi32 (a);
-  return 0;
-}"
-AVX512F_FOUND)
+  # Check AVX512F
+  set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
+  set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
+  CHECK_CXX_SOURCE_RUNS("
+  #include <immintrin.h>
+  int main()
+  {
+    __m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
+                                  13, -5, 6, -7, 9, 2, -6, 3);
+    __m512i result = _mm512_abs_epi32 (a);
+    return 0;
+  }"
+  AVX512F_FOUND)
+endif(NOT WIN32)

 set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})

 mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND)
paddle/fluid/framework/CMakeLists.txt

@@ -7,27 +7,17 @@ function(windows_symbolic TARGET)
   cmake_parse_arguments(windows_symbolic "${options}" "${oneValueArgs}"
                         "${multiValueArgs}" ${ARGN})
+  set(final_path ${CMAKE_CURRENT_SOURCE_DIR}/${windows_symbolic_PATH})
   foreach(src ${windows_symbolic_SRCS})
-    get_filename_component(src ${src} NAME_WE)
-    if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cu)
-      message(FATAL "${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file.")
-    endif()
-
-    #only copy the xx.cu to .xx.cu when the content are modified
-    set(copy_flag 1)
-    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu)
-      file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc SOURCE_STR)
-      file(READ ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu TARGET_STR)
-      if (SOURCE_STR STREQUAL TARGET_STR)
-        set(copy_flag 0)
-      endif()
-    endif()
-    if (copy_flag)
-      add_custom_command(OUTPUT .${src}.cu
-              COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu
-              COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc" "${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu"
-              COMMENT "create hidden file of ${src}.cu")
-    endif(copy_flag)
-    add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
+    get_filename_component(src ${src} NAME_WE)
+    if (NOT EXISTS ${final_path}/${src}.cc OR NOT EXISTS ${final_path}/${src}.cu)
+      message(FATAL "${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file.")
+    endif()
+    file(GENERATE OUTPUT ${final_path}/.${src}.cu INPUT ${final_path}/${src}.cc)
+    add_custom_command(OUTPUT ${final_path}/.${src}.cu
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" "${final_path}/.${src}.cu"
+            COMMENT "create hidden file of ${src}.cu")
+    add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
   endforeach()
 endfunction()

@@ -78,18 +68,23 @@ cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memor
 cc_library(reader SRCS reader.cc DEPS lod_tensor ddim)
 cc_test(reader_test SRCS reader_test.cc DEPS reader)

-cc_test(variable_test SRCS variable_test.cc)
-
 cc_library(threadpool SRCS threadpool.cc DEPS enforce)
 cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)

-cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash)
+cc_library(var_type_traits SRCS var_type_traits DEPS lod_tensor selected_rows framework_proto)
+if (WITH_GPU)
+  target_link_libraries(var_type_traits dynload_cuda)
+endif()
+cc_test(var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits)
+
+cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash var_type_traits)
 cc_library(scope_pool SRCS scope_pool.cc DEPS scope)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
+cc_test(variable_test SRCS variable_test.cc DEPS tensor var_type_traits)

 cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
 nv_test(data_device_transform_test SRCS data_device_transform_test.cu
-        DEPS operator op_registry device_context math_function)
+        DEPS operator op_registry device_context math_function scope)

 if(WITH_GPU)
   if(WIN32)
paddle/fluid/framework/data_device_transform_test.cu

@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
paddle/fluid/framework/details/eager_deletion_op_handle.cc

@@ -88,7 +88,7 @@ void EagerDeletionOpHandle::RunImpl() {
     }
   } else {
     PADDLE_THROW("Type %s of %s is not supported eager deletion",
-                 var->Type().name(), name);
+                 framework::ToTypeName(var->Type()), name);
   }
 }
paddle/fluid/framework/details/execution_strategy.h

@@ -25,7 +25,7 @@ struct ExecutionStrategy {
   size_t num_threads_{0};
   bool use_cuda_{true};
   bool allow_op_delay_{false};
-  size_t num_iteration_per_drop_scope_{1};
+  size_t num_iteration_per_drop_scope_{100};
   ExecutorType type_{kDefault};
   bool dry_run_{false};
 };
paddle/fluid/framework/details/multi_devices_graph_pass.cc

(This diff is collapsed on the page; its contents are not shown.)
paddle/fluid/framework/details/multi_devices_graph_pass.h

@@ -45,7 +45,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
 #endif

   int GetVarDeviceID(
-      const ir::Graph &graph, const std::string &varname,
+      const std::string &varname,
       const std::unordered_map<std::string, int> &sharded_var_device) const;

   bool IsScaleLossOp(ir::Node *node) const;

@@ -57,12 +57,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
       ir::Graph *result, ir::Node *node,
       std::unordered_map<std::string, int> *sharded_var_device) const;

-  std::vector<std::string> FindDistTrainSendVars(
-      const std::vector<ir::Node *> &nodes) const;
-
-  std::vector<std::string> FindDistTrainRecvVars(
-      const std::vector<ir::Node *> &nodes) const;
-
   void CreateComputationalOps(ir::Graph *result, ir::Node *node,
                               size_t num_places) const;

@@ -77,7 +71,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
                                     int dev_id) const;

-  int GetOpDeviceID(const ir::Graph &graph, ir::Node *node,
+  int GetOpDeviceID(ir::Node *node,
                     const std::unordered_map<std::string, int> &sharded_var_device) const;

   void InsertAllReduceOp(ir::Graph *result, const std::string &og) const;

@@ -100,6 +94,15 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
   void SetCommunicationContext(OpHandleBase *op_handle,
                                const platform::Place &p) const;

+  std::vector<ir::Node *> SortForReduceMode(
+      const std::vector<ir::Node *> &) const;
+
+  int GetOpDeviceID(
+      ir::Node *node,
+      const std::unordered_map<std::string, int> &shared_var_device,
+      std::unordered_map<std::string, std::vector<ir::Node *>> *delay_ops)
+      const;
+
   mutable std::string loss_var_name_;
   mutable std::vector<platform::Place> places_;
   mutable std::vector<Scope *> local_scopes_;
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc

@@ -85,7 +85,6 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
     drop_scope_counter_ = 0;
   }
-
   if (eptr) {
     std::rethrow_exception(eptr);
   } else {
paddle/fluid/framework/details/variable_visitor.cc

@@ -24,7 +24,7 @@ static void VisitVariable(Variable* var, Func* func) {
   } else if (var->IsType<SelectedRows>()) {
     (*func)(var->GetMutable<SelectedRows>());
   } else {
-    PADDLE_THROW("Not supported type %s", var->Type().name());
+    PADDLE_THROW("Not supported type %s", ToTypeName(var->Type()));
   }
 }

@@ -35,7 +35,7 @@ static void VisitVariable(const Variable& var, Func* func) {
   } else if (var.IsType<SelectedRows>()) {
     (*func)(var.Get<SelectedRows>());
   } else {
-    PADDLE_THROW("Not supported type %s", var.Type().name());
+    PADDLE_THROW("Not supported type %s", ToTypeName(var.Type()));
   }
 }
paddle/fluid/framework/executor.cc

@@ -119,7 +119,7 @@ static void DeleteUnusedTensors(
       }
     } else {
       PADDLE_THROW("Type %s of %s is not supported eager deletion",
-                   var->Type().name(), name);
+                   framework::ToTypeName(var->Type()), name);
     }
   }
 }
paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc

@@ -40,18 +40,20 @@ framework::proto::OpDesc PrepareOpDesc(
     const std::string& output) {
   auto proto = base_desc;
   framework::OpDesc desc(proto, nullptr);
   desc.SetType("conv2d_fusion");
   desc.SetInput("Bias", {bias});
   desc.SetInput("ResidualData", {bias1});
   desc.SetAttr("activation", activation);
   desc.SetOutput("Output", {output});
+  desc.SetAttr("is_test", true);
+  desc.SetAttr("use_cudnn", false);
   desc.Flush();
   return *desc.Proto();
 }

 std::unique_ptr<ir::Graph> ConvElementwiseAdd2ActFusePass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
-  const std::string pattern_name = "conv_elementwise_add_act_fuse";
+  const std::string pattern_name = "conv_elementwise_add2_act_fuse";
   FusePassBase::Init(pattern_name, graph.get());

   GraphPatternDetector gpd;

@@ -76,22 +78,23 @@ std::unique_ptr<ir::Graph> ConvElementwiseAdd2ActFusePass::ApplyImpl(
     framework::OpDesc new_op_desc(new_op_proto, nullptr);

     // Create a new node for the fused op.
-    graph->CreateOpNode(&new_op_desc);
+    auto* new_conv_op = graph->CreateOpNode(&new_op_desc);

     // Link inputs and outputs.
     PADDLE_ENFORCE(subgraph.count(x));
     auto* conv_in_node = subgraph.at(x);

-    IR_NODE_LINK_TO(conv_in_node, conv_op);            // Input
-    IR_NODE_LINK_TO(conv_filter, conv_op);             // Filter
-    IR_NODE_LINK_TO(conv_op, conv_out);                // Output
-    IR_NODE_LINK_TO(elementwise_add_in_y, conv_op);    // Bias
-    IR_NODE_LINK_TO(elementwise_add_in_y_1, conv_op);  // Bias
+    IR_NODE_LINK_TO(conv_in_node, new_conv_op);            // Input
+    IR_NODE_LINK_TO(conv_filter, new_conv_op);             // Filter
+    IR_NODE_LINK_TO(elementwise_add_in_y, new_conv_op);    // Bias
+    IR_NODE_LINK_TO(elementwise_add_in_y_1, new_conv_op);  // Bias
+    IR_NODE_LINK_TO(new_conv_op, act_out);                 // Output

     // Delete the unneeded nodes.
-    GraphSafeRemoveNodes(graph.get(),
-                         {conv_op, elementwise_add_op, elementwise_add_op_1,
-                          elementwise_add_out});
+    GraphSafeRemoveNodes(
+        graph.get(),
+        {conv_op, conv_out, elementwise_add_op, elementwise_add_op_1,
+         elementwise_add_out, elementwise_add_out_1, act_op});
   };
   gpd(graph.get(), handler);

   return graph;
paddle/fluid/framework/ir/graph.cc

@@ -20,102 +20,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/var_desc.h"

-DEFINE_bool(enforce_when_check_program, true,
-            "Checking whether the program is correct or not. We will log "
-            "errors rather than throwing exceptions if this flag turned off");
-
 namespace paddle {
 namespace framework {
 namespace ir {
-namespace {
-
-void CheckProgram(const ProgramDesc &program) {
-#define _INT(role) static_cast<int>(role)
-
-  std::map<int, bool> visit;
-  for (OpDesc *op : program.Block(0).AllOps()) {
-    // For backward compatibility, some program doesn't have role added.
-    if (!op->HasAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) continue;
-    int role_id =
-        boost::get<int>(op->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
-    visit[role_id] = true;
-    switch (role_id) {
-      case _INT(OpRole::kForward):
-        if (visit.find(_INT(OpRole::kBackward)) != visit.end()) {
-          LOG(ERROR) << "Cannot add backward operator before forward operator "
-                     << op->Type();
-        }
-        break;
-      case _INT(OpRole::kBackward):
-      case _INT(OpRole::kBackward) | _INT(OpRole::kLoss):
-        if (!FLAGS_enforce_when_check_program) {
-          PADDLE_ENFORCE(
-              visit.find(_INT(OpRole::kOptimize)) == visit.end(),
-              "Cannot add backward operator %s after optimize operator.",
-              op->Type());
-        } else {
-          if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
-            LOG(ERROR)
-                << "Cannot add backward operator %s after optimize operator."
-                << op->Type();
-          }
-        }
-        break;
-      case _INT(OpRole::kForward) | _INT(OpRole::kLoss):
-        if (!FLAGS_enforce_when_check_program) {
-          PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
-                                    _INT(OpRole::kLoss)) == visit.end(),
-                         "Cannot add backward|loss operator before "
-                         "forward|loss operator %s.",
-                         op->Type());
-          PADDLE_ENFORCE(
-              visit.find(_INT(OpRole::kOptimize)) == visit.end(),
-              "Cannot add forward|loss operator %s after optimize operator.",
-              op->Type());
-        } else {
-          if (visit.find(_INT(OpRole::kBackward) | _INT(OpRole::kLoss)) !=
-              visit.end()) {
-            LOG(ERROR) << "Cannot add backward|loss operator before "
-                       << "forward|loss operator %s." << op->Type();
-          }
-          if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
-            LOG(ERROR) << "Cannot add forward|loss operator %s after optimize "
-                          "operator."
-                       << op->Type();
-          }
-        }
-        break;
-      case _INT(OpRole::kOptimize):
-      case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched):
-        if (!FLAGS_enforce_when_check_program) {
-          PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
-                         "Optimize operators %s must follow backward operator.",
-                         op->Type());
-        } else {
-          if (visit.find(_INT(OpRole::kBackward)) == visit.end()) {
-            LOG(ERROR)
-                << "Optimize operators %s must follow backward operator."
-                << op->Type();
-          }
-        }
-        break;
-      case _INT(OpRole::kLRSched):
-      case _INT(OpRole::kDist):
-      case _INT(OpRole::kRPC):
-      case _INT(OpRole::kNotSpecified):
-        break;
-      default:
-        LOG(FATAL) << "Unknown operator role. Don't add new role because "
-                      "you don't know what you are doing.";
-    }
-  }
-#undef _INT
-}
-}  // namespace

 Graph::Graph(const ProgramDesc &program) : program_(program) {
-  CheckProgram(program_);
   auto var_nodes = InitFromProgram(program_);
   ResolveHazard(var_nodes);
 }
paddle/fluid/framework/ir/graph_pattern_detector.cc

@@ -1101,9 +1101,7 @@ PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) {
   return out_var;
 }

-std::unordered_set<std::string> conv_act_set({"identity", "sigmoid", "relu",
-                                              "relu6", "relux", "tanh",
-                                              "band_pass"});
+std::unordered_set<std::string> conv_act_set({"identity", "relu"});

 PDNode *patterns::ConvElementwiseaddAct::operator()(PDNode *conv_in) {
   conv_in->AsInput();

@@ -1169,13 +1167,13 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
                                       ->AsInput();
   auto elementwise_add_out = pattern->NewNode(elementwise_add_out_repr())
                                  ->assert_is_op_output("elementwise_add")
-                                 ->assert_is_op_input("elementwise_add", "X")
+                                 ->assert_is_op_input("elementwise_add", "Y")
                                  ->AsIntermediate();

   auto elementwise_add_op_1 = pattern->NewNode(elementwise_add_op_1_repr())
                                   ->assert_is_op("elementwise_add");
   auto elementwise_add_in_y_1 = pattern->NewNode(elementwise_add_in_y_1_repr())
-                                    ->assert_is_op_input("elementwise_add", "Y")
+                                    ->assert_is_op_input("elementwise_add", "X")
                                     ->AsInput();
   auto elementwise_add_out_1 = pattern->NewNode(elementwise_add_out_1_repr())
                                    ->assert_is_op_output("elementwise_add")

@@ -1203,8 +1201,8 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
   conv_op->LinksFrom({conv_in, conv_filter}).LinksTo({conv_out});
   elementwise_add_op->LinksFrom({conv_out, elementwise_add_in_y})
       .LinksTo({elementwise_add_out});
-  elementwise_add_op_1->LinksFrom(
-      {elementwise_add_out, elementwise_add_in_y_1});
+  elementwise_add_op_1->LinksFrom({elementwise_add_out, elementwise_add_in_y_1})
+      .LinksTo({elementwise_add_out_1});
   act_op->LinksFrom({elementwise_add_out_1}).LinksTo({act_out});

   return act_out;
 }
paddle/fluid/framework/mixed_vector.h

@@ -215,8 +215,8 @@ class Vector {
     auto stream = dev_ctx->stream();
     void *src = gpu_->ptr();
    void *dst = cpu_.data();
-    memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
-                 gpu_->size(), stream);
+    paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
+                         gpu_->size(), stream);
     dev_ctx->Wait();
   }

@@ -261,8 +261,8 @@ class Vector {
     auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
         platform::DeviceContextPool::Instance().Get(place));
     auto stream = dev_ctx->stream();
-    memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
-                 gpu_->size(), stream);
+    paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
+                         gpu_->size(), stream);
   }

   void ImmutableCPU() const {

@@ -284,7 +284,7 @@ class Vector {
   bool IsInCPU() const { return flag_ & kDataInCPU; }

   mutable std::vector<T> cpu_;
-  mutable memory::AllocationPtr gpu_;
+  mutable paddle::memory::AllocationPtr gpu_;
   mutable int flag_;

   mutable std::mutex mtx_;
paddle/fluid/framework/op_proto_maker.cc

@@ -82,10 +82,6 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
   AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namesope.")
       .SetDefault("");

-  AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
-                                    "Callstack for Op Creatation.")
-      .SetDefault({});
-
   Validate();
 }
paddle/fluid/framework/op_proto_maker.h

@@ -47,7 +47,6 @@ class OpProtoAndCheckerMaker {
   static const char *OpRoleAttrName() { return "op_role"; }
   static const char *OpRoleVarAttrName() { return "op_role_var"; }
   static const char *OpNamescopeAttrName() { return "op_namescope"; }
-  static const char *OpCreationCallstackAttrName() { return "op_callstack"; }

   void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
paddle/fluid/framework/op_registry.h

@@ -23,7 +23,8 @@ limitations under the License. */
 #include <unordered_map>
 #include <unordered_set>

-#include "glog/logging.h"  // For VLOG()
+#define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
+#include "glog/logging.h"               // For VLOG()
 #include "paddle/fluid/framework/attribute.h"
 #include "paddle/fluid/framework/details/op_registry.h"
 #include "paddle/fluid/framework/framework.pb.h"
paddle/fluid/framework/operator.cc

@@ -16,15 +16,10 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <algorithm>
-#include <sstream>
-#include <string>
-#include <vector>
-#include "gflags/gflags.h"
-#include "glog/logging.h"
 #include "paddle/fluid/framework/data_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"

@@ -162,59 +157,27 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
 }

 void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
-  try {
-    if (VLOG_IS_ON(4)) {
-      VLOG(4) << place << " " << DebugStringEx(&scope);
-    }
-    if (platform::is_gpu_place(place)) {
+  VLOG(4) << place << " " << DebugStringEx(&scope);
+  if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
-      PADDLE_THROW("Cannot run operator on place %s", place);
+    PADDLE_THROW("Cannot run operator on place %s", place);
 #else
-      auto dev_id = boost::get<platform::CUDAPlace>(place).device;
-      platform::SetDeviceId(dev_id);
+    auto dev_id = boost::get<platform::CUDAPlace>(place).device;
+    platform::SetDeviceId(dev_id);
 #endif
-    }
-    // The profile has a process-wide mutex, results in serious performance
-    // issue
-    // in concurrency scenerio. Here use an `if` to fix this issue.
-    // Please not remove the `if`, ask @Superjomn if there are any concern.
-    if (platform::IsProfileEnabled()) {
-      platform::DeviceContextPool& pool =
-          platform::DeviceContextPool::Instance();
-      platform::RecordEvent record_event(Type(), pool.Get(place));
-      RunImpl(scope, place);
-    } else {
-      RunImpl(scope, place);
-    }
-    if (VLOG_IS_ON(3)) {
-      VLOG(3) << place << " " << DebugStringEx(&scope);
-    }
-  } catch (platform::EnforceNotMet exception) {
-    if (Attrs().count("sub_block") != 0) {
-      throw exception;
-    }
-    auto& callstack = Attr<std::vector<std::string>>(
-        OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
-    if (callstack.empty()) {
-      throw exception;
-    }
-    std::ostringstream sout;
-    sout << "Invoke operator " << Type() << " error.\n";
-    sout << "Python Callstacks:\n";
-    for (auto& line : callstack) {
-      sout << line;
-    }
-    sout << "C++ Callstacks:\n";
-    sout << exception.err_str_;
-    exception.err_str_ = sout.str();
-    throw exception;
-  } catch (...) {
-    std::rethrow_exception(std::current_exception());
   }
+
+  // The profile has a process-wide mutex, results in serious performance issue
+  // in concurrency scenerio. Here use an `if` to fix this issue.
+  // Please not remove the `if`, ask @Superjomn if there are any concern.
+  if (platform::IsProfileEnabled()) {
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    platform::RecordEvent record_event(Type(), pool.Get(place));
+    RunImpl(scope, place);
+  } else {
+    RunImpl(scope, place);
+  }
+  VLOG(3) << place << " " << DebugStringEx(&scope);
 }

 bool OperatorBase::HasInputs(const std::string& name) const {

@@ -417,7 +380,7 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
     return &(var.Get<SelectedRows>().value());
   } else {
     PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                 var.Type().name());
+                 ToTypeName(var.Type()));
   }
 }

@@ -428,7 +391,7 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
     return var->GetMutable<SelectedRows>()->mutable_value();
   } else {
     PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                 var->Type().name());
+                 ToTypeName(var->Type()));
   }
 }

@@ -522,7 +485,7 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
                    PADDLE_ENFORCE(
                        var->IsType<LoDTensor>(),
                        "should be LoDTensor, but the received type is %s",
-                       var->Type().name());
+                       ToTypeName(var->Type()));
                    return &(var->Get<LoDTensor>());
                  });
   return res;

@@ -541,7 +504,7 @@ const std::vector<const Tensor*> ExecutionContext::LegacyMultiInput<Tensor>(
                    PADDLE_ENFORCE(
                        var->IsType<LoDTensor>(),
                        "%s should be LoDTensor, but the received type is %s",
-                       sub_name, var->Type().name());
+                       sub_name, ToTypeName(var->Type()));
                    return &(var->Get<LoDTensor>());
                  });
   return res;

@@ -570,7 +533,7 @@ std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
                    PADDLE_ENFORCE(
                        var->IsType<LoDTensor>(),
                        "%s should be LoDTensor, but the received type is %s",
-                       sub_name, var->Type().name());
+                       sub_name, ToTypeName(var->Type()));
                    return var->GetMutable<LoDTensor>();
                  });
   return res;

@@ -812,7 +775,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
       PADDLE_THROW(
           "Only LoDTensor/SelectedRows support 'GetDim', but Variables "
          "type_id is %s.",
-          var->Type().name());
+          ToTypeName(var->Type()));
     }
   }

@@ -835,7 +798,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
       var->GetMutable<SelectedRows>()->set_height(dim[0]);
     } else {
       PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                   var->Type().name());
+                   ToTypeName(var->Type()));
     }
   }
paddle/fluid/framework/operator.h

@@ -49,6 +49,8 @@ constexpr char kTempVarName[] = "@TEMP@";
 /// e.g. Variable "x@GRAD" is the gradient of varibale "x".
 constexpr char kGradVarSuffix[] = "@GRAD";

+constexpr size_t kGradVarSuffixSize = 5U;
+
 /// Variables with this suffix are supposed to be filled up with zeros.
 constexpr char kZeroVarSuffix[] = "@ZERO";

@@ -60,7 +62,11 @@ constexpr char kNewGradSuffix[] = "@NEWGRAD@";
 extern std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;

 inline std::string GradVarName(const std::string& var_name) {
-  return var_name + kGradVarSuffix;
+  std::string result;
+  result.reserve(var_name.size() + kGradVarSuffixSize);
+  result += var_name;
+  result += kGradVarSuffix;
+  return result;
 }

 proto::VarType::Type GetDataTypeOfVar(const Variable* var);

@@ -110,8 +116,8 @@ class OperatorBase {
   bool HasAttr(const std::string& name) const { return attrs_.count(name); }
   template <typename T>
   inline const T& Attr(const std::string& name) const {
-    PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should be in AttributeMap",
-                   name);
+    PADDLE_ENFORCE(attrs_.find(name) != attrs_.end(),
+                   "%s should be in AttributeMap", name);
     return boost::get<T>(attrs_.at(name));
   }
   const AttributeMap& Attrs() const { return attrs_; }
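An aside on the GradVarName change above: the rewrite trades the temporary string produced by `var_name + kGradVarSuffix` for a single allocation sized up front with reserve(). This is a minimal standalone sketch reproducing the diff's logic; the main() harness and assertion are ours, not Paddle's.

#include <cassert>
#include <string>

constexpr char kGradVarSuffix[] = "@GRAD";
constexpr size_t kGradVarSuffixSize = 5U;  // length of "@GRAD"

inline std::string GradVarName(const std::string &var_name) {
  std::string result;
  // One allocation of exactly the final size, then two append operations;
  // operator+ would first build a temporary and may reallocate.
  result.reserve(var_name.size() + kGradVarSuffixSize);
  result += var_name;
  result += kGradVarSuffix;
  return result;
}

int main() {
  assert(GradVarName("x") == "x@GRAD");
  return 0;
}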
paddle/fluid/framework/parallel_executor.cc

@@ -320,6 +320,7 @@ void ParallelExecutor::BCastParamsToDevices(
     if (paddle::platform::is_gpu_place(main_tensor.place())) {
 #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       std::vector<void *> buffers;
+      buffers.reserve(member_->places_.size());
       size_t numel = main_tensor.numel();
       ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
       for (size_t i = 0; i < member_->places_.size(); ++i) {

@@ -353,9 +354,7 @@ void ParallelExecutor::BCastParamsToDevices(
 #endif
     } else {
       platform::CPUPlace cpu;
-      for (size_t i = 0; i < member_->places_.size(); ++i) {
-        if (i == 0) continue;
-
+      for (size_t i = 1; i < member_->places_.size(); ++i) {
         auto local_scope = member_->local_scopes_[i];
         auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
paddle/fluid/framework/scope.cc

@@ -47,15 +47,9 @@ DEFINE_bool(fast_eager_deletion_mode, false,
 // the mutex will cause serious performance issue.
 // So the mutex is disabled when `ON_INFER`.
 #ifdef PADDLE_ON_INFERENCE
-#define SCOPE_KIDS_READER_LOCK
-#define SCOPE_KIDS_WRITER_LOCK
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
+#define SCOPE_LOCK_GUARD
 #else
-#define SCOPE_KIDS_READER_LOCK AutoRDLock auto_lock(&kids_lock_);
-#define SCOPE_KIDS_WRITER_LOCK AutoWRLock auto_lock(&kids_lock_);
-#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
-#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
+#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
 #endif

 namespace paddle {

@@ -73,69 +67,64 @@ bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; }
 Scope::~Scope() { DropKids(); }

 Scope& Scope::NewScope() const {
-  Scope* child = new Scope(this);
-  {
-    SCOPE_KIDS_WRITER_LOCK
-    kids_.push_back(child);
-  }
-  return *child;
+  SCOPE_LOCK_GUARD
+  kids_.push_back(new Scope(this));
+  return *kids_.back();
 }

 Variable* Scope::Var(const std::string& name) {
-  SCOPE_VARS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
   return VarInternal(name);
 }

 Variable* Scope::Var(std::string* name) {
+  SCOPE_LOCK_GUARD
   auto new_name = string::Sprintf("%p.%d", this, vars_.size());
   if (name != nullptr) {
     *name = new_name;
   }
-  SCOPE_VARS_WRITER_LOCK
   return VarInternal(new_name);
 }

 Variable* Scope::FindVar(const std::string& name) const {
-  SCOPE_VARS_READER_LOCK
+  SCOPE_LOCK_GUARD
   return FindVarInternal(name);
 }

 Variable* Scope::FindLocalVar(const std::string& name) const {
-  SCOPE_VARS_READER_LOCK
+  SCOPE_LOCK_GUARD
   return FindVarLocally(name);
 }

 const Scope* Scope::FindScope(const Variable* var) const {
-  SCOPE_VARS_READER_LOCK
+  SCOPE_LOCK_GUARD
   return FindScopeInternal(var);
 }

 void Scope::DropKids() {
-  SCOPE_KIDS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
   for (Scope* s : kids_) delete s;
   kids_.clear();
 }

 bool Scope::HasKid(const Scope* scope) const {
-  SCOPE_KIDS_READER_LOCK
+  SCOPE_LOCK_GUARD
   auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
   return it != this->kids_.end();
 }

 std::vector<std::string> Scope::LocalVarNames() const {
+  SCOPE_LOCK_GUARD
   std::vector<std::string> known_vars;
-  {
-    SCOPE_VARS_READER_LOCK
-    known_vars.reserve(this->vars_.size());
-    for (auto& p : vars_) {
-      known_vars.emplace_back(p.first);
-    }
-  }
+  known_vars.reserve(this->vars_.size());
+  for (auto& p : vars_) {
+    known_vars.emplace_back(p.first);
+  }
   return known_vars;
 }

 void Scope::DeleteScope(Scope* scope) const {
-  SCOPE_KIDS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
   auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
   PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope",
                  this, scope);

@@ -149,8 +138,8 @@ void Scope::DeleteScope(Scope* scope) const {
 }

 void Scope::EraseVars(const std::vector<std::string>& var_names) {
+  SCOPE_LOCK_GUARD
   std::set<std::string> var_set(var_names.begin(), var_names.end());
-  SCOPE_VARS_WRITER_LOCK
   for (auto it = vars_.begin(); it != vars_.end();) {
     if (var_set.find(it->first) != var_set.end()) {
       it = vars_.erase(it);

@@ -162,12 +151,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
 void Scope::Rename(const std::string& origin_name,
                    const std::string& new_name) const {
-  SCOPE_VARS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
   RenameInternal(origin_name, new_name);
 }

 std::string Scope::Rename(const std::string& origin_name) const {
-  SCOPE_VARS_WRITER_LOCK
+  SCOPE_LOCK_GUARD
   auto new_name = string::Sprintf("%p.%d", this, vars_.size());
   RenameInternal(origin_name, new_name);
   return new_name;

@@ -176,11 +165,9 @@ std::string Scope::Rename(const std::string& origin_name) const {
 Variable* Scope::VarInternal(const std::string& name) {
   auto* v = FindVarLocally(name);
   if (v != nullptr) return v;
-
   v = new Variable();
-  vars_[name].reset(v);
+  vars_.emplace(name, std::unique_ptr<Variable>(v));
   VLOG(3) << "Create variable " << name;
   v->name_ = &(vars_.find(name)->first);
   return v;
 }
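The scope.cc hunks above collapse four reader/writer lock macros into a single SCOPE_LOCK_GUARD over one std::mutex. This is a toy sketch of the resulting locking discipline, assuming nothing about the real Scope beyond what the diff shows; ToyScope is an illustrative name, not Paddle code.

#include <mutex>
#include <vector>

class ToyScope {
 public:
  ~ToyScope() {
    for (auto *k : kids_) delete k;
  }

  ToyScope *NewScope() {
    // What SCOPE_LOCK_GUARD expands to in the non-inference build: one
    // coarse mutex replaces the separate kids_/vars_ reader-writer locks.
    std::lock_guard<std::mutex> lock(mutex_);
    kids_.push_back(new ToyScope);
    return kids_.back();
  }

 private:
  mutable std::mutex mutex_;
  std::vector<ToyScope *> kids_;
};

int main() {
  ToyScope root;
  root.NewScope();  // safe to call from multiple threads
  return 0;
}

The trade-off is simplicity over read concurrency: readers now serialize on the same mutex as writers, which the surrounding comment justifies by disabling the lock entirely under PADDLE_ON_INFERENCE.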
paddle/fluid/framework/scope.h

@@ -14,19 +14,12 @@ limitations under the License. */
 #pragma once

-extern "C" {
-#include <xxhash.h>
-}
 #include <functional>
 #include <list>
-#include <memory>
+#include <mutex>  // NOLINT
 #include <string>
 #include <unordered_map>
 #include <utility>
 #include <vector>

-#include "paddle/fluid/framework/rw_lock.h"
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/platform/macros.h"

@@ -138,8 +131,7 @@ class Scope {
   DISABLE_COPY_AND_ASSIGN(Scope);

  private:
-  mutable RWLock kids_lock_;
-  mutable RWLock vars_lock_;
+  mutable std::mutex mutex_;
 };

 // Generate some debug string about the inherience structure of scope, quite
paddle/fluid/framework/var_type.h

@@ -19,52 +19,50 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/selected_rows.h"
+#include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/variable.h"

 namespace paddle {
 namespace framework {

 template <typename T>
-inline bool IsType(const std::type_index& type_index) {
-  return type_index == std::type_index(typeid(T));
+inline bool IsType(const std::type_index& type) {
+  return type == typeid(T);
 }

-inline proto::VarType::Type ToVarType(std::type_index type) {
-  if (IsType<LoDTensor>(type)) {
-    return proto::VarType_Type_LOD_TENSOR;
-  } else if (IsType<LoDRankTable>(type)) {
-    return proto::VarType_Type_LOD_RANK_TABLE;
-  } else if (IsType<LoDTensorArray>(type)) {
-    return proto::VarType_Type_LOD_TENSOR_ARRAY;
-  } else if (IsType<SelectedRows>(type)) {
-    return proto::VarType_Type_SELECTED_ROWS;
-  } else if (IsType<ReaderHolder>(type)) {
-    return proto::VarType_Type_READER;
-  } else {
-    PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
+inline proto::VarType::Type ToVarType(int type) {
+  switch (type) {
+    case proto::VarType::LOD_TENSOR:
+    case proto::VarType::SELECTED_ROWS:
+    case proto::VarType::LOD_RANK_TABLE:
+    case proto::VarType::LOD_TENSOR_ARRAY:
+    case proto::VarType::READER:
+      return static_cast<proto::VarType::Type>(type);
+    default:
+      PADDLE_THROW("ToVarType:Unsupported type %d", type);
   }
 }

 template <typename Visitor>
 inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
-  switch (ToVarType(var.Type())) {
-    case proto::VarType_Type_LOD_TENSOR:
+  switch (var.Type()) {
+    case proto::VarType::LOD_TENSOR:
       visitor(var.Get<LoDTensor>());
       return;
-    case proto::VarType_Type_LOD_RANK_TABLE:
+    case proto::VarType::LOD_RANK_TABLE:
       visitor(var.Get<LoDRankTable>());
       return;
-    case proto::VarType_Type_LOD_TENSOR_ARRAY:
+    case proto::VarType::LOD_TENSOR_ARRAY:
       visitor(var.Get<LoDTensorArray>());
       return;
-    case proto::VarType_Type_SELECTED_ROWS:
+    case proto::VarType::SELECTED_ROWS:
       visitor(var.Get<SelectedRows>());
       return;
-    case proto::VarType_Type_READER:
+    case proto::VarType::READER:
       visitor(var.Get<ReaderHolder>());
       return;
     default:
-      PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
+      PADDLE_THROW("Not supported visit type, %s", ToTypeName(var.Type()));
   }
 }
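With var.Type() now returning an integer id, VisitVarType dispatches through a plain switch instead of comparing std::type_index values. A hypothetical visitor against the new interface follows; ShapePrinter is our illustrative name, not part of Paddle, and the snippet assumes it is compiled inside the Paddle source tree.

#include <iostream>
#include "paddle/fluid/framework/var_type.h"

// Handles LoDTensor and silently ignores every other registered var type.
struct ShapePrinter {
  void operator()(const paddle::framework::LoDTensor &t) const {
    std::cout << t.dims() << std::endl;
  }
  template <typename T>
  void operator()(const T &) const {}  // fall-through for other var types
};

// Usage, assuming `var` is a framework::Variable holding a LoDTensor:
//   paddle::framework::VisitVarType(var, ShapePrinter());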
paddle/fluid/framework/var_type_inference_test.cc

@@ -108,7 +108,7 @@ TEST(InferVarType, sum_op_without_infer_var_type) {
   op->InferVarType(prog.MutableBlock(0));

-  ASSERT_EQ(proto::VarType_Type_LOD_TENSOR,
+  ASSERT_EQ(proto::VarType::LOD_TENSOR,
             prog.MutableBlock(0)->Var("test2_out")->GetType());
 }
paddle/fluid/framework/var_type_traits.cc  (new file, mode 100644)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/macros.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include <cudnn.h>
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif

namespace paddle {
namespace framework {

// Besides registering variable type id, it is helpful to register a
// var_id -> std::type_index map (for example, get type names according to id)
namespace detail {

template <int kStart, int kEnd, bool kStop>
struct VarIdToTypeIndexMapInitializerImpl {
  template <typename MapType1, typename MapType2>
  static void Init(MapType1 *id_to_type, MapType2 *type_to_id) {
    using Type =
        typename std::tuple_element<kStart, VarTypeRegistry::ArgTuple>::type;
    static_assert(!std::is_same<Type, void>::value, "Type cannot be void");
    constexpr int kId = VarTypeTrait<Type>::kId;
    auto type = std::type_index(typeid(Type));
    PADDLE_ENFORCE(id_to_type->count(kId) == 0,
                   "Registered duplicate type id %d for type %s", kId,
                   type.name());
    PADDLE_ENFORCE(type_to_id->count(type) == 0,
                   "Registered duplicate type_index %s for id %d", type.name(),
                   kId);
    id_to_type->emplace(kId, type);
    type_to_id->emplace(type, kId);
    VarIdToTypeIndexMapInitializerImpl<kStart + 1, kEnd,
                                       kStart + 1 == kEnd>::Init(id_to_type,
                                                                 type_to_id);
  }
};

template <int kStart, int kEnd>
struct VarIdToTypeIndexMapInitializerImpl<kStart, kEnd, true> {
  template <typename MapType1, typename MapType2>
  static void Init(MapType1 *, MapType2 *) {}
};

// VarIdToTypeIndexMapInitializer is designed to initialize var_id ->
// std::type_index map and std::type_index -> var_id map
using VarIdToTypeIndexMapInitializer =
    VarIdToTypeIndexMapInitializerImpl<0, VarTypeRegistry::kRegisteredTypeNum,
                                       VarTypeRegistry::kRegisteredTypeNum ==
                                           0>;

struct VarIdToTypeIndexMapHolder {
  DISABLE_COPY_AND_ASSIGN(VarIdToTypeIndexMapHolder);

 public:
  static const std::type_index &ToTypeIndex(int var_id) {
    auto it = Instance().id_to_type_map_.find(var_id);
    PADDLE_ENFORCE(it != Instance().id_to_type_map_.end(),
                   "VarId %d is not registered.", var_id);
    return it->second;
  }

  static int ToTypeId(const std::type_index &type) {
    auto it = Instance().type_to_id_map_.find(type);
    PADDLE_ENFORCE(it != Instance().type_to_id_map_.end(),
                   "VarType %s is not registered.", type.name());
    return it->second;
  }

 private:
  VarIdToTypeIndexMapHolder() {
    VarIdToTypeIndexMapInitializer::Init(&id_to_type_map_, &type_to_id_map_);
  }

  static const VarIdToTypeIndexMapHolder &Instance() {
    static const VarIdToTypeIndexMapHolder instance;
    return instance;
  }

  std::unordered_map<int, std::type_index> id_to_type_map_;
  std::unordered_map<std::type_index, int> type_to_id_map_;
};

}  // namespace detail

const std::type_index &ToTypeIndex(int var_id) {
  return detail::VarIdToTypeIndexMapHolder::ToTypeIndex(var_id);
}

const char *ToTypeName(int var_id) { return ToTypeIndex(var_id).name(); }

int ToTypeId(const std::type_index &type) {
  return detail::VarIdToTypeIndexMapHolder::ToTypeId(type);
}

}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/var_type_traits.h  (new file, mode 100644)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <string>
#include <tuple>
#include <typeindex>
#include <vector>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#ifndef _WIN32
#include <nccl.h>
#endif
#endif

// Users should add forward declarations here
namespace paddle {

namespace platform {
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
class Communicator;
#endif
#endif
}  // namespace platform

namespace framework {
class Tensor;
class LoDTensor;
class SelectedRows;
class LoDRankTable;
class ReaderHolder;
class Scope;
}  // namespace framework

namespace operators {
template <typename T>
class AlgorithmsCache;

class CudnnRNNCache;

namespace reader {
class LoDTensorBlockingQueueHolder;
}  // namespace reader
}  // namespace operators

}  // namespace paddle

namespace paddle {
namespace framework {

const char *ToTypeName(int var_id);
const std::type_index &ToTypeIndex(int var_id);
int ToTypeId(const std::type_index &type);

namespace detail {

template <bool kStop, int kStart, int kEnd, typename T1, typename T2,
          typename... Args>
struct TypePosFinderImpl {
  static constexpr int kPos =
      std::is_same<T1, T2>::value
          ? kStart
          : TypePosFinderImpl<kStart + 2 == kEnd, kStart + 1, kEnd, T1,
                              Args...>::kPos;
};

template <int kStart, int kEnd, typename T1, typename T2>
struct TypePosFinderImpl<true, kStart, kEnd, T1, T2> {
  static constexpr int kPos = std::is_same<T1, T2>::value ? kStart : -1;
};

// TypePosFinder helps to find the position in which T is inside Args...
// If T is not inside Args..., kPos would be -1
template <typename T, typename... Args>
struct TypePosFinder {
  static constexpr int kPos =
      TypePosFinderImpl<sizeof...(Args) == 1, 0, sizeof...(Args), T,
                        Args...>::kPos;
};

template <typename... Args>
struct VarTypeRegistryImpl {
  static constexpr size_t kRegisteredTypeNum = sizeof...(Args);
  using ArgTuple = std::tuple<Args...>;

  // TypePos() returns the position in which T is inside Args...
  // If T is not inside Args..., return -1
  template <typename T>
  static constexpr int TypePos() {
    return TypePosFinder<T, Args...>::kPos;
  }

  // IsRegistered() returns whether T is registered inside RegistryImpl
  template <typename T>
  static constexpr bool IsRegistered() {
    return TypePos<T>() >= 0;
  }
};

}  // namespace detail

#define REG_PROTO_VAR_TYPE_TRAIT(type, proto_id)           \
  template <>                                              \
  struct VarTypeTrait<type> {                              \
    static_assert(VarTypeRegistry::IsRegistered<type>(),   \
                  "Must be registered type");              \
    using Type = type;                                     \
    static constexpr int kId = static_cast<int>(proto_id); \
  }

/**
 * The following codes are designed to register variable types.
 * Only registered types can be stored in Variable.
 * This registry mechanism is designed to speed up Variable.
 *
 * Caution: If you want to add more var types, please consider carefully
 * whether you really need to add it.
 */

// Users should add other variable types below.
// Paddle would generate unique Ids for each registered variable types.
using VarTypeRegistry = detail::VarTypeRegistryImpl<
    Tensor, LoDTensor, SelectedRows, std::vector<Scope *>, LoDRankTable,
    LoDTensorArray, platform::PlaceList, ReaderHolder, std::string, Scope *,
    std::map<size_t, Tensor>, operators::reader::LoDTensorBlockingQueueHolder,
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
    ncclUniqueId, platform::Communicator,
#endif
    operators::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>,
    operators::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>,
    operators::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>,
    operators::CudnnRNNCache,
#endif
    int, float>;

template <typename T>
struct VarTypeTrait {
  static_assert(VarTypeRegistry::IsRegistered<T>(), "Must be registered type");
  using Type = T;
  /**
   * Unique VarType Id generation.
   *
   * The auto-generated id should not be the same as any protobuf id defined in
   * framework.proto. Therefore, we generate id by adding the type pos and
   * maximum protobuf id (i.e., proto::VarType::TUPLE).
   *
   * However, we may need more protobuf id in the future.
   * To avoid changing this auto id generation algorithm frequently, we
   * generate id by adding the type pos and twice of maximum protobuf id (i.e.,
   * proto::VarType::TUPLE).
   */
  static constexpr int kId = VarTypeRegistry::TypePos<T>() +
                             static_cast<int>(proto::VarType::TUPLE) * 2;
};

// Users should set some of variable type ids to be what is defined in
// framework.proto below
REG_PROTO_VAR_TYPE_TRAIT(LoDTensor, proto::VarType::LOD_TENSOR);
REG_PROTO_VAR_TYPE_TRAIT(SelectedRows, proto::VarType::SELECTED_ROWS);
REG_PROTO_VAR_TYPE_TRAIT(std::vector<Scope *>, proto::VarType::STEP_SCOPES);
REG_PROTO_VAR_TYPE_TRAIT(LoDRankTable, proto::VarType::LOD_RANK_TABLE);
REG_PROTO_VAR_TYPE_TRAIT(LoDTensorArray, proto::VarType::LOD_TENSOR_ARRAY);
REG_PROTO_VAR_TYPE_TRAIT(platform::PlaceList, proto::VarType::PLACE_LIST);
REG_PROTO_VAR_TYPE_TRAIT(ReaderHolder, proto::VarType::READER);
REG_PROTO_VAR_TYPE_TRAIT(int, proto::VarType::INT32);
REG_PROTO_VAR_TYPE_TRAIT(float, proto::VarType::FP32);

/** End of variable type registration */

template <typename T>
inline constexpr bool IsRegisteredVarType() {
  return VarTypeRegistry::IsRegistered<T>();
}

#undef REG_PROTO_VAR_TYPE_TRAIT
}  // namespace framework
}  // namespace paddle
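The registry above rests on TypePosFinder, a compile-time search for a type's index inside a parameter pack. The following is a self-contained re-derivation of that idea, deliberately simplified relative to Paddle's implementation (which threads an explicit kStop flag through TypePosFinderImpl to bound template instantiation); the names here are ours.

#include <type_traits>

// TypePos<T, Args...>::kPos is the index of T within Args..., or -1.
template <typename T, typename... Args>
struct TypePos;

template <typename T>
struct TypePos<T> {
  static constexpr int kPos = -1;  // empty pack: not found
};

template <typename T, typename Head, typename... Tail>
struct TypePos<T, Head, Tail...> {
  static constexpr int kPos =
      std::is_same<T, Head>::value
          ? 0
          : (TypePos<T, Tail...>::kPos == -1
                 ? -1
                 : 1 + TypePos<T, Tail...>::kPos);
};

// Evaluated entirely at compile time, matching test_registry above.
static_assert(TypePos<int, char, int, float>::kPos == 1, "int is at index 1");
static_assert(TypePos<double, char, int, float>::kPos == -1, "double absent");

int main() { return 0; }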
paddle/fluid/framework/var_type_traits_test.cc
0 → 100644
浏览文件 @
a318a490
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif
namespace paddle {
namespace framework {

template <int kPos, int kEnd, bool kStop>
struct TypeIndexChecker {
  template <typename SetType1, typename SetType2>
  static void Check(SetType1 *var_id_set, SetType2 *type_index_set) {
    using Type =
        typename std::tuple_element<kPos, VarTypeRegistry::ArgTuple>::type;
    static_assert(std::is_same<typename VarTypeTrait<Type>::Type, Type>::value,
                  "Type must be the same");
    constexpr auto kId = VarTypeTrait<Type>::kId;
    std::type_index actual_type(typeid(Type));
    EXPECT_EQ(std::string(ToTypeName(kId)), std::string(actual_type.name()));
    EXPECT_EQ(ToTypeIndex(kId), actual_type);
    EXPECT_EQ(ToTypeId(actual_type), kId);
    EXPECT_EQ(ToTypeIndex(ToTypeId(actual_type)), actual_type);
    EXPECT_EQ(ToTypeId(ToTypeIndex(kId)), kId);
    EXPECT_TRUE(var_id_set->count(kId) == 0);              // NOLINT
    EXPECT_TRUE(type_index_set->count(actual_type) == 0);  // NOLINT
    var_id_set->insert(kId);
    type_index_set->insert(std::type_index(typeid(Type)));
    TypeIndexChecker<kPos + 1, kEnd, kPos + 1 == kEnd>::Check(var_id_set,
                                                              type_index_set);
  }
};

template <int kPos, int kEnd>
struct TypeIndexChecker<kPos, kEnd, true> {
  template <typename SetType1, typename SetType2>
  static void Check(SetType1 *, SetType2 *) {}
};

TEST(var_type_traits, check_no_duplicate_registry) {
  constexpr size_t kRegisteredNum = VarTypeRegistry::kRegisteredTypeNum;
  std::unordered_set<int> var_id_set;
  std::unordered_set<std::type_index> type_index_set;
  TypeIndexChecker<0, kRegisteredNum, kRegisteredNum == 0>::Check(
      &var_id_set, &type_index_set);
}

template <typename T>
bool CheckVarId(int proto_id) {
  static_assert(std::is_same<typename VarTypeTrait<T>::Type, T>::value,
                "Type must be the same");
  return VarTypeTrait<T>::kId == proto_id;
}

TEST(var_type_traits, check_proto_type_id) {
  ASSERT_TRUE(CheckVarId<LoDTensor>(proto::VarType::LOD_TENSOR));
  ASSERT_TRUE(CheckVarId<SelectedRows>(proto::VarType::SELECTED_ROWS));
  ASSERT_TRUE(CheckVarId<std::vector<Scope *>>(proto::VarType::STEP_SCOPES));
  ASSERT_TRUE(CheckVarId<LoDRankTable>(proto::VarType::LOD_RANK_TABLE));
  ASSERT_TRUE(CheckVarId<LoDTensorArray>(proto::VarType::LOD_TENSOR_ARRAY));
  ASSERT_TRUE(CheckVarId<platform::PlaceList>(proto::VarType::PLACE_LIST));
  ASSERT_TRUE(CheckVarId<ReaderHolder>(proto::VarType::READER));
  ASSERT_TRUE(CheckVarId<int>(proto::VarType::INT32));
  ASSERT_TRUE(CheckVarId<float>(proto::VarType::FP32));

  ASSERT_EQ(proto::VarType_Type_LOD_TENSOR, proto::VarType::LOD_TENSOR);
  ASSERT_EQ(proto::VarType_Type_SELECTED_ROWS, proto::VarType::SELECTED_ROWS);
  ASSERT_EQ(proto::VarType_Type_STEP_SCOPES, proto::VarType::STEP_SCOPES);
  ASSERT_EQ(proto::VarType_Type_LOD_RANK_TABLE, proto::VarType::LOD_RANK_TABLE);
  ASSERT_EQ(proto::VarType_Type_LOD_TENSOR_ARRAY,
            proto::VarType::LOD_TENSOR_ARRAY);
  ASSERT_EQ(proto::VarType_Type_PLACE_LIST, proto::VarType::PLACE_LIST);
  ASSERT_EQ(proto::VarType_Type_READER, proto::VarType::READER);
  ASSERT_EQ(proto::VarType_Type_FEED_MINIBATCH, proto::VarType::FEED_MINIBATCH);
  ASSERT_EQ(proto::VarType_Type_FETCH_LIST, proto::VarType::FETCH_LIST);
  ASSERT_EQ(proto::VarType_Type_RAW, proto::VarType::RAW);
  ASSERT_EQ(proto::VarType_Type_TUPLE, proto::VarType::TUPLE);
  ASSERT_EQ(proto::VarType_Type_INT32, proto::VarType::INT32);
  ASSERT_EQ(proto::VarType_Type_FP32, proto::VarType::FP32);
}

TEST(var_type_traits, test_registry) {
  using Registry = detail::VarTypeRegistryImpl<int8_t, int32_t, size_t, double>;
  ASSERT_TRUE(Registry::TypePos<int8_t>() == 0);
  ASSERT_TRUE(Registry::TypePos<int32_t>() == 1);
  ASSERT_TRUE(Registry::TypePos<size_t>() == 2);
  ASSERT_TRUE(Registry::TypePos<double>() == 3);
  ASSERT_TRUE(Registry::TypePos<float>() == -1);
}

}  // namespace framework
}  // namespace paddle
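The recursive TypeIndexChecker above is a standard compile-time tuple walk. A stripped-down, self-contained version of the same pattern (the ArgTuple stand-in and the printf body are illustrative only, not the real VarTypeRegistry):

#include <cstdio>
#include <tuple>

using ArgTuple = std::tuple<int, float, double>;  // stand-in registry

template <int kPos, int kEnd, bool kStop>
struct Walker {
  static void Run() {
    using Type = typename std::tuple_element<kPos, ArgTuple>::type;
    std::printf("slot %d holds a type of size %zu\n", kPos, sizeof(Type));
    // The third parameter flips to true on the last element, selecting the
    // empty specialization below and ending the recursion.
    Walker<kPos + 1, kEnd, kPos + 1 == kEnd>::Run();
  }
};

template <int kPos, int kEnd>
struct Walker<kPos, kEnd, true> {
  static void Run() {}
};

int main() {
  constexpr int kN = std::tuple_size<ArgTuple>::value;
  Walker<0, kN, kN == 0>::Run();
  return 0;
}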
paddle/fluid/framework/variable.h
...
...
@@ -18,7 +18,7 @@
 #include <typeindex>
 #include <typeinfo>
-#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/framework/var_type_traits.h"
 namespace paddle {
 namespace framework {
...
...
@@ -27,10 +27,14 @@ class Variable {
 public:
  template <typename T>
  const T& Get() const {
+   static_assert(
+       IsRegisteredVarType<T>(),
+       "Not registered type. Please register T inside var_type_traits.h");
    PADDLE_ENFORCE(holder_ != nullptr, "Variable must hold some thing");
-   PADDLE_ENFORCE(IsType<T>(),
-                  "Variable must be type %s, the holding type is %s",
-                  typeid(T).name(), holder_->Type().name());
+   PADDLE_ENFORCE(holder_->Type() == VarTypeTrait<T>::kId,
+                  "Variable must be type %s, the holding type is %s",
+                  ToTypeName(VarTypeTrait<T>::kId),
+                  ToTypeName(holder_->Type()));
    return *static_cast<const T*>(holder_->Ptr());
  }
...
...
@@ -39,61 +43,61 @@ class Variable {
  template <typename T>
  T* GetMutable() {
    if (!holder_) {
-     holder_.reset(new PlaceholderImpl<T>(new T()));
+     holder_.reset(new PlaceholderImpl<T>());
    } else {
-     PADDLE_ENFORCE(IsType<T>(),
-                    "Variable must be type %s, the holding type is %s",
-                    typeid(T).name(), holder_->Type().name());
+     PADDLE_ENFORCE(holder_->Type() == VarTypeTrait<T>::kId,
+                    "Variable must be type %s, the holding type is %s",
+                    ToTypeName(VarTypeTrait<T>::kId),
+                    ToTypeName(holder_->Type()));
    }
    return static_cast<T*>(holder_->Ptr());
  }

  template <typename T>
  bool IsType() const {
-   return holder_ != nullptr &&
-          std::type_index(typeid(T)) == std::type_index(holder_->Type());
+   return holder_ && holder_->Type() == VarTypeTrait<T>::kId;
  }

  void Clear() { holder_.reset(); }

- std::type_index Type() const {
+ int Type() const {
    PADDLE_ENFORCE(holder_ != nullptr, "Must hold memory");
    return holder_->Type();
  }

 private:
  struct Placeholder {
-   virtual ~Placeholder() {}
-   virtual const std::type_info& Type() const = 0;
-   virtual void* Ptr() const = 0;
+   virtual ~Placeholder() = default;
+
+   inline int Type() const { return type_; }
+   inline const void* Ptr() const { return ptr_; }
+   inline void* Ptr() { return ptr_; }
+
+  protected:
+   inline void Init(void* p, int type) {
+     ptr_ = p;
+     type_ = type;
+   }
+
+   void* ptr_;
+   int type_;
  };

  // Placeholder hides type T, so it doesn't appear as a template
  // parameter of Variable.
  template <typename T>
  struct PlaceholderImpl : public Placeholder {
-   explicit PlaceholderImpl(T* ptr) : ptr_(ptr), type_(typeid(T)) {}
-   virtual const std::type_info& Type() const { return type_; }
-   virtual void* Ptr() const { return static_cast<void*>(ptr_.get()); }
+   static_assert(
+       IsRegisteredVarType<T>(),
+       "Not registered type. Please register T inside var_type_traits.h");
+   PlaceholderImpl() { this->Init(&obj_, VarTypeTrait<T>::kId); }

-   std::unique_ptr<T> ptr_;
-   const std::type_info& type_;
+  private:
+   T obj_;
  };

- std::unique_ptr<Placeholder> holder_;  // pointers to a PlaceholderImpl object indeed.
  // name_ is only meaningful with a Scope and accessible by it.
  //
  // NOTE: Please don't expose name_ by adding methods like
  // Variable::Name or Scope::VarName! A variable could have a human
  // readable name or an auto-generated scope-unique name. In the
  // former case, the caller knows the name and doesn't need to access
  // the name; in the latter case, the variable should be identified
  // by its address but not the unreadable name.
  friend class Scope;
  const std::string* name_;
+
+ // pointers to a PlaceholderImpl object indeed.
+ std::unique_ptr<Placeholder> holder_;
 };
}  // namespace framework
...
...
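The net effect of the holder rework is easiest to see in isolation. A minimal standalone sketch (simplified names; the real class is in the diff above): the held object now lives inline inside the placeholder and is tagged with an integer id at Init() time, so type checks become a single int compare and GetMutable no longer heap-allocates the T separately.

#include <cassert>
#include <memory>
#include <string>

struct Placeholder {
  virtual ~Placeholder() = default;
  int Type() const { return type_; }
  void* Ptr() { return ptr_; }

 protected:
  void Init(void* p, int type) { ptr_ = p; type_ = type; }
  void* ptr_;
  int type_;
};

template <typename T, int kId>  // kId plays the role of VarTypeTrait<T>::kId
struct PlaceholderImpl : Placeholder {
  PlaceholderImpl() { this->Init(&obj_, kId); }

 private:
  T obj_;  // stored inline; constructed together with the placeholder
};

int main() {
  std::unique_ptr<Placeholder> holder(new PlaceholderImpl<std::string, 42>());
  assert(holder->Type() == 42);  // IsType<T>() reduces to this int compare
  *static_cast<std::string*>(holder->Ptr()) = "1234";
}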
paddle/fluid/framework/variable_test.cc
...
...
@@ -16,27 +16,28 @@
 #include <string>
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/variable.h"

-TEST(Variable, GetMutable) {
-  using paddle::framework::Variable;
-
-  struct Tensor {
-    int content_;
-  };
+namespace paddle {
+namespace framework {
+
+TEST(Variable, GetMutable) {
   std::unique_ptr<Variable> v(new Variable());

-  Tensor* t = v->GetMutable<Tensor>();
-  t->content_ = 1234;
+  auto* t = v->GetMutable<std::string>();
+  *t = "1234";

-  const Tensor& tt = v->Get<Tensor>();
-  EXPECT_EQ(1234, tt.content_);
+  const auto& tt = v->Get<std::string>();
+  EXPECT_EQ("1234", tt);

   try {
-    v->GetMutable<std::string>();
+    v->GetMutable<Tensor>();
   } catch (std::exception& e) {
     return;
   }
   EXPECT_TRUE(false);
 }
+
+}  // namespace framework
+}  // namespace paddle
paddle/fluid/inference/analysis/analyzer_tester.cc
...
...
@@ -69,17 +69,17 @@ void TestWord2vecPrediction(const std::string& model_path) {
  std::vector<PaddleTensor> outputs;
  CHECK(predictor->Run(slots, &outputs));

- PADDLE_ENFORCE(outputs.size(), 1UL);
+ PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
  // Check the output buffer size and result of each tid.
- PADDLE_ENFORCE(outputs.front().data.length(), 33168UL);
+ PADDLE_ENFORCE_EQ(outputs.front().data.length(), 33168UL);
  float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
                     0.000932706};
  const size_t num_elements = outputs.front().data.length() / sizeof(float);
  // The outputs' buffers are in CPU memory.
  for (size_t i = 0; i < std::min(static_cast<size_t>(5UL), num_elements);
       i++) {
-   LOG(INFO) << "data: "
-             << static_cast<float*>(outputs.front().data.data())[i];
+   LOG(INFO) << "data: "
+             << static_cast<float*>(outputs.front().data.data())[i]
+             << " result: " << result[i];
    PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
                   result[i]);
  }
...
...
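The PADDLE_ENFORCE → PADDLE_ENFORCE_EQ swap above is a real fix, not cosmetics: PADDLE_ENFORCE evaluates only its first argument as a boolean condition, so a nonzero size always passed. A toy illustration of the difference (stand-in macros, not the real ones):

#include <cassert>

#define SKETCH_ENFORCE(cond, ...) assert(cond)      // extra args: message only
#define SKETCH_ENFORCE_EQ(a, b) assert((a) == (b))  // actually compares

int main() {
  unsigned long n = 2;        // pretend outputs.size() == 2
  SKETCH_ENFORCE(n, 1UL);     // silently passes: n is nonzero
  SKETCH_ENFORCE_EQ(n, 1UL);  // fires as intended: 2 != 1
}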
paddle/fluid/inference/api/details/reset_tensor_array.cc
...
...
@@ -25,7 +25,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
      // TODO(Superjomn) should avoid the case when a TensorArray is a
      // parameter.
      if (var_name == "feed" || var_name == "fetch") continue;
-     if (var->Type() == typeid(framework::LoDTensorArray)) {
+     if (var->IsType<framework::LoDTensorArray>()) {
        VLOG(4) << "collect " << var_name;
        arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
      }
...
...
paddle/fluid/inference/api/details/reset_tensor_array.h
...
...
@@ -27,8 +27,11 @@ namespace details {
 // training phase.
 struct TensorArrayBatchCleaner {
  TensorArrayBatchCleaner() {
-   valid_types_.insert(typeid(framework::Tensor));
-   valid_types_.insert(typeid(framework::LoDTensor));
+   constexpr auto kTensorId = framework::VarTypeTrait<framework::Tensor>::kId;
+   constexpr auto kLoDTensorId =
+       framework::VarTypeTrait<framework::LoDTensor>::kId;
+   valid_types_.insert(kTensorId);
+   valid_types_.insert(kLoDTensorId);
  }
  // Collect the variables that are not Tensor or LoDTensor, and reset them to a
  // bool(trick), because some of them are containers, and some operators just
...
...
@@ -46,7 +49,7 @@ struct TensorArrayBatchCleaner {
  bool no_tensor_flag_{true};
  std::vector<framework::LoDTensorArray *> arrays_;

- std::unordered_set<std::type_index> valid_types_;
+ std::unordered_set<int> valid_types_;
  std::unordered_set<framework::Variable *> no_tensor_vars_;
 };
...
...
paddle/fluid/inference/api/helper.h
...
...
@@ -113,6 +113,16 @@ static void TensorAssignData(PaddleTensor *tensor,
  }
 }

+template <typename T>
+static void TensorAssignData(PaddleTensor *tensor,
+                             const std::vector<std::vector<T>> &data,
+                             const std::vector<size_t> &lod) {
+  int size = lod[lod.size() - 1];
+  tensor->shape.assign({size, 1});
+  tensor->lod.assign({lod});
+  TensorAssignData(tensor, data);
+}
+
 template <typename T>
 static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
                                     const std::vector<std::vector<T>> &data) {
...
...
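A hedged usage sketch of the new overload (data values made up; the include path and namespaces are those of the inference API headers in this diff): three sequences of lengths 2, 3 and 1 come with lod {0, 2, 5, 6}, so the overload sets shape to {6, 1} from lod.back() before delegating to the flat TensorAssignData.

#include <vector>
#include "paddle/fluid/inference/api/helper.h"

int main() {
  std::vector<std::vector<int64_t>> batch = {{11, 12}, {21, 22, 23}, {31}};
  std::vector<size_t> lod = {0, 2, 5, 6};  // cumulative sequence lengths

  paddle::PaddleTensor tensor;
  tensor.name = "word";
  tensor.dtype = paddle::PaddleDType::INT64;
  // Sets shape {6, 1}, assigns lod, then copies the data.
  paddle::inference::TensorAssignData<int64_t>(&tensor, batch, lod);
}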
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
...
...
@@ -98,10 +98,8 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
  auto one_batch = data->NextBatch();
  PaddleTensor input_tensor;
  input_tensor.name = "word";
- input_tensor.shape.assign({static_cast<int>(one_batch.data.size()), 1});
- input_tensor.lod.assign({one_batch.lod});
  input_tensor.dtype = PaddleDType::INT64;
- TensorAssignData<int64_t>(&input_tensor, {one_batch.data});
+ TensorAssignData<int64_t>(&input_tensor, {one_batch.data}, one_batch.lod);
  PADDLE_ENFORCE_EQ(batch_size, static_cast<int>(one_batch.lod.size() - 1));
  input_slots->assign({input_tensor});
}
...
...
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
...
...
@@ -19,11 +19,9 @@ namespace inference {
 using contrib::AnalysisConfig;

 struct DataRecord {
-  std::vector<std::vector<int64_t>> query_data_all, title_data_all;
+  std::vector<std::vector<int64_t>> query, title;
   std::vector<size_t> lod1, lod2;
-  size_t batch_iter{0};
-  size_t batch_size{1};
-  size_t num_samples;  // total number of samples
+  size_t batch_iter{0}, batch_size{1}, num_samples;  // total number of samples
   DataRecord() = default;
   explicit DataRecord(const std::string &path, int batch_size = 1)
       : batch_size(batch_size) {
...
...
@@ -33,22 +31,9 @@ struct DataRecord {
     DataRecord data;
     size_t batch_end = batch_iter + batch_size;
     // NOTE skip the final batch, if no enough data is provided.
-    if (batch_end <= query_data_all.size()) {
-      data.query_data_all.assign(query_data_all.begin() + batch_iter,
-                                 query_data_all.begin() + batch_end);
-      data.title_data_all.assign(title_data_all.begin() + batch_iter,
-                                 title_data_all.begin() + batch_end);
-      // Prepare LoDs
-      data.lod1.push_back(0);
-      data.lod2.push_back(0);
-      CHECK(!data.query_data_all.empty());
-      CHECK(!data.title_data_all.empty());
-      CHECK_EQ(data.query_data_all.size(), data.title_data_all.size());
-      for (size_t j = 0; j < data.query_data_all.size(); j++) {
-        // calculate lod
-        data.lod1.push_back(data.lod1.back() + data.query_data_all[j].size());
-        data.lod2.push_back(data.lod2.back() + data.title_data_all[j].size());
-      }
+    if (batch_end <= query.size()) {
+      GetInputPerBatch(query, &data.query, &data.lod1, batch_iter, batch_end);
+      GetInputPerBatch(title, &data.title, &data.lod2, batch_iter, batch_end);
     }
     batch_iter += batch_size;
     return data;
...
...
@@ -67,8 +52,8 @@ struct DataRecord {
       // load title data
       std::vector<int64_t> title_data;
       split_to_int64(data[1], ' ', &title_data);
-      query_data_all.push_back(std::move(query_data));
-      title_data_all.push_back(std::move(title_data));
+      query.push_back(std::move(query_data));
+      title.push_back(std::move(title_data));
     }
     num_samples = num_lines;
   }
...
...
@@ -80,15 +65,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   lod_query_tensor.name = "left";
   lod_title_tensor.name = "right";
   auto one_batch = data->NextBatch();
-  int size1 = one_batch.lod1[one_batch.lod1.size() - 1];  // token batch size
-  int size2 = one_batch.lod2[one_batch.lod2.size() - 1];  // token batch size
-  lod_query_tensor.shape.assign({size1, 1});
-  lod_query_tensor.lod.assign({one_batch.lod1});
-  lod_title_tensor.shape.assign({size2, 1});
-  lod_title_tensor.lod.assign({one_batch.lod2});
   // assign data
-  TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query_data_all);
-  TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title_data_all);
+  TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query, one_batch.lod1);
+  TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title, one_batch.lod2);
   // Set inputs.
   input_slots->assign({lod_query_tensor, lod_title_tensor});
   for (auto &tensor : *input_slots) {
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
...
...
@@ -19,11 +19,9 @@ namespace inference {
 using contrib::AnalysisConfig;

 struct DataRecord {
-  std::vector<std::vector<int64_t>> word_data_all, mention_data_all;
+  std::vector<std::vector<int64_t>> word, mention;
   std::vector<size_t> lod;  // two inputs have the same lod info.
-  size_t batch_iter{0};
-  size_t batch_size{1};
-  size_t num_samples;  // total number of samples
+  size_t batch_iter{0}, batch_size{1}, num_samples;  // total number of samples
   DataRecord() = default;
   explicit DataRecord(const std::string &path, int batch_size = 1)
       : batch_size(batch_size) {
...
...
@@ -33,20 +31,10 @@ struct DataRecord {
     DataRecord data;
     size_t batch_end = batch_iter + batch_size;
     // NOTE skip the final batch, if no enough data is provided.
-    if (batch_end <= word_data_all.size()) {
-      data.word_data_all.assign(word_data_all.begin() + batch_iter,
-                                word_data_all.begin() + batch_end);
-      data.mention_data_all.assign(mention_data_all.begin() + batch_iter,
-                                   mention_data_all.begin() + batch_end);
-      // Prepare LoDs
-      data.lod.push_back(0);
-      CHECK(!data.word_data_all.empty());
-      CHECK(!data.mention_data_all.empty());
-      CHECK_EQ(data.word_data_all.size(), data.mention_data_all.size());
-      for (size_t j = 0; j < data.word_data_all.size(); j++) {
-        // calculate lod
-        data.lod.push_back(data.lod.back() + data.word_data_all[j].size());
-      }
+    if (batch_end <= word.size()) {
+      GetInputPerBatch(word, &data.word, &data.lod, batch_iter, batch_end);
+      GetInputPerBatch(mention, &data.mention, &data.lod, batch_iter, batch_end);
     }
     batch_iter += batch_size;
     return data;
...
...
@@ -65,8 +53,8 @@ struct DataRecord {
       // load mention data
       std::vector<int64_t> mention_data;
       split_to_int64(data[3], ' ', &mention_data);
-      word_data_all.push_back(std::move(word_data));
-      mention_data_all.push_back(std::move(mention_data));
+      word.push_back(std::move(word_data));
+      mention.push_back(std::move(mention_data));
     }
     num_samples = num_lines;
   }
...
...
@@ -78,14 +66,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   lod_word_tensor.name = "word";
   lod_mention_tensor.name = "mention";
   auto one_batch = data->NextBatch();
-  int size = one_batch.lod[one_batch.lod.size() - 1];  // token batch size
-  lod_word_tensor.shape.assign({size, 1});
-  lod_word_tensor.lod.assign({one_batch.lod});
-  lod_mention_tensor.shape.assign({size, 1});
-  lod_mention_tensor.lod.assign({one_batch.lod});
   // assign data
-  TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word_data_all);
-  TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention_data_all);
+  TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word, one_batch.lod);
+  TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention,
+                            one_batch.lod);
   // Set inputs.
   input_slots->assign({lod_word_tensor, lod_mention_tensor});
   for (auto &tensor : *input_slots) {
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...
...
@@ -18,12 +18,9 @@ namespace paddle {
 namespace inference {

 struct DataRecord {
-  std::vector<std::vector<int64_t>> title1_all, title2_all, title3_all, l1_all;
-  std::vector<size_t> title1_lod, title2_lod, title3_lod, l1_lod;
-  size_t batch_iter{0};
-  size_t batch_size{1};
-  size_t num_samples;  // total number of samples
+  std::vector<std::vector<int64_t>> title1, title2, title3, l1;
+  std::vector<size_t> lod1, lod2, lod3, l1_lod;
+  size_t batch_iter{0}, batch_size{1}, num_samples;  // total number of samples
   DataRecord() = default;
   explicit DataRecord(const std::string &path, int batch_size = 1)
       : batch_size(batch_size) {
...
...
@@ -33,41 +30,11 @@ struct DataRecord {
     DataRecord data;
     size_t batch_end = batch_iter + batch_size;
     // NOTE skip the final batch, if no enough data is provided.
-    if (batch_end <= title1_all.size()) {
-      data.title1_all.assign(title1_all.begin() + batch_iter,
-                             title1_all.begin() + batch_end);
-      data.title2_all.assign(title2_all.begin() + batch_iter,
-                             title2_all.begin() + batch_end);
-      data.title3_all.assign(title3_all.begin() + batch_iter,
-                             title3_all.begin() + batch_end);
-      data.l1_all.assign(l1_all.begin() + batch_iter,
-                         l1_all.begin() + batch_end);
-      // Prepare LoDs
-      data.title1_lod.push_back(0);
-      data.title2_lod.push_back(0);
-      data.title3_lod.push_back(0);
-      data.l1_lod.push_back(0);
-      CHECK(!data.title1_all.empty());
-      CHECK(!data.title2_all.empty());
-      CHECK(!data.title3_all.empty());
-      CHECK(!data.l1_all.empty());
-      CHECK_EQ(data.title1_all.size(), data.title2_all.size());
-      CHECK_EQ(data.title1_all.size(), data.title3_all.size());
-      CHECK_EQ(data.title1_all.size(), data.l1_all.size());
-      for (size_t j = 0; j < data.title1_all.size(); j++) {
-        data.title1.push_back(data.title1_all[j]);
-        data.title2.push_back(data.title2_all[j]);
-        data.title3.push_back(data.title3_all[j]);
-        data.l1.push_back(data.l1_all[j]);
-        // calculate lod
-        data.title1_lod.push_back(data.title1_lod.back() +
-                                  data.title1_all[j].size());
-        data.title2_lod.push_back(data.title2_lod.back() +
-                                  data.title2_all[j].size());
-        data.title3_lod.push_back(data.title3_lod.back() +
-                                  data.title3_all[j].size());
-        data.l1_lod.push_back(data.l1_lod.back() + data.l1_all[j].size());
-      }
+    if (batch_end <= title1.size()) {
+      GetInputPerBatch(title1, &data.title1, &data.lod1, batch_iter, batch_end);
+      GetInputPerBatch(title2, &data.title2, &data.lod2, batch_iter, batch_end);
+      GetInputPerBatch(title3, &data.title3, &data.lod3, batch_iter, batch_end);
+      GetInputPerBatch(l1, &data.l1, &data.l1_lod, batch_iter, batch_end);
     }
     batch_iter += batch_size;
     return data;
...
...
@@ -92,10 +59,10 @@ struct DataRecord {
       // load l1 data
       std::vector<int64_t> l1_data;
       split_to_int64(data[3], ' ', &l1_data);
-      title1_all.push_back(std::move(title1_data));
-      title2_all.push_back(std::move(title2_data));
-      title3_all.push_back(std::move(title3_data));
-      l1_all.push_back(std::move(l1_data));
+      title1.push_back(std::move(title1_data));
+      title2.push_back(std::move(title2_data));
+      title3.push_back(std::move(title3_data));
+      l1.push_back(std::move(l1_data));
     }
     num_samples = num_lines;
   }
...
...
@@ -109,24 +76,11 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   title3_tensor.name = "title3";
   l1_tensor.name = "l1";
   auto one_batch = data->NextBatch();
-  int title1_size = one_batch.title1_lod[one_batch.title1_lod.size() - 1];
-  title1_tensor.shape.assign({title1_size, 1});
-  title1_tensor.lod.assign({one_batch.title1_lod});
-  int title2_size = one_batch.title2_lod[one_batch.title2_lod.size() - 1];
-  title2_tensor.shape.assign({title2_size, 1});
-  title2_tensor.lod.assign({one_batch.title2_lod});
-  int title3_size = one_batch.title3_lod[one_batch.title3_lod.size() - 1];
-  title3_tensor.shape.assign({title3_size, 1});
-  title3_tensor.lod.assign({one_batch.title3_lod});
-  int l1_size = one_batch.l1_lod[one_batch.l1_lod.size() - 1];
-  l1_tensor.shape.assign({l1_size, 1});
-  l1_tensor.lod.assign({one_batch.l1_lod});
   // assign data
-  TensorAssignData<int64_t>(&title1_tensor, one_batch.title1);
-  TensorAssignData<int64_t>(&title2_tensor, one_batch.title2);
-  TensorAssignData<int64_t>(&title3_tensor, one_batch.title3);
-  TensorAssignData<int64_t>(&l1_tensor, one_batch.l1);
+  TensorAssignData<int64_t>(&title1_tensor, one_batch.title1, one_batch.lod1);
+  TensorAssignData<int64_t>(&title2_tensor, one_batch.title2, one_batch.lod2);
+  TensorAssignData<int64_t>(&title3_tensor, one_batch.title3, one_batch.lod3);
+  TensorAssignData<int64_t>(&l1_tensor, one_batch.l1, one_batch.l1_lod);
   // Set inputs.
   input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor});
   for (auto &tensor : *input_slots) {
...
...
paddle/fluid/inference/tests/api/tester_helper.h
...
...
@@ -176,6 +176,18 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
   (*inputs).emplace_back(input_slots);
 }

+void GetInputPerBatch(const std::vector<std::vector<int64_t>> &in,
+                      std::vector<std::vector<int64_t>> *out,
+                      std::vector<size_t> *lod, size_t batch_iter,
+                      size_t batch_end) {
+  lod->clear();
+  lod->push_back(0);
+  for (auto it = in.begin() + batch_iter; it < in.begin() + batch_end; it++) {
+    out->push_back(*it);
+    lod->push_back(lod->back() + (*it).size());  // calculate lod
+  }
+}
+
 void TestOneThreadPrediction(
     const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
...
...
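To make the helper's contract concrete, a small worked example (values invented): slicing the half-open batch range [1, 3) out of four sequences rebuilds the lod from zero for just that slice. The function body is inlined here so the sketch is self-contained.

#include <cassert>
#include <cstdint>
#include <vector>

// Same body as GetInputPerBatch above.
void GetInputPerBatch(const std::vector<std::vector<int64_t>> &in,
                      std::vector<std::vector<int64_t>> *out,
                      std::vector<size_t> *lod, size_t batch_iter,
                      size_t batch_end) {
  lod->clear();
  lod->push_back(0);
  for (auto it = in.begin() + batch_iter; it < in.begin() + batch_end; it++) {
    out->push_back(*it);
    lod->push_back(lod->back() + (*it).size());  // calculate lod
  }
}

int main() {
  std::vector<std::vector<int64_t>> all = {{1}, {2, 3}, {4, 5, 6}, {7}};
  std::vector<std::vector<int64_t>> out;
  std::vector<size_t> lod;
  GetInputPerBatch(all, &out, &lod, 1, 3);
  assert(out.size() == 2);  // {2, 3} and {4, 5, 6}
  assert(lod.size() == 3 && lod[1] == 2 && lod[2] == 5);
}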
paddle/fluid/inference/tests/test.cmake
...
...
@@ -3,14 +3,16 @@ set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
     "A path setting inference demo download directories.")
 function(inference_download install_dir url filename)
   message(STATUS "Download inference test stuff from ${url}/${filename}")
-  execute_process(COMMAND bash -c "mkdir -p ${install_dir}")
-  execute_process(COMMAND bash -c "cd ${install_dir} && wget -q ${url}/${filename}")
+  file(DOWNLOAD "${url}/${filename}" "${install_dir}/${filename}")
   message(STATUS "finish downloading ${filename}")
 endfunction()

 function(inference_download_and_uncompress install_dir url filename)
   inference_download(${install_dir} ${url} ${filename})
-  execute_process(COMMAND bash -c "cd ${install_dir} && tar xzf ${filename}")
+  execute_process(
+    COMMAND ${CMAKE_COMMAND} -E tar xzf ${install_dir}/${filename}
+    WORKING_DIRECTORY ${install_dir})
 endfunction()

 set(WORD2VEC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/word2vec")
...
...
paddle/fluid/operators/CMakeLists.txt
...
...
@@ -46,7 +46,7 @@ endif()
 register_operators(EXCLUDES py_func_op warpctc_op conv_fusion_op DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})

 # warpctc_op needs cudnn 7 above
-if (WITH_GPU AND NOT WIN32)
+if (WITH_GPU)
   if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
     op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc)
   else()
...
...
paddle/fluid/operators/clip_by_norm_op.h
...
...
@@ -64,7 +64,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
       output->mutable_data<T>(context.GetPlace());
     } else {
       PADDLE_THROW("Unexpected branch, input variable type is %s",
-                   in_var->Type().name());
+                   framework::ToTypeName(in_var->Type()));
     }
     PADDLE_ENFORCE_NOT_NULL(input);
...
...
paddle/fluid/operators/controlflow/while_op.cc
...
...
@@ -175,14 +175,13 @@ class WhileGradOp : public framework::OperatorBase {
       auto &og_inside =
           detail::Ref(cur_scope.Var(inside_og_name),
                       "Cannot find inside gradient %s", inside_og_name);
-      if (framework::IsType<framework::LoDTensor>(og_outside.Type())) {
+      if (og_outside.IsType<framework::LoDTensor>()) {
         auto &outside_tensor = og_outside.Get<framework::LoDTensor>();
         auto &inside_tensor =
             detail::Ref(og_inside.GetMutable<framework::LoDTensor>());
         inside_tensor.set_lod(outside_tensor.lod());
         inside_tensor.ShareDataWith(outside_tensor);
-      } else if (framework::IsType<framework::LoDTensorArray>(
-                     og_outside.Type())) {
+      } else if (og_outside.IsType<framework::LoDTensorArray>()) {
        auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
        auto &inside_array =
            detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
...
...
@@ -256,7 +255,7 @@ class WhileGradOp : public framework::OperatorBase {
           var->IsType<LoDTensor>(),
           "Currently the type of var only can be LoDTensorArray, "
           "or LoDTensor, but the received var[%s] is %s.",
-          inside_grad_name, var->Type().name());
+          inside_grad_name, framework::ToTypeName(var->Type()));

       if (var->IsType<LoDTensor>()) {
         auto &inside_tensor = var->Get<framework::LoDTensor>();
...
...
paddle/fluid/operators/conv_fusion_op.cu.cc
...
...
@@ -22,7 +22,7 @@ DECLARE_bool(cudnn_exhaustive_search);
 namespace paddle {
 namespace operators {

-#if CUDNN_VERSION >= 7001
+#if CUDNN_VERSION >= 7100
 using Tensor = framework::Tensor;
 using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
 using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
...
...
@@ -204,7 +204,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle

-#if CUDNN_VERSION >= 7001
+#if CUDNN_VERSION >= 7100
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(conv2d_fusion, ops::CUDNNConvFusionOpKernel<float>,
                         ops::CUDNNConvFusionOpKernel<double>);
...
...
paddle/fluid/operators/cudnn_lstm_op.cu.cc
...
...
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/cudnn_rnn_cache.h"
 #include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/cudnn_helper.h"

 namespace paddle {
 namespace operators {
...
...
@@ -22,239 +22,6 @@ namespace operators {
 using LoDTensor = framework::LoDTensor;
 using Tensor = framework::Tensor;

-struct CudnnRNNCache {
-  /* ~230 lines removed: the descriptor members plus init() and release();
-     the struct moves, essentially verbatim, into the new header
-     paddle/fluid/operators/cudnn_rnn_cache.h reproduced below. The only
-     functional change is that init() now takes a platform::Place instead
-     of a framework::ExecutionContext. */
-};

 template <typename T>
 class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
 public:
...
...
@@ -315,9 +82,9 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
       auto input_w_numel = w->numel();
       auto batch_size = x->dims()[1];
-      cudnn_rnn_cache->init(handle, ctx, max_len, batch_size, input_size,
-                            hidden_size, num_layers, dropout_prob, is_bidirec,
-                            seed, input_w_numel);
+      cudnn_rnn_cache->init(handle, ctx.GetPlace(), max_len, batch_size,
+                            input_size, hidden_size, num_layers, dropout_prob,
+                            is_bidirec, seed, input_w_numel);
     }
     auto run_seq_len = x->dims()[0];
...
...
paddle/fluid/operators/cudnn_rnn_cache.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/cudnn_helper.h"

namespace paddle {
namespace operators {

struct CudnnRNNCache {
  CudnnRNNCache() {
    x_desc_ = NULL;
    y_desc_ = NULL;
    dx_desc_ = NULL;
    dy_desc_ = NULL;
  }
  ~CudnnRNNCache() { release(); }

  cudnnRNNDescriptor_t rnn_desc_;
  cudnnTensorDescriptor_t *x_desc_;
  cudnnTensorDescriptor_t *y_desc_;
  cudnnTensorDescriptor_t *dx_desc_;
  cudnnTensorDescriptor_t *dy_desc_;

  cudnnTensorDescriptor_t hx_desc_;
  cudnnTensorDescriptor_t cx_desc_;
  cudnnTensorDescriptor_t hy_desc_;
  cudnnTensorDescriptor_t cy_desc_;

  cudnnTensorDescriptor_t dhx_desc_;
  cudnnTensorDescriptor_t dcx_desc_;
  cudnnTensorDescriptor_t dhy_desc_;
  cudnnTensorDescriptor_t dcy_desc_;

  cudnnTensorDescriptor_t output_x_desc_;
  cudnnTensorDescriptor_t output_y_desc_;

  cudnnDropoutDescriptor_t dropout_desc_;

  size_t weights_size_;
  cudnnFilterDescriptor_t w_desc_;
  cudnnFilterDescriptor_t dw_desc_;

  size_t workspace_size_;
  size_t reserve_size_;
  framework::Tensor reserve_data_;
  framework::Tensor workspace_data_;
  framework::Tensor dropout_state_;

  size_t max_length_;

  float dropout_prob_;
  bool is_bidirec_;

  int batch_size_;
  int input_size_;
  int hidden_size_;
  int num_layers_;
  int seed_;

  void init(cudnnHandle_t handle, const platform::Place &place, size_t max_len,
            int batch_size, int input_size, int hidden_size, int num_layers,
            float dropout_prob, bool is_bidirec, int seed, int weight_numel) {
    max_length_ = max_len;
    batch_size_ = batch_size;
    input_size_ = input_size;
    hidden_size_ = hidden_size;
    num_layers_ = num_layers;
    dropout_prob_ = dropout_prob;
    is_bidirec_ = is_bidirec;
    seed_ = seed;

    x_desc_ = new cudnnTensorDescriptor_t[max_length_];
    y_desc_ = new cudnnTensorDescriptor_t[max_length_];
    dx_desc_ = new cudnnTensorDescriptor_t[max_length_];
    dy_desc_ = new cudnnTensorDescriptor_t[max_length_];
    int dim_a[3];
    int stride_a[3];

    for (size_t i = 0; i < max_length_; ++i) {
      CUDNN_ENFORCE(
          platform::dynload::cudnnCreateTensorDescriptor(&x_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnCreateTensorDescriptor(&y_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnCreateTensorDescriptor(&dx_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnCreateTensorDescriptor(&dy_desc_[i]));

      dim_a[0] = batch_size_;
      dim_a[1] = input_size_;
      dim_a[2] = 1;
      stride_a[0] = dim_a[2] * dim_a[1];
      stride_a[1] = dim_a[2];
      stride_a[2] = 1;
      CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
          x_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
      CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
          dx_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));

      dim_a[0] = batch_size_;
      dim_a[1] = is_bidirec_ ? hidden_size_ * 2 : hidden_size_;
      dim_a[2] = 1;
      stride_a[0] = dim_a[2] * dim_a[1];
      stride_a[1] = dim_a[2];
      stride_a[2] = 1;
      CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
          y_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
      CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
          dy_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    }

    dim_a[0] = num_layers_ * (is_bidirec_ ? 2 : 1);
    dim_a[1] = batch_size_;
    dim_a[2] = hidden_size_;
    stride_a[0] = dim_a[2] * dim_a[1];
    stride_a[1] = dim_a[2];
    stride_a[2] = 1;

    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&hx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&cx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&hy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&cy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dhx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dcx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dhy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dcy_desc_));

    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        hx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        cx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        hy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        cy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        dhx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        dcx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        dhy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        dcy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));

    CUDNN_ENFORCE(
        platform::dynload::cudnnCreateDropoutDescriptor(&dropout_desc_));

    size_t state_size;
    CUDNN_ENFORCE(
        platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
    dropout_state_.Resize({static_cast<int64_t>(state_size)});
    auto *dropout_state_data = dropout_state_.mutable_data<uint8_t>(place);
    CUDNN_ENFORCE(platform::dynload::cudnnSetDropoutDescriptor(
        dropout_desc_, handle, dropout_prob_, dropout_state_data, state_size,
        seed_));

    CUDNN_ENFORCE(platform::dynload::cudnnCreateRNNDescriptor(&rnn_desc_));

#if CUDNN_VERSION >= 6000
    CUDNN_ENFORCE(platform::dynload::cudnnSetRNNDescriptor_v6(
        handle, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
        CUDNN_LINEAR_INPUT,
        is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
        CUDNN_RNN_ALGO_STANDARD, CUDNN_DATA_FLOAT));
#else
    CUDNN_ENFORCE(platform::dynload::cudnnSetRNNDescriptor(
        rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
        CUDNN_LINEAR_INPUT,
        is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
        CUDNN_DATA_FLOAT));
#endif

    CUDNN_ENFORCE(platform::dynload::cudnnCreateFilterDescriptor(&w_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateFilterDescriptor(&dw_desc_));

    CUDNN_ENFORCE(platform::dynload::cudnnGetRNNParamsSize(
        handle, rnn_desc_, x_desc_[0], &weights_size_, CUDNN_DATA_FLOAT));

    PADDLE_ENFORCE_EQ(weights_size_, sizeof(float) * weight_numel,
                      "cudnn lstm weight size should be SAME");
    int dim_w[3];
    dim_w[0] = weights_size_ / sizeof(float);
    dim_w[1] = 1;
    dim_w[2] = 1;
    CUDNN_ENFORCE(platform::dynload::cudnnSetFilterNdDescriptor(
        w_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w));
    CUDNN_ENFORCE(platform::dynload::cudnnSetFilterNdDescriptor(
        dw_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w));

    CUDNN_ENFORCE(platform::dynload::cudnnGetRNNWorkspaceSize(
        handle, rnn_desc_, max_length_, x_desc_, &workspace_size_));
    CUDNN_ENFORCE(platform::dynload::cudnnGetRNNTrainingReserveSize(
        handle, rnn_desc_, max_length_, x_desc_, &reserve_size_));

    reserve_data_.Resize({static_cast<int64_t>(reserve_size_)});
    reserve_data_.mutable_data<uint8_t>(place);
    workspace_data_.Resize({static_cast<int64_t>(workspace_size_)});
    workspace_data_.mutable_data<uint8_t>(place);
  }

  void release() {
    for (size_t i = 0; i < max_length_; ++i) {
      CUDNN_ENFORCE(
          platform::dynload::cudnnDestroyTensorDescriptor(x_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnDestroyTensorDescriptor(y_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnDestroyTensorDescriptor(dx_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnDestroyTensorDescriptor(dy_desc_[i]));
    }

    delete[] x_desc_;
    delete[] y_desc_;
    delete[] dx_desc_;
    delete[] dy_desc_;

    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(hx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(cx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(hy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(cy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dhx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dcx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dhy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dcy_desc_));

    CUDNN_ENFORCE(
        platform::dynload::cudnnDestroyDropoutDescriptor(dropout_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyRNNDescriptor(rnn_desc_));

    CUDNN_ENFORCE(platform::dynload::cudnnDestroyFilterDescriptor(w_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyFilterDescriptor(dw_desc_));
  }
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/cum_op.h
...
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+
+#include <array>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
...
...
paddle/fluid/operators/detail/safe_ref.h
...
...
@@ -25,7 +25,7 @@ namespace detail {
 */
 template <typename T, typename... ARGS>
 inline T& Ref(T* ptr, ARGS&&... args) {
-  PADDLE_ENFORCE(ptr != nullptr, args...);
+  PADDLE_ENFORCE(ptr != nullptr, ::paddle::string::Sprintf(args...));
   return *ptr;
 }
...
...
paddle/fluid/operators/distributed/proto_encoder_helper.h
...
...
@@ -84,7 +84,9 @@ class ProtoEncodeHelper {
  ~ProtoEncodeHelper() {
 #define REPLACE_ENFORCE_GLOG 1
    // Make sure callers didn't do operations that went over max_size promised
-   paddle::platform::throw_on_error(p_ <= limit_);
+   if (paddle::platform::is_error(p_ <= limit_)) {
+     paddle::platform::throw_on_error(p_ <= limit_);
+   }
 #undef REPLACE_ENFORCE_GLOG
  }
...
...
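This destructor change applies the is_error()/throw_on_error() split added to enforce.h at the end of this diff: a cheap predicate decides whether anything failed, and only the failing path reaches the throwing (or, under REPLACE_ENFORCE_GLOG, fatally logging) routine. A schematic of the pattern, assuming only the is_error(bool) overload shown later; SketchThrow is a hypothetical stand-in for platform::throw_on_error:

#include <stdexcept>

inline bool is_error(bool stat) { return !stat; }  // as in enforce.h below

inline void SketchThrow() { throw std::runtime_error("enforce failed"); }

void CheckLimit(const char* p, const char* limit) {
  if (is_error(p <= limit)) {  // only the failure path pays for the throw
    SketchThrow();
  }
}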
paddle/fluid/operators/distributed_ops/CMakeLists.txt
...
...
@@ -33,7 +33,7 @@ register_operators(EXCLUDES gen_nccl_id_op DEPS ${DISTRIBUTE_DEPS})
 if(WITH_GPU AND NOT WIN32)
   set(DISTRIBUTE_DEPS ${DISTRIBUTE_DEPS} nccl_common)
-  op_library(gen_nccl_id_op ${DISTRIBUTE_DEPS} nccl_common)
+  op_library(gen_nccl_id_op DEPS ${DISTRIBUTE_DEPS} nccl_common)
 endif()
 set(OPERATOR_DEPS ${OPERATOR_DEPS} ${DISTRIBUTE_DEPS} PARENT_SCOPE)
...
...
paddle/fluid/operators/distributed_ops/split_ids_op.h
...
...
@@ -116,7 +116,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
    } else {
      PADDLE_THROW(
          "%s should be LoDTensor or SelectedRows, but the received type is %s",
-         ctx.Inputs("Ids")[0], ids_var->Type().name());
+         ctx.Inputs("Ids")[0], framework::ToTypeName(ids_var->Type()));
    }
  }
};
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.h
...
...
@@ -83,7 +83,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
      z = ctx.Output<framework::LoDTensor>("Out");
    } else {
      PADDLE_THROW("X's type[%s] is not supported by elementwise_op.",
-                  x_var->Type().name());
+                  framework::ToTypeName(x_var->Type()));
    }
    z->mutable_data<T>(ctx.GetPlace());
...
...
paddle/fluid/operators/lrn_mkldnn_op.cc
...
...
@@ -50,8 +50,8 @@ template <typename T>
 class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
-   PADDLE_ENFORCE(std::is_same<T, float>::value,
-                  "MKLDNN LRN must use float data.");
+   const bool is_float_type = std::is_same<T, float>::value;
+   PADDLE_ENFORCE(is_float_type, "MKLDNN LRN must use float data.");
    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                   "MKLDNN LRN must use CPUPlace.");
...
...
@@ -132,8 +132,8 @@ template <typename T>
 class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
-   PADDLE_ENFORCE(std::is_same<T, float>::value,
-                  "MKLDNN LRN must use float data.");
+   const bool is_float_type = std::is_same<T, float>::value;
+   PADDLE_ENFORCE(is_float_type, "MKLDNN LRN must use float data.");
    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                   "MKLDNN LRN must use CPUPlace.");
    PADDLE_ENFORCE(
...
...
paddle/fluid/operators/optimizers/adadelta_op.h
...
...
@@ -27,12 +27,14 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Param").front(), param_var->Type().name());
+                  ctx.Inputs("Param").front(),
+                  framework::ToTypeName(param_var->Type()));
    const auto* grad_var = ctx.InputVar("Grad");
    PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Grad").front(), grad_var->Type().name());
+                  ctx.Inputs("Grad").front(),
+                  framework::ToTypeName(grad_var->Type()));

    auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
    auto avg_squared_grad_out_tensor =
...
...
paddle/fluid/operators/optimizers/adagrad_op.h
...
...
@@ -50,7 +50,8 @@ class AdagradOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Param").front(), param_var->Type().name());
+                  ctx.Inputs("Param").front(),
+                  framework::ToTypeName(param_var->Type()));

    auto* param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
    auto* moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
...
...
paddle/fluid/operators/optimizers/adam_op.h
...
...
@@ -347,7 +347,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Param").front(), param_var->Type().name());
+                  ctx.Inputs("Param").front(),
+                  framework::ToTypeName(param_var->Type()));

    using paddle::framework::LoDTensor;
    using paddle::operators::detail::Ref;
...
...
paddle/fluid/operators/optimizers/adamax_op.h
...
...
@@ -27,12 +27,14 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Param").front(), param_var->Type().name());
+                  ctx.Inputs("Param").front(),
+                  framework::ToTypeName(param_var->Type()));
    const auto* grad_var = ctx.InputVar("Grad");
    PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Grad").front(), grad_var->Type().name());
+                  ctx.Inputs("Grad").front(),
+                  framework::ToTypeName(grad_var->Type()));

    auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
    auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
...
...
@@ -27,12 +27,14 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Param").front(), param_var->Type().name());
+                  ctx.Inputs("Param").front(),
+                  framework::ToTypeName(param_var->Type()));
    const auto* grad_var = ctx.InputVar("Grad");
    PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Grad").front(), grad_var->Type().name());
+                  ctx.Inputs("Grad").front(),
+                  framework::ToTypeName(grad_var->Type()));

    auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
    auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
...
...
paddle/fluid/operators/optimizers/ftrl_op.h
...
...
@@ -32,12 +32,14 @@ class FTRLOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Param").front(), param_var->Type().name());
+                  ctx.Inputs("Param").front(),
+                  framework::ToTypeName(param_var->Type()));
    const auto* grad_var = ctx.InputVar("Grad");
    PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Grad").front(), grad_var->Type().name());
+                  ctx.Inputs("Grad").front(),
+                  framework::ToTypeName(grad_var->Type()));

    auto* param_out = ctx.Output<Tensor>("ParamOut");
    auto* sq_accum_out = ctx.Output<Tensor>("SquaredAccumOut");
...
...
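adamax_op.h, decayed_adagrad_op.h, and ftrl_op.h repeat the identical Param/Grad type check verbatim. The patch does not refactor this, but the repetition suggests a helper; a hypothetical, self-contained sketch under that assumption (none of these names are in the patch):

    #include <sstream>
    #include <stdexcept>
    #include <string>

    // Minimal stand-ins for the framework pieces the kernels above use.
    struct LoDTensor {};
    struct Variable {
      bool holds_lod_tensor = true;
      template <typename T>
      bool IsType() const { return holds_lod_tensor; }  // toy dispatch
      std::string TypeName() const {
        return holds_lod_tensor ? "LoDTensor" : "SelectedRows";
      }
    };

    // One helper instead of a copy-pasted PADDLE_ENFORCE per input.
    inline void EnforceLoDTensor(const Variable& var, const std::string& name) {
      if (!var.IsType<LoDTensor>()) {
        std::ostringstream oss;
        oss << "The Var(" << name << ")'s type should be LoDTensor, "
            << "but the received is " << var.TypeName();
        throw std::runtime_error(oss.str());
      }
    }

    int main() {
      Variable param, grad;
      EnforceLoDTensor(param, "Param");  // passes
      EnforceLoDTensor(grad, "Grad");    // passes
    }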
paddle/fluid/operators/optimizers/momentum_op.h
...
...
@@ -395,7 +395,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
      PADDLE_THROW(string::Sprintf(
          "MomentumOp only supports LoDTensor or SelectedRows "
          "gradient, but the received Variable Type is %s",
-         grad_var->Type().name()));
+         framework::ToTypeName(grad_var->Type())));
    }
  }
};
...
...
paddle/fluid/operators/optimizers/sgd_op.cu
...
...
@@ -60,7 +60,8 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                   "The Var(%s)'s type should be LoDTensor, "
                   "but the received is %s",
-                  ctx.Inputs("Param").front(), param_var->Type().name());
+                  ctx.Inputs("Param").front(),
+                  framework::ToTypeName(param_var->Type()));
    auto* param = ctx.Input<framework::Tensor>("Param");
    auto* param_out = ctx.Output<framework::Tensor>("ParamOut");
...
...
paddle/fluid/operators/sum_mkldnn_op.cc
...
...
@@ -245,7 +245,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
      }
    } else {
      PADDLE_THROW("Unexpected branch, output variable type is %s",
-                  out_var->Type().name());
+                  framework::ToTypeName(out_var->Type()));
    }
  }
};
...
...
paddle/fluid/operators/sum_op.cc
...
...
@@ -126,7 +126,7 @@ class SumOp : public framework::OperatorWithKernel {
      PADDLE_THROW("Cannot find the input data type by all input data");
    }
    PADDLE_THROW("Unexpected branch. Input type is %s",
-                x_vars[0]->Type().name());
+                framework::ToTypeName(x_vars[0]->Type()));
  }
};
...
...
paddle/fluid/operators/sum_op.h
...
...
@@ -163,7 +163,7 @@ class SumKernel : public framework::OpKernel<T> {
      }
    } else {
      PADDLE_THROW("Unexpected branch, output variable type is %s",
-                  out_var->Type().name());
+                  framework::ToTypeName(out_var->Type()));
    }
  }
};
...
...
paddle/fluid/platform/enforce.h
...
...
@@ -140,68 +140,72 @@ struct EOFException : public std::exception {
#define LIKELY(condition) (condition)
#endif

+inline bool is_error(bool stat) { return !stat; }
+
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type
throw_on_error(bool stat, const Args&... args) {
  if (UNLIKELY(!(stat))) {
#ifndef REPLACE_ENFORCE_GLOG
    throw std::runtime_error(string::Sprintf(args...));
#else
    LOG(FATAL) << string::Sprintf(args...);
#endif
  }
}

#ifdef PADDLE_WITH_CUDA

+inline bool is_error(cudaError_t e) { return UNLIKELY(e); }
+
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type
throw_on_error(cudaError_t e, const Args&... args) {
  if (UNLIKELY(e)) {
#ifndef REPLACE_ENFORCE_GLOG
    throw thrust::system_error(e, thrust::cuda_category(),
                               string::Sprintf(args...));
#else
    LOG(FATAL) << string::Sprintf(args...);
#endif
  }
}

+inline bool is_error(curandStatus_t stat) {
+  return stat != CURAND_STATUS_SUCCESS;
+}
+
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type
throw_on_error(curandStatus_t stat, const Args&... args) {
  if (stat != CURAND_STATUS_SUCCESS) {
#ifndef REPLACE_ENFORCE_GLOG
    throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(),
                               string::Sprintf(args...));
#else
    LOG(FATAL) << string::Sprintf(args...);
#endif
  }
}

+inline bool is_error(cudnnStatus_t stat) {
+  return stat != CUDNN_STATUS_SUCCESS;
+}
+
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type
throw_on_error(cudnnStatus_t stat, const Args&... args) {
  if (stat == CUDNN_STATUS_SUCCESS) {
    return;
  } else {
#ifndef REPLACE_ENFORCE_GLOG
    throw std::runtime_error(platform::dynload::cudnnGetErrorString(stat) +
                             string::Sprintf(args...));
#else
    LOG(FATAL) << string::Sprintf(args...);
#endif
  }
}

+inline bool is_error(cublasStatus_t stat) {
+  return stat != CUBLAS_STATUS_SUCCESS;
+}
+
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type
throw_on_error(cublasStatus_t stat, const Args&... args) {
  std::string err;
  if (stat == CUBLAS_STATUS_SUCCESS) {
    return;
  } else if (stat == CUBLAS_STATUS_NOT_INITIALIZED) {
    err = "CUBLAS: not initialized, ";
  } else if (stat == CUBLAS_STATUS_ALLOC_FAILED) {
    err = "CUBLAS: alloc failed, ";
...
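The hunk above separates detection from reporting: each status type gains an is_error overload, while throw_on_error keeps the throwing/logging. A self-contained sketch of that two-layer pattern for an invented status type (this is not Paddle code):

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Invented status type standing in for cudaError_t, curandStatus_t, etc.
    enum class MyStatus { kSuccess = 0, kAllocFailed = 1 };

    // Layer 1: per-type "is this an error?" predicates.
    inline bool is_error(bool stat) { return !stat; }
    inline bool is_error(MyStatus stat) { return stat != MyStatus::kSuccess; }

    // Layer 2: one generic reporter that defers to the right predicate.
    template <typename Status>
    void throw_on_error(Status stat, const std::string& msg) {
      if (is_error(stat)) throw std::runtime_error(msg);
    }

    int main() {
      throw_on_error(true, "never thrown");
      try {
        throw_on_error(MyStatus::kAllocFailed, "CUBLAS-style: alloc failed");
      } catch (const std::runtime_error& e) {
        std::cout << "caught: " << e.what() << std::endl;
      }
    }

Splitting out is_error is what lets the new __PADDLE_UNARY_COMPARE macro in the next hunk test any status type uniformly before deciding to throw.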
@@ -254,21 +258,49 @@ inline void throw_on_error(T e) {
#define PADDLE_THROW(...) \
throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)
+#define __PADDLE_THROW_ERROR_I(_, _9, _8, _7, _6, _5, _4, _3, _2, X_, ...) X_;
+
+#define __THROW_ON_ERROR_ONE_ARG(COND, ARG) \
+  ::paddle::platform::throw_on_error(COND, ::paddle::string::Sprintf(ARG));
+
+#define __PADDLE_THROW_ON_ERROR(COND, ...)                                \
+  __PADDLE_THROW_ERROR_I(                                                 \
+      __VA_ARGS__, ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
+      ::paddle::platform::throw_on_error(COND, __VA_ARGS__),              \
+      ::paddle::platform::throw_on_error(COND, __VA_ARGS__),              \
+      ::paddle::platform::throw_on_error(COND, __VA_ARGS__),              \
+      ::paddle::platform::throw_on_error(COND, __VA_ARGS__),              \
+      ::paddle::platform::throw_on_error(COND, __VA_ARGS__),              \
+      ::paddle::platform::throw_on_error(COND, __VA_ARGS__),              \
+      ::paddle::platform::throw_on_error(COND, __VA_ARGS__),              \
+      __THROW_ON_ERROR_ONE_ARG(COND, __VA_ARGS__))
+
+#define __PADDLE_UNARY_COMPARE(COND, ...)                 \
+  do {                                                    \
+    auto __cond = COND;                                   \
+    if (UNLIKELY(::paddle::platform::is_error(__cond))) { \
+      __PADDLE_THROW_ON_ERROR(__cond, __VA_ARGS__);       \
+    }                                                     \
+  } while (0)
+
 #ifndef REPLACE_ENFORCE_GLOG
-#define PADDLE_ENFORCE(...)                                              \
+#define __PADDLE_ENFORCE_I(COND, ...)                                    \
   do {                                                                   \
     try {                                                                \
-      ::paddle::platform::throw_on_error(__VA_ARGS__);                   \
+      __PADDLE_UNARY_COMPARE(COND, __VA_ARGS__);                         \
     } catch (...) {                                                      \
       throw ::paddle::platform::EnforceNotMet(std::current_exception(),  \
                                               __FILE__, __LINE__);       \
     }                                                                    \
-  } while (false)
+  } while (0)
 #else
-#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__);
+#define __PADDLE_ENFORCE_I(COND, ...) __PADDLE_UNARY_COMPARE(COND, __VA_ARGS__);
 #endif  // REPLACE_ENFORCE_GLOG
+
+#define __PADDLE_ENFORCE(__args) __PADDLE_ENFORCE_I __args
+#define PADDLE_ENFORCE(...) __PADDLE_ENFORCE((__VA_ARGS__))

 #define PADDLE_THROW_EOF()                                                     \
   do {                                                                         \
     throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
...
...
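__PADDLE_THROW_ERROR_I is the classic preprocessor argument-counting trick: the call site pads the variadic list so that the expression to execute always lands in the tenth slot. That is what lets PADDLE_ENFORCE(cond) with no message route to the Sprintf-wrapping one-argument branch while multi-argument calls keep their format string. A stripped-down, compilable sketch of the same dispatch (macro names here are mine, not Paddle's):

    #include <cstdio>

    // Select the 10th argument; callers pad so the desired branch lands there.
    #define PICK_10TH(_1, _2, _3, _4, _5, _6, _7, _8, _9, X, ...) X

    // With one user argument the padding pushes "one-arg" into slot 10;
    // with two or more, a "multi-arg" pad arrives there first.
    #define ARG_KIND(...)                                           \
      PICK_10TH(__VA_ARGS__, "multi-arg", "multi-arg", "multi-arg", \
                "multi-arg", "multi-arg", "multi-arg", "multi-arg", \
                "multi-arg", "one-arg", "")

    int main() {
      std::puts(ARG_KIND(cond));              // one-arg
      std::puts(ARG_KIND(cond, "message"));   // multi-arg
      std::puts(ARG_KIND(cond, "m%s", "x"));  // multi-arg
    }

Paddle pads with eight identical throw_on_error arms, so any call with two to nine arguments selects the formatting path and a bare condition selects __THROW_ON_ERROR_ONE_ARG.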
paddle/fluid/platform/enforce_test.cc
...
...
@@ -37,6 +37,25 @@ TEST(ENFORCE, FAILED) {
                        HasPrefix(StringPiece(error.what()),
                                  "Enforce is not ok 123 at all"));
  }
  EXPECT_TRUE(caught_exception);
+
+  caught_exception = false;
+  try {
+    PADDLE_ENFORCE(false, "Enforce is not ok at all");
+  } catch (paddle::platform::EnforceNotMet error) {
+    caught_exception = true;
+    EXPECT_TRUE(
+        HasPrefix(StringPiece(error.what()), "Enforce is not ok at all"));
+  }
+  EXPECT_TRUE(caught_exception);
+
+  caught_exception = false;
+  try {
+    PADDLE_ENFORCE(false);
+  } catch (paddle::platform::EnforceNotMet error) {
+    caught_exception = true;
+    EXPECT_NE(std::string(error.what()).find(" at "), 0);
+  }
+  EXPECT_TRUE(caught_exception);
}

TEST(ENFORCE, NO_ARG_OK) {
...
...
paddle/fluid/platform/float16_test.cc
...
...
@@ -12,6 +12,7 @@ limitations under the License. */
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "gtest/gtest.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/init.h"
...
...
paddle/fluid/platform/float16_test.cu
...
...
@@ -11,6 +11,7 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h"
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <bitset>
...
...
paddle/fluid/pybind/const_value.cc
...
...
@@ -49,9 +49,6 @@ void BindConstValue(pybind11::module* m) {
  op_proto_and_checker_maker.def(
      "kOpNameScopeAttrName",
      framework::OpProtoAndCheckerMaker::OpNamescopeAttrName);
-  op_proto_and_checker_maker.def(
-      "kOpCreationCallstackAttrName",
-      framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName);
}
}  // namespace pybind
}
// namespace pybind
...
...
paddle/fluid/string/printf.h
...
...
@@ -87,7 +87,7 @@ void Fprintf(std::ostream& out, const char* fmt, const Args&... args) {
template <typename... Args>
std::string Sprintf(const Args&... args) {
  std::ostringstream oss;
-  Fprintf(oss, "");
+  Fprintf(oss, "%s", args...);
  return oss.str();
}
...
...
python/paddle/fluid/framework.py
...
...
@@ -20,7 +20,6 @@ import os
import re
import six
import sys
-import traceback
import numpy as np
...
...
@@ -605,10 +604,6 @@ class Operator(object):
        if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
            del op_attrs[role_var_name]

-        callstack_var_name = op_maker.kOpCreationCallstackAttrName()
-        op_attrs[callstack_var_name] = list(
-            reversed(traceback.format_stack()))[1:]
-
        if len(self.desc.type()) != 0:
            return
        if type is None:
...
...
python/paddle/fluid/parallel_executor.py
...
...
@@ -148,7 +148,7 @@ class ParallelExecutor(object):
                trainers_endpoints), "num_trainers == len(end_points)"
            build_strategy.trainers_endpoints = trainers_endpoints

-        # step 5: get persistable_vars, parameter_vars, places. persistable_vars
+        # step 6: get persistable_vars, places. persistable_vars
        # need be broadcast to other local_scope.
        persistable_vars = set([
            cpt.to_text(v.name) for v in [
@@ -164,7 +164,7 @@ class ParallelExecutor(object):
        places = list(map(place_obj, self._places))

-        # step 6: init ParallelExecutor
+        # step 7: init ParallelExecutor
        self.executor = core.ParallelExecutor(
            places, persistable_vars, main.desc,
            cpt.to_text(loss_name)
...
...
python/paddle/fluid/tests/unittests/test_operator_desc.py
...
...
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
            set(mul_op.attr_names),
            set([
                "x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
-                "op_namescope", "op_callstack"
+                "op_namescope"
            ]))
        self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
        self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
...
...
python/paddle/fluid/tests/unittests/test_weight_decay.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import contextlib
import unittest
from functools import partial

import numpy as np

import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid


def get_places():
    places = []
    if core.is_compiled_with_cuda():
        places.append(core.CUDAPlace(0))
    return places


@contextlib.contextmanager
def prog_scope_guard(main_prog, startup_prog):
    scope = fluid.core.Scope()
    with fluid.unique_name.guard():
        with fluid.scope_guard(scope):
            with fluid.program_guard(main_prog, startup_prog):
                yield


def bow_net(data,
            label,
            dict_dim,
            is_sparse=False,
            emb_dim=128,
            hid_dim=128,
            hid_dim2=96,
            class_dim=2):
    """
    BOW net
    This model is from https://github.com/PaddlePaddle/models:
    fluid/PaddleNLP/text_classification/nets.py
    """
    emb = fluid.layers.embedding(
        input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim])
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
    bow_tanh = fluid.layers.tanh(bow)
    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    return avg_cost


class TestWeightDecay(unittest.TestCase):
    def setUp(self):
        self.word_dict = paddle.dataset.imdb.word_dict()
        reader = paddle.batch(
            paddle.dataset.imdb.train(self.word_dict), batch_size=4)()
        self.train_data = [next(reader) for _ in range(5)]
        self.learning_rate = .5

    def run_executor(self, place, feed_list, loss):
        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
        exe.run(fluid.default_startup_program())
        main_prog = fluid.default_main_program()
        loss_set = []
        for data in self.train_data:
            out = exe.run(main_prog,
                          feed=feeder.feed(data),
                          fetch_list=[loss.name])
            print("loss %s" % (np.average(out)))
            loss_set.append(np.average(out))

        return loss_set

    def run_parallel_exe(self,
                         place,
                         feed_list,
                         loss,
                         use_cuda=True,
                         use_reduce=False,
                         use_fast_executor=False,
                         use_ir_memory_optimize=False):
        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
        exe.run(fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        if use_fast_executor:
            exec_strategy.use_experimental_executor = True

        build_strategy = fluid.BuildStrategy()
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce \
            if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce
        build_strategy.memory_optimize = use_ir_memory_optimize

        parallel_exe = fluid.ParallelExecutor(
            use_cuda,
            loss_name=loss.name,
            exec_strategy=exec_strategy,
            build_strategy=build_strategy)

        loss_set = []
        for data in self.train_data:
            out = parallel_exe.run(feed=feeder.feed(data),
                                   fetch_list=[loss.name])
            print("loss %s" % (np.average(out)))
            loss_set.append(np.average(out))

        return loss_set

    def check_weight_decay(self,
                           place,
                           model,
                           use_parallel_exe=False,
                           use_reduce=False):
        main_prog = fluid.framework.Program()
        startup_prog = fluid.framework.Program()
        startup_prog.random_seed = 1
        with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
            data = fluid.layers.data(
                name="words", shape=[1], dtype="int64", lod_level=1)
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")

            avg_cost = model(data, label, len(self.word_dict))

            param_list = [(var, var * self.learning_rate)
                          for var in main_prog.block(0).all_parameters()]

            optimizer = fluid.optimizer.Adagrad(
                learning_rate=self.learning_rate)
            optimizer.minimize(avg_cost)

            for params in param_list:
                updated_p = fluid.layers.elementwise_sub(
                    x=params[0], y=params[1])
                fluid.layers.assign(input=updated_p, output=params[0])

            if use_parallel_exe:
                loss = self.run_parallel_exe(
                    place, [data, label],
                    loss=avg_cost,
                    use_cuda=True,
                    use_reduce=use_reduce)
            else:
                loss = self.run_executor(place, [data, label], loss=avg_cost)

        return loss

    def test_weight_decay(self):
        model = partial(bow_net, is_sparse=False)
        for place in get_places():
            loss = self.check_weight_decay(
                place, model, use_parallel_exe=False)

            loss2 = self.check_weight_decay(
                place, model, use_parallel_exe=True, use_reduce=False)

            for i in range(len(loss)):
                assert np.isclose(a=loss[i], b=loss2[i], rtol=5e-5)

            loss3 = self.check_weight_decay(
                place, model, use_parallel_exe=True, use_reduce=True)

            for i in range(len(loss)):
                assert np.isclose(a=loss[i], b=loss3[i], rtol=5e-5)


if __name__ == '__main__':
    unittest.main()