Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
50fce879
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
50fce879
编写于
12月 13, 2018
作者:
J
JiabinYang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into feature/add_prefech_hs
上级
c2e851f7
30aad884
变更
80
显示空白变更内容
内联
并排
Showing
80 changed file
with
677 addition
and
250 deletion
+677
-250
CMakeLists.txt
CMakeLists.txt
+7
-1
cmake/FindGperftools.cmake
cmake/FindGperftools.cmake
+63
-0
cmake/configure.cmake
cmake/configure.cmake
+1
-0
cmake/generic.cmake
cmake/generic.cmake
+16
-0
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+15
-9
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+8
-5
paddle/fluid/framework/ngraph_bridge.cc
paddle/fluid/framework/ngraph_bridge.cc
+14
-15
paddle/fluid/framework/ngraph_bridge.h
paddle/fluid/framework/ngraph_bridge.h
+0
-3
paddle/fluid/framework/ngraph_operator.cc
paddle/fluid/framework/ngraph_operator.cc
+28
-31
paddle/fluid/framework/ngraph_operator.h
paddle/fluid/framework/ngraph_operator.h
+3
-6
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+1
-1
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+30
-1
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
...le/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+4
-3
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+1
-1
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+14
-2
paddle/fluid/inference/tests/api/trt_models_tester.cc
paddle/fluid/inference/tests/api/trt_models_tester.cc
+3
-0
paddle/fluid/inference/utils/benchmark.cc
paddle/fluid/inference/utils/benchmark.cc
+1
-1
paddle/fluid/inference/utils/visualizer.cc
paddle/fluid/inference/utils/visualizer.cc
+5
-5
paddle/fluid/operators/activation_op.h
paddle/fluid/operators/activation_op.h
+7
-8
paddle/fluid/operators/bilinear_tensor_product_op.cu
paddle/fluid/operators/bilinear_tensor_product_op.cu
+0
-1
paddle/fluid/operators/concat_mkldnn_op.cc
paddle/fluid/operators/concat_mkldnn_op.cc
+152
-0
paddle/fluid/operators/concat_op.cc
paddle/fluid/operators/concat_op.cc
+24
-1
paddle/fluid/operators/cos_sim_op.cu
paddle/fluid/operators/cos_sim_op.cu
+0
-2
paddle/fluid/operators/crop_op.cu
paddle/fluid/operators/crop_op.cu
+0
-2
paddle/fluid/operators/distributed/brpc_client.cc
paddle/fluid/operators/distributed/brpc_client.cc
+1
-1
paddle/fluid/operators/distributed/grpc_client.cc
paddle/fluid/operators/distributed/grpc_client.cc
+1
-2
paddle/fluid/operators/dropout_op.cu
paddle/fluid/operators/dropout_op.cu
+0
-2
paddle/fluid/operators/elementwise/elementwise_add_op.cu
paddle/fluid/operators/elementwise/elementwise_add_op.cu
+0
-2
paddle/fluid/operators/elementwise/elementwise_div_op.cu
paddle/fluid/operators/elementwise/elementwise_div_op.cu
+0
-2
paddle/fluid/operators/elementwise/elementwise_max_op.cu
paddle/fluid/operators/elementwise/elementwise_max_op.cu
+0
-2
paddle/fluid/operators/elementwise/elementwise_min_op.cu
paddle/fluid/operators/elementwise/elementwise_min_op.cu
+0
-2
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
+0
-2
paddle/fluid/operators/elementwise/elementwise_pow_op.cu
paddle/fluid/operators/elementwise/elementwise_pow_op.cu
+0
-2
paddle/fluid/operators/elementwise/elementwise_sub_op.cu
paddle/fluid/operators/elementwise/elementwise_sub_op.cu
+0
-2
paddle/fluid/operators/expand_op.cu
paddle/fluid/operators/expand_op.cu
+0
-3
paddle/fluid/operators/gru_unit_op.cu
paddle/fluid/operators/gru_unit_op.cu
+0
-2
paddle/fluid/operators/hinge_loss_op.cu
paddle/fluid/operators/hinge_loss_op.cu
+0
-2
paddle/fluid/operators/huber_loss_op.cu
paddle/fluid/operators/huber_loss_op.cu
+0
-2
paddle/fluid/operators/im2sequence_op.cu
paddle/fluid/operators/im2sequence_op.cu
+0
-2
paddle/fluid/operators/isfinite_op.cu
paddle/fluid/operators/isfinite_op.cu
+0
-2
paddle/fluid/operators/l1_norm_op.cu
paddle/fluid/operators/l1_norm_op.cu
+0
-2
paddle/fluid/operators/log_loss_op.cu
paddle/fluid/operators/log_loss_op.cu
+0
-2
paddle/fluid/operators/math/context_project.cu
paddle/fluid/operators/math/context_project.cu
+0
-3
paddle/fluid/operators/math/jit_kernel_layer_norm.cc
paddle/fluid/operators/math/jit_kernel_layer_norm.cc
+8
-5
paddle/fluid/operators/math/math_function.cu
paddle/fluid/operators/math/math_function.cu
+0
-2
paddle/fluid/operators/math/sequence2batch.cu
paddle/fluid/operators/math/sequence2batch.cu
+0
-2
paddle/fluid/operators/math/softmax.cu
paddle/fluid/operators/math/softmax.cu
+0
-3
paddle/fluid/operators/mean_op.cu
paddle/fluid/operators/mean_op.cu
+0
-3
paddle/fluid/operators/optimizers/adadelta_op.cu
paddle/fluid/operators/optimizers/adadelta_op.cu
+0
-2
paddle/fluid/operators/optimizers/adagrad_op.cu
paddle/fluid/operators/optimizers/adagrad_op.cu
+0
-2
paddle/fluid/operators/optimizers/adam_op.cu
paddle/fluid/operators/optimizers/adam_op.cu
+0
-2
paddle/fluid/operators/optimizers/adamax_op.cu
paddle/fluid/operators/optimizers/adamax_op.cu
+0
-2
paddle/fluid/operators/optimizers/decayed_adagrad_op.cu
paddle/fluid/operators/optimizers/decayed_adagrad_op.cu
+0
-2
paddle/fluid/operators/optimizers/ftrl_op.cu
paddle/fluid/operators/optimizers/ftrl_op.cu
+0
-2
paddle/fluid/operators/optimizers/proximal_adagrad_op.cu
paddle/fluid/operators/optimizers/proximal_adagrad_op.cu
+0
-2
paddle/fluid/operators/optimizers/proximal_gd_op.cu
paddle/fluid/operators/optimizers/proximal_gd_op.cu
+0
-2
paddle/fluid/operators/optimizers/rmsprop_op.cu
paddle/fluid/operators/optimizers/rmsprop_op.cu
+0
-2
paddle/fluid/operators/pad_constant_like_op.cu
paddle/fluid/operators/pad_constant_like_op.cu
+0
-2
paddle/fluid/operators/pad_op.cu
paddle/fluid/operators/pad_op.cu
+0
-2
paddle/fluid/operators/sequence_ops/sequence_pool_op.cu
paddle/fluid/operators/sequence_ops/sequence_pool_op.cu
+0
-3
paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
...e/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
+0
-2
paddle/fluid/operators/smooth_l1_loss_op.cu
paddle/fluid/operators/smooth_l1_loss_op.cu
+0
-3
paddle/fluid/operators/softmax_with_cross_entropy_op.cu
paddle/fluid/operators/softmax_with_cross_entropy_op.cu
+0
-3
paddle/fluid/operators/split_selected_rows_op.h
paddle/fluid/operators/split_selected_rows_op.h
+6
-3
paddle/fluid/operators/squared_l2_distance_op.cu
paddle/fluid/operators/squared_l2_distance_op.cu
+0
-3
paddle/fluid/operators/squared_l2_norm_op.cu
paddle/fluid/operators/squared_l2_norm_op.cu
+0
-2
paddle/fluid/operators/sum_op.cu
paddle/fluid/operators/sum_op.cu
+0
-2
paddle/fluid/platform/cuda_helper_test.cu
paddle/fluid/platform/cuda_helper_test.cu
+1
-1
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+0
-1
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+42
-38
paddle/fluid/platform/float16.h
paddle/fluid/platform/float16.h
+0
-3
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+2
-0
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-1
python/paddle/fluid/average.py
python/paddle/fluid/average.py
+1
-0
python/paddle/fluid/tests/unittests/dist_mnist.py
python/paddle/fluid/tests/unittests/dist_mnist.py
+1
-1
python/paddle/fluid/tests/unittests/test_concat_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_concat_mkldnn_op.py
+61
-0
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+13
-8
python/paddle/fluid/tests/unittests/test_dist_mnist.py
python/paddle/fluid/tests/unittests/test_dist_mnist.py
+1
-1
python/paddle/fluid/tests/unittests/test_regularizer.py
python/paddle/fluid/tests/unittests/test_regularizer.py
+135
-1
python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py
...ddle/fluid/tests/unittests/test_split_selected_rows_op.py
+5
-0
未找到文件。
CMakeLists.txt
浏览文件 @
50fce879
...
...
@@ -54,7 +54,7 @@ option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option
(
WITH_DOUBLE
"Compile PaddlePaddle with double precision"
OFF
)
option
(
WITH_RDMA
"Compile PaddlePaddle with RDMA support"
OFF
)
option
(
WITH_TIMER
"Compile PaddlePaddle with stats timer"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler
and gperftools
"
OFF
)
option
(
WITH_DOC
"Compile PaddlePaddle with documentation"
OFF
)
option
(
WITH_COVERAGE
"Compile PaddlePaddle with code coverage"
OFF
)
option
(
COVERALLS_UPLOAD
"Package code coverage data to coveralls"
OFF
)
...
...
@@ -254,6 +254,12 @@ elseif()
set
(
WITH_ANAKIN OFF CACHE STRING
"Anakin is used in MKL only now."
FORCE
)
endif
()
if
(
WITH_PROFILER
)
find_package
(
Gperftools REQUIRED
)
include_directories
(
${
GPERFTOOLS_INCLUDE_DIR
}
)
add_definitions
(
-DWITH_GPERFTOOLS
)
endif
()
include
(
generic
)
# simplify cmake module
include
(
package
)
# set paddle packages
include
(
ccache
)
# set ccache for compilation
...
...
cmake/FindGperftools.cmake
0 → 100644
浏览文件 @
50fce879
# Tries to find Gperftools.
#
# Usage of this module as follows:
#
# find_package(Gperftools)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# Gperftools_ROOT_DIR Set this variable to the root installation of
# Gperftools if the module has problems finding
# the proper installation path.
#
# Variables defined by this module:
#
# GPERFTOOLS_FOUND System has Gperftools libs/headers
# GPERFTOOLS_LIBRARIES The Gperftools libraries (tcmalloc & profiler)
# GPERFTOOLS_INCLUDE_DIR The location of Gperftools headers
find_library
(
GPERFTOOLS_TCMALLOC
NAMES tcmalloc
HINTS
${
Gperftools_ROOT_DIR
}
/lib
)
find_library
(
GPERFTOOLS_PROFILER
NAMES profiler
HINTS
${
Gperftools_ROOT_DIR
}
/lib
)
find_library
(
GPERFTOOLS_TCMALLOC_AND_PROFILER
NAMES tcmalloc_and_profiler
HINTS
${
Gperftools_ROOT_DIR
}
/lib
)
find_path
(
GPERFTOOLS_INCLUDE_DIR
NAMES gperftools/heap-profiler.h
HINTS
${
Gperftools_ROOT_DIR
}
/include
)
set
(
GPERFTOOLS_LIBRARIES
${
GPERFTOOLS_TCMALLOC_AND_PROFILER
}
)
include
(
FindPackageHandleStandardArgs
)
find_package_handle_standard_args
(
Gperftools
DEFAULT_MSG
GPERFTOOLS_LIBRARIES
GPERFTOOLS_INCLUDE_DIR
)
mark_as_advanced
(
Gperftools_ROOT_DIR
GPERFTOOLS_TCMALLOC
GPERFTOOLS_PROFILER
GPERFTOOLS_TCMALLOC_AND_PROFILER
GPERFTOOLS_LIBRARIES
GPERFTOOLS_INCLUDE_DIR
)
# create IMPORTED targets
if
(
Gperftools_FOUND AND NOT TARGET gperftools::tcmalloc
)
add_library
(
gperftools::tcmalloc UNKNOWN IMPORTED
)
set_target_properties
(
gperftools::tcmalloc PROPERTIES
IMPORTED_LOCATION
${
GPERFTOOLS_TCMALLOC
}
INTERFACE_INCLUDE_DIRECTORIES
"
${
GPERFTOOLS_INCLUDE_DIR
}
"
)
add_library
(
gperftools::profiler UNKNOWN IMPORTED
)
set_target_properties
(
gperftools::profiler PROPERTIES
IMPORTED_LOCATION
${
GPERFTOOLS_PROFILER
}
INTERFACE_INCLUDE_DIRECTORIES
"
${
GPERFTOOLS_INCLUDE_DIR
}
"
)
endif
()
cmake/configure.cmake
浏览文件 @
50fce879
...
...
@@ -86,6 +86,7 @@ endif(NOT WITH_GOLANG)
if
(
WITH_GPU
)
add_definitions
(
-DPADDLE_WITH_CUDA
)
add_definitions
(
-DEIGEN_USE_GPU
)
FIND_PACKAGE
(
CUDA REQUIRED
)
...
...
cmake/generic.cmake
浏览文件 @
50fce879
...
...
@@ -110,6 +110,14 @@ function(find_fluid_modules TARGET_NAME)
endif
()
endfunction
(
find_fluid_modules
)
function
(
common_link TARGET_NAME
)
if
(
WITH_PROFILER
)
target_link_libraries
(
${
TARGET_NAME
}
gperftools::profiler
)
endif
()
endfunction
()
# find all third_party modules is used for paddle static library
# for reduce the dependency when building the inference libs.
set_property
(
GLOBAL PROPERTY FLUID_THIRD_PARTY
)
...
...
@@ -274,6 +282,7 @@ function(cc_library TARGET_NAME)
endif
()
target_link_libraries
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
# cpplint code style
...
...
@@ -340,6 +349,7 @@ function(cc_binary TARGET_NAME)
if
(
cc_binary_DEPS
)
target_link_libraries
(
${
TARGET_NAME
}
${
cc_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_binary_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
endfunction
(
cc_binary
)
...
...
@@ -362,6 +372,7 @@ function(cc_test TARGET_NAME)
target_link_libraries
(
${
TARGET_NAME
}
${
win32_deps
}
)
endif
(
WIN32
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
common_link
(
${
TARGET_NAME
}
)
add_test
(
NAME
${
TARGET_NAME
}
COMMAND
${
TARGET_NAME
}
${
cc_test_ARGS
}
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
...
...
@@ -420,6 +431,7 @@ function(nv_binary TARGET_NAME)
if
(
nv_binary_DEPS
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_binary_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
endif
()
endfunction
(
nv_binary
)
...
...
@@ -433,6 +445,7 @@ function(nv_test TARGET_NAME)
cuda_add_executable
(
${
TARGET_NAME
}
${
nv_test_SRCS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
common_link
(
${
TARGET_NAME
}
)
add_test
(
${
TARGET_NAME
}
${
TARGET_NAME
}
)
if
(
nv_test_SERIAL
)
set_property
(
TEST
${
TARGET_NAME
}
PROPERTY RUN_SERIAL 1
)
...
...
@@ -499,6 +512,7 @@ function(hip_binary TARGET_NAME)
if
(
hip_binary_DEPS
)
target_link_libraries
(
${
TARGET_NAME
}
${
hip_binary_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
hip_binary_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
()
endif
()
endfunction
(
hip_binary
)
...
...
@@ -518,6 +532,7 @@ function(hip_test TARGET_NAME)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES LINKER_LANGUAGE HIP
)
target_link_libraries
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main memory gtest gflags
)
add_dependencies
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main memory gtest gflags
)
common_link
(
${
TARGET_NAME
}
)
add_test
(
${
TARGET_NAME
}
${
TARGET_NAME
}
)
endif
()
endfunction
(
hip_test
)
...
...
@@ -560,6 +575,7 @@ function(go_library TARGET_NAME)
endif
()
if
(
go_library_DEPS
)
add_dependencies
(
${
TARGET_NAME
}
${
go_library_DEPS
}
)
common_link
(
${
TARGET_NAME
}
)
endif
(
go_library_DEPS
)
# The "source file" of the library is `${dummyfile}` which never
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
50fce879
...
...
@@ -129,11 +129,13 @@ cc_test(version_test SRCS version_test.cc DEPS version)
cc_library
(
proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version
)
if
(
NOT WIN32
)
cc_library
(
ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph
)
cc_library
(
ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler
)
endif
(
NOT WIN32
)
if
(
WITH_NGRAPH
)
if
(
NOT WIN32
)
cc_library
(
ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph
)
cc_library
(
ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler ngraph
)
endif
(
NOT WIN32
)
endif
(
WITH_NGRAPH
)
cc_library
(
op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc
)
nv_test
(
op_registry_test SRCS op_registry_test.cc DEPS op_registry
)
...
...
@@ -169,11 +171,15 @@ if(WITH_DISTRIBUTE)
set
(
DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set_source_files_properties
(
executor.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
else
()
if
(
WITH_NGRAPH
)
if
(
NOT WIN32
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass
ngraph_operator variable_helper
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass ngraph
ngraph_operator variable_helper
)
else
(
NOT WIN32
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper
)
endif
(
NOT WIN32
)
else
(
WITH_NGRAPH
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper
)
endif
(
WITH_NGRAPH
)
cc_test
(
test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op
)
endif
()
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
50fce879
...
...
@@ -17,7 +17,6 @@ limitations under the License. */
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/ngraph_operator.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
...
...
@@ -26,6 +25,10 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef PADDLE_WITH_NGRAPH
#include "paddle/fluid/framework/ngraph_operator.h"
#endif
DECLARE_bool
(
benchmark
);
DEFINE_bool
(
use_mkldnn
,
false
,
"Use MKLDNN to run"
);
DEFINE_bool
(
use_ngraph
,
false
,
"Use NGRAPH to run"
);
...
...
@@ -88,11 +91,11 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
static
void
EnableFusedOp
(
ExecutorPrepareContext
*
ctx
)
{
#ifdef PADDLE_WITH_NGRAPH
VLOG
(
3
)
<<
"use_ngraph=True"
;
auto
intervals
=
FusedOperator
::
Fused
OpIntervals
(
&
ctx
->
ops_
);
auto
intervals
=
NgraphOperator
::
Ngraph
OpIntervals
(
&
ctx
->
ops_
);
for
(
auto
&
interval
:
intervals
)
{
auto
*
fused_op
=
new
FusedOperator
(
ctx
->
prog_
,
ctx
->
block_id_
,
interval
.
at
(
0
),
interval
.
at
(
1
));
*
interval
[
0
]
=
std
::
unique_ptr
<
OperatorBase
>
(
fused
_op
);
auto
*
ng_op
=
new
NgraphOperator
(
ctx
->
prog_
,
ctx
->
block_id_
,
interval
.
at
(
0
)
,
interval
.
at
(
1
));
*
interval
[
0
]
=
std
::
unique_ptr
<
OperatorBase
>
(
ng
_op
);
}
for
(
auto
it
=
intervals
.
rbegin
();
it
!=
intervals
.
rend
();
++
it
)
{
ctx
->
ops_
.
erase
(
it
->
at
(
0
)
+
1
,
it
->
at
(
1
));
...
...
paddle/fluid/framework/ngraph_bridge.cc
浏览文件 @
50fce879
...
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <functional>
#include <vector>
...
...
@@ -27,14 +26,15 @@ namespace paddle {
namespace
framework
{
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
prm
,
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
const
VariableNameMap
&
var_map
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
&
var_names
=
var_map
.
at
(
prm
);
auto
&
var_names
=
var_map
.
at
(
name
);
PADDLE_ENFORCE_EQ
(
var_names
.
size
(),
1
,
"op %s prm %s expects one associated var"
,
op
->
Type
(),
prm
);
"op %s name %s expects one associated var"
,
op
->
Type
(),
name
);
if
(
ngb_node_map
->
find
(
var_names
[
0
])
!=
ngb_node_map
->
end
())
{
return
(
*
ngb_node_map
)[
var_names
[
0
]];
}
else
{
...
...
@@ -43,42 +43,42 @@ static std::shared_ptr<ngraph::Node> GetNode(
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetInputNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
prm
,
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
return
GetNode
(
op
,
prm
,
op
->
Inputs
(),
ngb_node_map
);
return
GetNode
(
op
,
name
,
op
->
Inputs
(),
ngb_node_map
);
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetOutputNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
prm
,
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
return
GetNode
(
op
,
prm
,
op
->
Outputs
(),
ngb_node_map
);
return
GetNode
(
op
,
name
,
op
->
Outputs
(),
ngb_node_map
);
}
static
void
SetOutputNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
prm
,
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
std
::
shared_ptr
<
ngraph
::
Node
>
node
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
&
var_names
=
op
->
Outputs
().
at
(
prm
);
auto
&
var_names
=
op
->
Outputs
().
at
(
name
);
if
(
var_names
.
size
()
==
1
)
{
(
*
ngb_node_map
)[
var_names
[
0
]]
=
node
;
}
else
if
(
var_names
.
size
()
==
0
)
{
(
*
ngb_node_map
)[
""
]
=
node
;
}
else
{
PADDLE_THROW
(
"
prm %s has more than 1 var_names."
,
prm
);
PADDLE_THROW
(
"
name %s has more than 1 var_names."
,
name
);
}
}
static
bool
HasOutput
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
prm
)
{
const
std
::
string
name
)
{
auto
&
outputs
=
op
->
Outputs
();
if
(
outputs
.
find
(
prm
)
==
outputs
.
end
())
return
false
;
return
outputs
.
at
(
prm
).
size
()
>
0
;
if
(
outputs
.
find
(
name
)
==
outputs
.
end
())
return
false
;
return
outputs
.
at
(
name
).
size
()
>
0
;
}
template
<
typename
T
>
...
...
@@ -118,4 +118,3 @@ void NgraphBridge::BuildNgNode(const std::shared_ptr<OperatorBase>& op) {
}
// namespace framework
}
// namespace paddle
#endif
paddle/fluid/framework/ngraph_bridge.h
浏览文件 @
50fce879
...
...
@@ -14,8 +14,6 @@ limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <map>
#include <string>
...
...
@@ -53,4 +51,3 @@ class NgraphBridge {
}
// namespace framework
}
// namespace paddle
#endif
paddle/fluid/framework/ngraph_operator.cc
浏览文件 @
50fce879
...
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#include <glog/logging.h>
#include <algorithm>
...
...
@@ -58,9 +57,9 @@ typedef enum { /* nGraph support state on ops */
}
op_state
;
// perform graph build through bridge and execute computation
class
Ngraph
Operator
{
class
Ngraph
Engine
{
public:
explicit
Ngraph
Operator
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
,
explicit
Ngraph
Engine
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
,
const
std
::
vector
<
std
::
shared_ptr
<
OperatorBase
>>&
ops
,
const
std
::
unordered_map
<
std
::
string
,
ngraph
::
element
::
Type
>&
var_type_map
,
...
...
@@ -132,7 +131,7 @@ class NgraphOperator {
};
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
>>
FusedOperator
::
Fused
OpIntervals
(
NgraphOperator
::
Ngraph
OpIntervals
(
std
::
vector
<
std
::
unique_ptr
<
paddle
::
framework
::
OperatorBase
>>*
ops
)
{
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
>>
intervals
;
...
...
@@ -185,7 +184,7 @@ FusedOperator::FusedOpIntervals(
return
intervals
;
}
FusedOperator
::
Fused
Operator
(
NgraphOperator
::
Ngraph
Operator
(
const
ProgramDesc
&
prog
,
size_t
block_id
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
start
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
end
,
...
...
@@ -215,7 +214,7 @@ FusedOperator::FusedOperator(
Process
();
}
void
Fused
Operator
::
Process
()
{
void
Ngraph
Operator
::
Process
()
{
auto
&
bdesc
=
pdesc_
.
Block
(
block_
);
for
(
auto
&
var
:
bdesc
.
AllVars
())
{
if
(
!
(
var
->
GetType
()
==
proto
::
VarType
::
SELECTED_ROWS
||
...
...
@@ -251,7 +250,7 @@ void FusedOperator::Process() {
}
}
void
Fused
Operator
::
RunImpl
(
const
Scope
&
scope
,
void
Ngraph
Operator
::
RunImpl
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{
op_state
ng_op_state
=
PARTIAL_TEST
;
auto
&
bdesc
=
pdesc_
.
Block
(
block_
);
...
...
@@ -266,19 +265,19 @@ void FusedOperator::RunImpl(const Scope& scope,
ng_op_state
=
ng_op_state
==
PARTIAL_TEST
?
FULL_TEST
:
FULL_TRAIN
;
}
Ngraph
Operator
ngraph_op
(
scope
,
place
,
fused_ops_
,
var_type_map_
,
Ngraph
Engine
ngraph_engine
(
scope
,
place
,
fused_ops_
,
var_type_map_
,
persistables_
,
fetches_
,
post_op_inputs_
,
ng_op_state
);
ngraph_
op
.
Run
(
scope
,
place
);
ngraph_
engine
.
Run
(
scope
,
place
);
}
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Function
>>
Ngraph
Operator
::
func_cache_
=
{};
Ngraph
Engine
::
func_cache_
=
{};
std
::
shared_ptr
<
ngraph
::
runtime
::
Backend
>
Ngraph
Operator
::
backend_
=
std
::
shared_ptr
<
ngraph
::
runtime
::
Backend
>
Ngraph
Engine
::
backend_
=
ngraph
::
runtime
::
Backend
::
create
(
"CPU"
);
void
Ngraph
Operator
::
GetNgInputShape
(
std
::
shared_ptr
<
OperatorBase
>
op
)
{
void
Ngraph
Engine
::
GetNgInputShape
(
std
::
shared_ptr
<
OperatorBase
>
op
)
{
op
->
RuntimeInferShape
(
scope_
,
place_
);
for
(
auto
&
var_name_item
:
op
->
Inputs
())
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
...
...
@@ -301,7 +300,7 @@ void NgraphOperator::GetNgInputShape(std::shared_ptr<OperatorBase> op) {
}
}
void
Ngraph
Operator
::
BuildNgNodes
()
{
void
Ngraph
Engine
::
BuildNgNodes
()
{
for
(
auto
&
var_name
:
var_out_
)
{
if
(
var_node_map_
->
find
(
var_name
)
==
var_node_map_
->
end
())
{
auto
*
var
=
scope_
.
FindVar
(
var_name
);
...
...
@@ -323,7 +322,7 @@ void NgraphOperator::BuildNgNodes() {
}
}
void
Ngraph
Operator
::
BuildNgIO
()
{
void
Ngraph
Engine
::
BuildNgIO
()
{
std
::
unordered_set
<
std
::
string
>
inputs
;
std
::
unordered_set
<
std
::
string
>
outputs
;
...
...
@@ -395,7 +394,7 @@ void NgraphOperator::BuildNgIO() {
}
}
void
Ngraph
Operator
::
BuildNgFunction
()
{
void
Ngraph
Engine
::
BuildNgFunction
()
{
BuildNgNodes
();
ngraph_function_
=
nullptr
;
ngraph
::
NodeVector
func_outputs
;
...
...
@@ -416,7 +415,7 @@ void NgraphOperator::BuildNgFunction() {
std
::
make_shared
<
ngraph
::
Function
>
(
func_outputs
,
func_inputs
);
}
std
::
shared_ptr
<
std
::
string
>
Ngraph
Operator
::
GetCacheKey
()
{
std
::
shared_ptr
<
std
::
string
>
Ngraph
Engine
::
GetCacheKey
()
{
auto
cache_key
=
std
::
make_shared
<
std
::
string
>
(
""
);
*
cache_key
+=
std
::
to_string
(
fused_ops_
.
size
());
for
(
auto
&
op
:
fused_ops_
)
{
...
...
@@ -444,7 +443,7 @@ std::shared_ptr<std::string> NgraphOperator::GetCacheKey() {
return
cache_key
;
}
void
Ngraph
Operator
::
GetNgFunction
()
{
void
Ngraph
Engine
::
GetNgFunction
()
{
bool
cache_on
=
true
;
if
(
cache_on
)
{
std
::
string
cache_key_val
=
*
GetCacheKey
();
...
...
@@ -459,8 +458,7 @@ void NgraphOperator::GetNgFunction() {
}
}
void
NgraphOperator
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{
void
NgraphEngine
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{
std
::
vector
<
std
::
shared_ptr
<
ngraph
::
runtime
::
Tensor
>>
t_in
;
std
::
vector
<
std
::
shared_ptr
<
ngraph
::
runtime
::
Tensor
>>
t_out
;
...
...
@@ -545,7 +543,6 @@ void NgraphOperator::Run(const Scope& scope,
}
backend_
->
call
(
ngraph_function_
,
t_out
,
t_in
);
}
// Ngraph
Operator
::RunImpl
}
// Ngraph
Engine
::RunImpl
}
// namespace framework
}
// namespace paddle
#endif
paddle/fluid/framework/ngraph_operator.h
浏览文件 @
50fce879
...
...
@@ -14,8 +14,6 @@ limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <string>
#include <unordered_map>
...
...
@@ -34,14 +32,14 @@ limitations under the License. */
namespace
paddle
{
namespace
framework
{
class
Fused
Operator
:
public
OperatorBase
{
class
Ngraph
Operator
:
public
OperatorBase
{
public:
static
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
>>
Fused
OpIntervals
(
Ngraph
OpIntervals
(
std
::
vector
<
std
::
unique_ptr
<
paddle
::
framework
::
OperatorBase
>>*
ops
);
explicit
Fused
Operator
(
explicit
Ngraph
Operator
(
const
ProgramDesc
&
prog
,
size_t
block_id
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
start
,
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>::
iterator
end
,
...
...
@@ -64,4 +62,3 @@ class FusedOperator : public OperatorBase {
};
}
// namespace framework
}
// namespace paddle
#endif
paddle/fluid/framework/op_registry.h
浏览文件 @
50fce879
...
...
@@ -319,7 +319,7 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
#define USE_OP(op_type) \
USE_OP_ITSELF(op_type); \
USE_OP_KERNEL(op_type)
// clang-format o
ff
// clang-format o
n
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
50fce879
...
...
@@ -30,13 +30,36 @@ limitations under the License. */
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif
DEFINE_string
(
pe_profile_fname
,
""
,
"Profiler filename for PE, which generated by gperftools."
"Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable."
);
namespace
paddle
{
namespace
framework
{
static
std
::
once_flag
gProfileOnce
;
#ifdef WITH_GPERFTOOLS
static
bool
gProfileStarted
=
false
;
#endif
class
ParallelExecutorPrivate
{
public:
explicit
ParallelExecutorPrivate
(
const
std
::
vector
<
platform
::
Place
>
&
places
)
:
places_
(
places
)
{}
:
places_
(
places
)
{
if
(
!
FLAGS_pe_profile_fname
.
empty
())
{
std
::
call_once
(
gProfileOnce
,
[]
{
#ifdef WITH_GPERFTOOLS
ProfilerStart
(
FLAGS_pe_profile_fname
.
c_str
());
gProfileStarted
=
true
;
#else
LOG
(
WARNING
)
<<
"Paddle is not compiled with gperftools. "
"FLAGS_pe_profile_fname will be ignored"
;
#endif
});
}
}
~
ParallelExecutorPrivate
()
{
if
(
own_local_scope_
)
{
...
...
@@ -270,6 +293,12 @@ void ParallelExecutor::BCastParamsToDevices(
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
)
{
#ifdef WITH_GPERFTOOLS
if
(
gProfileStarted
)
{
ProfilerFlush
();
}
#endif
platform
::
RecordBlock
b
(
0
);
#ifdef PADDLE_WITH_CUDA
if
(
!
gcs_
.
empty
())
{
...
...
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
浏览文件 @
50fce879
...
...
@@ -44,9 +44,10 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
argument
->
SetMainProgram
(
program
.
release
());
}
else
if
(
argument
->
model_program_path_valid
()
&&
argument
->
model_params_path_valid
())
{
auto
program
=
LoadModel
(
argument
->
model_program_path
(),
argument
->
model_params_path
(),
argument
->
scope_ptr
(),
place
,
argument
->
model_from_memory
());
auto
program
=
LoadModel
(
argument
->
model_program_path
(),
argument
->
model_params_path
(),
argument
->
scope_ptr
(),
place
,
argument
->
model_from_memory_valid
()
&&
argument
->
model_from_memory
());
argument
->
SetMainProgram
(
program
.
release
());
}
else
{
PADDLE_THROW
(
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
50fce879
set
(
INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
)
set
(
INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
benchmark
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
INFERENCE_EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
analysis
${
analysis_deps
}
ir_pass_manager analysis_predictor
)
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
50fce879
...
...
@@ -30,8 +30,10 @@
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/tests/api/config_printer.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/utils/benchmark.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
model_name
,
""
,
"model name"
);
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data file"
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
...
...
@@ -40,6 +42,8 @@ DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
DEFINE_bool
(
use_analysis
,
true
,
"Running the inference program in analysis mode."
);
DEFINE_bool
(
record_benchmark
,
false
,
"Record benchmark after profiling the model"
);
DECLARE_bool
(
profile
);
DECLARE_int32
(
paddle_num_threads
);
...
...
@@ -192,8 +196,16 @@ void TestOneThreadPrediction(
predictor
->
Run
(
inputs
[
j
],
outputs
,
batch_size
);
}
}
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
run_timer
.
toc
()
/
num_times
,
inputs
.
size
());
double
latency
=
run_timer
.
toc
()
/
num_times
;
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
latency
,
inputs
.
size
());
if
(
FLAGS_record_benchmark
)
{
Benchmark
benchmark
;
benchmark
.
SetName
(
FLAGS_model_name
);
benchmark
.
SetBatchSize
(
batch_size
);
benchmark
.
SetLatency
(
latency
);
benchmark
.
PersistToFile
(
"benchmark_record.txt"
);
}
}
}
...
...
paddle/fluid/inference/tests/api/trt_models_tester.cc
浏览文件 @
50fce879
...
...
@@ -135,6 +135,9 @@ TEST(TensorRT_resnext50, compare) {
// Profiles the resnext50 model found under FLAGS_infer_model with the
// analysis pass enabled; whether TensorRT is used follows FLAGS_use_tensorrt.
TEST(TensorRT_resnext50, profile) {
  // Name stored in FLAGS_model_name before the profiling run starts.
  FLAGS_model_name = "resnext50";
  // Set FLAGS_record_benchmark to true to record benchmark to file.
  // FLAGS_record_benchmark=true;
  std::string model_dir = FLAGS_infer_model;
  model_dir += "/resnext50";
  profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt);
}
...
...
paddle/fluid/inference/utils/benchmark.cc
浏览文件 @
50fce879
...
...
@@ -30,7 +30,7 @@ std::string Benchmark::SerializeToString() const {
ss
<<
'\n'
;
ss
<<
name_
<<
"
\t
"
;
ss
<<
batch_size_
<<
"
\t
"
;
ss
<<
batch_size_
<<
"
\t
\t
"
;
ss
<<
num_threads_
<<
"
\t
"
;
ss
<<
latency_
<<
"
\t
"
;
ss
<<
1000.0
/
latency_
;
...
...
paddle/fluid/inference/utils/visualizer.cc
浏览文件 @
50fce879
...
...
@@ -26,9 +26,6 @@ DEFINE_string(model_dir, "", "model directory");
DEFINE_string
(
model_program_path
,
""
,
"model program path"
);
DEFINE_string
(
model_params_path
,
""
,
"model params path"
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
graph_to_program_pass
);
using
paddle
::
inference
::
analysis
::
Argument
;
namespace
paddle
{
...
...
@@ -40,7 +37,6 @@ void Visualizer::SetArgument(Argument *argument) { argument_ = argument; }
bool
Visualizer
::
Run
()
{
paddle
::
framework
::
InitDevices
(
false
);
paddle
::
inference
::
analysis
::
Analyzer
().
Run
(
argument_
);
return
true
;
}
...
...
@@ -77,7 +73,7 @@ int main(int argc, char *argv[]) {
// Only 1 pass, default filename is 0_ir_origin.dot
// For more details, looking for paddle::inference::analysis::IRPassManager
argument
.
SetIrAnalysisPasses
({
"graph_viz_pass"
});
argument
.
SetIrAnalysisPasses
({
"
infer_clean_graph_pass"
,
"
graph_viz_pass"
});
std
::
unique_ptr
<
paddle
::
framework
::
Scope
>
scope
{
new
paddle
::
framework
::
Scope
()};
...
...
@@ -90,3 +86,7 @@ int main(int argc, char *argv[]) {
return
0
;
}
USE_PASS
(
infer_clean_graph_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
graph_to_program_pass
);
paddle/fluid/operators/activation_op.h
浏览文件 @
50fce879
...
...
@@ -301,23 +301,22 @@ template <typename T>
struct
GeluFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
auto
temp
=
((
x
*
static_cast
<
T
>
(
M_SQRT1_2
)).
erf
()).
template
cast
<
T
>().
eval
();
auto
temp
=
(
x
*
static_cast
<
T
>
(
M_SQRT1_2
)).
erf
();
out
.
device
(
d
)
=
x
*
static_cast
<
T
>
(
0.5
)
*
(
static_cast
<
T
>
(
1
)
+
temp
);
}
};
template
<
typename
T
>
struct
GeluGradFunctor
:
BaseActivationFunctor
<
T
>
{
bool
Inplace
()
const
{
return
IsInplace
(
"gelu"
);
}
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
temp
=
(
static_cast
<
T
>
(
0.5
*
M_2_SQRTPI
*
M_SQRT1_2
)
*
x
*
((
-
static_cast
<
T
>
(
0.5
)
*
x
.
square
()).
exp
()))
.
template
cast
<
T
>()
.
eval
();
dx
.
device
(
d
)
=
dout
*
(
out
/
x
+
temp
);
auto
first
=
static_cast
<
T
>
(
0.5
)
*
(
static_cast
<
T
>
(
1
)
+
((
x
*
static_cast
<
T
>
(
M_SQRT1_2
)).
erf
()));
auto
second
=
static_cast
<
T
>
(
0.5
*
M_2_SQRTPI
*
M_SQRT1_2
)
*
x
*
(
-
static_cast
<
T
>
(
0.5
)
*
x
.
square
()).
exp
();
dx
.
device
(
d
)
=
dout
*
(
first
+
second
);
}
};
...
...
paddle/fluid/operators/bilinear_tensor_product_op.cu
浏览文件 @
50fce879
...
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/bilinear_tensor_product_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/concat_mkldnn_op.cc
0 → 100644
浏览文件 @
50fce879
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include "paddle/fluid/operators/concat_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace
paddle
{
namespace
operators
{
using
framework
::
DataLayout
;
using
framework
::
Tensor
;
using
mkldnn
::
memory
;
using
mkldnn
::
primitive
;
using
mkldnn
::
concat
;
using
mkldnn
::
stream
;
using
platform
::
to_void_cast
;
// Checks that every input tensor has the MKL-DNN data layout and a defined
// MKL-DNN memory format. PADDLE_ENFORCE fails on the first tensor that
// violates either condition.
//
// The vector is taken by const reference so that the list of tensor pointers
// is not copied on each call.
static void EnforceLayouts(const std::vector<const Tensor*>& inputs) {
  for (const Tensor* input : inputs) {
    const bool is_layout_correct = input->layout() == DataLayout::kMKLDNN;
    const bool is_format_defined =
        input->format() != memory::format::format_undef;
    PADDLE_ENFORCE(is_layout_correct && is_format_defined,
                   "Wrong layout/format set for Input tensor");
  }
}
// Returns the MKL-DNN memory primitive descriptor describing `input` on
// `engine`. Dimensions and memory format are read from the tensor; the
// element type is fixed to f32.
static memory::primitive_desc CreateMemPrimDesc(const Tensor& input,
                                                const mkldnn::engine& engine) {
  const auto shape = paddle::framework::vectorize2int(input.dims());
  const memory::desc mem_desc(shape, mkldnn::memory::f32, input.format());
  return memory::primitive_desc(mem_desc, engine);
}
// Reads the memory format stored in the destination descriptor of `concat_pd`
// and returns it as a memory::format value.
static mkldnn::memory::format GetDstMemFormat(
    const concat::primitive_desc& concat_pd) {
  const auto dst_desc = concat_pd.dst_primitive_desc().desc();
  return static_cast<memory::format>(dst_desc.data.format);
}
// Returns the execution place of `ctx` as a CPUPlace. PADDLE_ENFORCE fails
// with "It must use CPUPlace." when the context's place is not a CPU place.
static platform::CPUPlace GetCpuPlace(
    const paddle::framework::ExecutionContext& ctx) {
  const auto current_place = ctx.GetPlace();
  PADDLE_ENFORCE(paddle::platform::is_cpu_place(current_place),
                 "It must use CPUPlace.");
  return boost::get<platform::CPUPlace>(current_place);
}
// Returns the engine exposed by the MKLDNNDeviceContext of `ctx`.
static const mkldnn::engine& GetMKLDNNEngine(
    const paddle::framework::ExecutionContext& ctx) {
  return ctx.template device_context<platform::MKLDNNDeviceContext>()
      .GetEngine();
}
template
<
typename
T
>
class
ConcatPrimitiveFactory
{
public:
concat
::
primitive_desc
CreateConcatPrimDescriptor
(
const
std
::
vector
<
const
Tensor
*>
multi_input
,
Tensor
*
output
,
int
concat_axis
,
const
mkldnn
::
engine
&
mkldnn_engine
)
{
CreateSourcesDescriptors
(
multi_input
,
mkldnn_engine
);
auto
dst_desc
=
CreateDstMemDescriptor
(
output
);
return
concat
::
primitive_desc
(
dst_desc
,
concat_axis
,
srcs_pd
);
}
concat
CreateConcatPrimitive
(
const
concat
::
primitive_desc
&
concat_pd
,
Tensor
*
output
,
platform
::
CPUPlace
place
)
{
CreateSourcePrimitiveAts
();
dst_mem
=
CreateDstMemory
(
concat_pd
,
output
,
place
);
return
concat
(
concat_pd
,
inputs
,
dst_mem
.
get
());
}
private:
memory
::
desc
CreateDstMemDescriptor
(
Tensor
*
output
)
{
auto
dst_dims
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
return
memory
::
desc
(
dst_dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
memory
::
format
::
any
);
}
mkldnn
::
memory
CreateDstMemory
(
const
concat
::
primitive_desc
&
concat_pd
,
Tensor
*
output
,
platform
::
CPUPlace
place
)
{
return
memory
(
concat_pd
.
dst_primitive_desc
(),
output
->
mutable_data
<
T
>
(
place
));
}
void
CreateSourcesDescriptors
(
const
std
::
vector
<
const
Tensor
*>
multi_input
,
const
mkldnn
::
engine
&
mkldnn_engine
)
{
for
(
size_t
i
=
0
;
i
<
multi_input
.
size
();
i
++
)
{
auto
mem_prim_desc
=
CreateMemPrimDesc
(
*
multi_input
[
i
],
mkldnn_engine
);
srcs_pd
.
push_back
(
mem_prim_desc
);
srcs
.
push_back
(
memory
(
mem_prim_desc
,
to_void_cast
(
multi_input
[
i
]
->
data
<
T
>
())));
}
}
void
CreateSourcePrimitiveAts
()
{
inputs
.
reserve
(
srcs
.
size
());
for
(
size_t
i
=
0
;
i
<
srcs
.
size
();
i
++
)
{
inputs
.
push_back
(
srcs
[
i
]);
}
}
private:
std
::
vector
<
memory
::
primitive_desc
>
srcs_pd
;
std
::
vector
<
memory
>
srcs
;
std
::
vector
<
primitive
::
at
>
inputs
;
boost
::
optional
<
memory
>
dst_mem
;
// TODO(mgallus): change to std::optional
};
// upon introduction of C++17 to paddle
// MKL-DNN kernel for the concat operator: joins the "X" inputs along the
// "axis" attribute into "Out" and tags the output with the MKL-DNN layout and
// the memory format chosen for the destination.
template <typename T>
class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    // Execution resources; GetCpuPlace enforces a CPU place.
    auto cpu_place = GetCpuPlace(ctx);
    const auto& engine = GetMKLDNNEngine(ctx);

    // Operator inputs, output and attribute.
    auto in_tensors = ctx.MultiInput<Tensor>("X");
    EnforceLayouts(in_tensors);
    Tensor* out_tensor = ctx.Output<Tensor>("Out");
    const int axis = ctx.Attr<int>("axis");

    // Build the primitive descriptor first, then the primitive itself.
    ConcatPrimitiveFactory<T> factory;
    auto concat_pd =
        factory.CreateConcatPrimDescriptor(in_tensors, out_tensor, axis, engine);
    auto concat_prim =
        factory.CreateConcatPrimitive(concat_pd, out_tensor, cpu_place);

    // Run the primitive on an eager stream and block until it has finished.
    stream eager_stream(stream::kind::eager);
    eager_stream.submit({concat_prim}).wait();

    out_tensor->set_layout(DataLayout::kMKLDNN);
    out_tensor->set_format(GetDstMemFormat(concat_pd));
  }
};
}
// namespace operators
}
// namespace paddle
// Short alias for paddle::operators, used by the registration below.
namespace
ops
=
paddle
::
operators
;
// Registers ops::ConcatMKLDNNOpKernel<float> as the MKLDNN kernel of the
// `concat` operator for ::paddle::platform::CPUPlace.
REGISTER_OP_KERNEL
(
concat
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
ops
::
ConcatMKLDNNOpKernel
<
float
>
)
paddle/fluid/operators/concat_op.cc
浏览文件 @
50fce879
...
...
@@ -13,10 +13,13 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/concat_op.h"
#include <string>
#include <vector>
#ifdef PADDLE_WITH_MKLDNN
#include <paddle/fluid/platform/mkldnn_helper.h>
#endif
namespace
paddle
{
namespace
operators
{
using
framework
::
Tensor
;
...
...
@@ -59,6 +62,22 @@ class ConcatOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
}
protected:
// Selects the kernel type for concat. The data type is taken from the first
// "X" input variable. When built with PADDLE_WITH_MKLDNN and
// platform::CanMKLDNNBeUsed(ctx) holds, the MKL-DNN layout and library are
// requested; otherwise the kernel type is built from data type and place only.
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const override {
  const auto first_input_type =
      framework::GetDataTypeOfVar(ctx.MultiInputVar("X")[0]);

#ifdef PADDLE_WITH_MKLDNN
  const bool mkldnn_usable = platform::CanMKLDNNBeUsed(ctx);
  if (mkldnn_usable) {
    return framework::OpKernelType(first_input_type, ctx.GetPlace(),
                                   framework::DataLayout::kMKLDNN,
                                   framework::LibraryType::kMKLDNN);
  }
#endif

  return framework::OpKernelType(first_input_type, ctx.GetPlace());
}
};
class
ConcatOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
...
...
@@ -66,6 +85,10 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
AddInput
(
"X"
,
"Input tensors of concat operator."
).
AsDuplicable
();
AddOutput
(
"Out"
,
"Output tensor of concat operator."
);
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false) Indicates if MKL-DNN kernel will be used"
)
.
SetDefault
(
false
);
AddAttr
<
int
>
(
"axis"
,
"The axis along which the input tensors will be concatenated."
)
.
SetDefault
(
0
);
...
...
paddle/fluid/operators/cos_sim_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/cos_sim_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/crop_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/crop_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/distributed/brpc_client.cc
浏览文件 @
50fce879
...
...
@@ -158,7 +158,7 @@ ChannelQueuePtr BRPCClient::GetChannel(const std::string& ep) {
for
(
int
i
=
0
;
i
<
FLAGS_brpc_channel_num
;
++
i
)
{
std
::
shared_ptr
<
ChannelContext
>
c
(
new
ChannelContext
());
if
(
c
->
channel
.
Init
(
ep
.
c_str
(),
&
options
)
!=
0
)
{
LOG
(
ERROR
)
<<
"Fail to initialize channel"
;
LOG
(
FATAL
)
<<
"Fail to initialize channel"
;
return
nullptr
;
}
...
...
paddle/fluid/operators/distributed/grpc_client.cc
浏览文件 @
50fce879
...
...
@@ -390,8 +390,7 @@ void GRPCClient::Proceed() {
VLOG
(
3
)
<<
c
->
GetVarHandlePtr
()
->
String
()
<<
" process"
;
c
->
Process
();
}
else
if
(
c
->
status_
.
error_code
()
==
grpc
::
StatusCode
::
DEADLINE_EXCEEDED
)
{
// FIXME(gongwb): parse error_details?
LOG
(
ERROR
)
<<
c
->
GetVarHandlePtr
()
->
String
()
LOG
(
FATAL
)
<<
c
->
GetVarHandlePtr
()
->
String
()
<<
" meets grpc error, error_code:"
<<
c
->
status_
.
error_code
()
<<
" error_message:"
<<
c
->
status_
.
error_message
()
<<
" error_details:"
<<
c
->
status_
.
error_details
();
...
...
paddle/fluid/operators/dropout_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include <thrust/device_ptr.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/random.h>
...
...
paddle/fluid/operators/elementwise/elementwise_add_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
#include "paddle/fluid/platform/float16.h"
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/elementwise/elementwise_max_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/elementwise/elementwise_min_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/elementwise/elementwise_min_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/elementwise/elementwise_pow_op.cu
浏览文件 @
50fce879
...
...
@@ -8,8 +8,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/elementwise/elementwise_pow_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/elementwise/elementwise_sub_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/elementwise/elementwise_sub_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/expand_op.cu
浏览文件 @
50fce879
...
...
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/expand_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/gru_unit_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/gru_unit_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/hinge_loss_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/hinge_loss_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/huber_loss_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/huber_loss_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/im2sequence_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/im2sequence_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/isfinite_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/isfinite_op.h"
#include "paddle/fluid/platform/float16.h"
...
...
paddle/fluid/operators/l1_norm_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/l1_norm_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/log_loss_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/log_loss_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/math/context_project.cu
浏览文件 @
50fce879
...
...
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/math/context_project.h"
namespace
paddle
{
...
...
paddle/fluid/operators/math/jit_kernel_layer_norm.cc
浏览文件 @
50fce879
...
...
@@ -79,16 +79,16 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
}
};
#define INTRIAVX_FLOAT(isa,
block)
\
#define INTRIAVX_FLOAT(isa,
jit_block)
\
template <> \
LayerNormKernelImpl<float, isa,
block>::LayerNormKernelImpl(int right)
\
LayerNormKernelImpl<float, isa,
jit_block>::LayerNormKernelImpl(int right)
\
: LayerNormKernel<float>() { \
this->num_ = right; \
this->rest_ = this->num_ % YMM_FLOAT_BLOCK; \
this->end_ = this->num_ - this->rest_; \
} \
template <> \
void LayerNormKernelImpl<float,
platform::avx, block>::Compute(
\
void LayerNormKernelImpl<float,
isa, jit_block>::Compute(
\
float* x, float* out, float* mean, float* var, const float* scale, \
const float* bias, int height, const float epsilon) const { \
__m256 sum; \
...
...
@@ -97,6 +97,7 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
__m256 tmp; \
size_t offset; \
size_t j; \
size_t block = YMM_FLOAT_BLOCK; \
__m256 reverse_num_vec = \
_mm256_div_ps(_mm256_set1_ps(1.0), _mm256_set1_ps(this->num_)); \
__m256 epsilon_vec = _mm256_set1_ps(epsilon); \
...
...
@@ -221,12 +222,14 @@ INTRIAVX_FLOAT(platform::avx, kEQ8);
INTRIAVX_FLOAT
(
platform
::
avx
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kGT16
);
#endif
#ifdef __AVX2__
INTRIAVX_FLOAT
(
platform
::
avx2
,
kEQ8
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kGT16
);
INTRIAVX_FLOAT
(
platform
::
avx512f
,
kEQ8
);
INTRIAVX_FLOAT
(
platform
::
avx512f
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx512f
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx512f
,
kGT16
);
#endif
#undef INTRIAVX_FLOAT
...
...
paddle/fluid/operators/math/math_function.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/operators/math/blas.h"
...
...
paddle/fluid/operators/math/sequence2batch.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/math/sequence2batch.h"
namespace
paddle
{
...
...
paddle/fluid/operators/math/softmax.cu
浏览文件 @
50fce879
...
...
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
...
...
paddle/fluid/operators/mean_op.cu
浏览文件 @
50fce879
...
...
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/mean_op.h"
#include "paddle/fluid/platform/float16.h"
...
...
paddle/fluid/operators/optimizers/adadelta_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/optimizers/adadelta_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/optimizers/adagrad_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/optimizers/adagrad_op.h"
...
...
paddle/fluid/operators/optimizers/adam_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/optimizers/adam_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/optimizers/adamax_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/optimizers/adamax_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/optimizers/decayed_adagrad_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/optimizers/ftrl_op.cu
浏览文件 @
50fce879
...
...
@@ -10,8 +10,6 @@ Unless required by applicable law or agreed to in writing, software distributed
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/optimizers/ftrl_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/optimizers/proximal_adagrad_op.cu
浏览文件 @
50fce879
...
...
@@ -10,8 +10,6 @@ Unless required by applicable law or agreed to in writing, software distributed
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/optimizers/proximal_adagrad_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/optimizers/proximal_gd_op.cu
浏览文件 @
50fce879
...
...
@@ -10,8 +10,6 @@ Unless required by applicable law or agreed to in writing, software distributed
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/optimizers/proximal_gd_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/optimizers/rmsprop_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/optimizers/rmsprop_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/pad_constant_like_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/pad_constant_like_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/pad_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/pad_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/sequence_ops/sequence_pool_op.cu
浏览文件 @
50fce879
...
...
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/sequence_ops/sequence_pool_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/smooth_l1_loss_op.cu
浏览文件 @
50fce879
...
...
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/smooth_l1_loss_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/softmax_with_cross_entropy_op.cu
浏览文件 @
50fce879
...
...
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include <cub/cub.cuh>
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/softmax_with_cross_entropy_op.h"
...
...
paddle/fluid/operators/split_selected_rows_op.h
浏览文件 @
50fce879
...
...
@@ -72,10 +72,11 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
for
(
size_t
i
=
0
;
i
<
outs_rows_idx
.
size
();
++
i
)
{
auto
rows_idx
=
outs_rows_idx
[
i
];
outs
[
i
]
->
set_height
(
height_sections
[
i
]);
if
(
rows_idx
.
size
()
>
0
)
{
auto
dims
=
x
->
GetCompleteDims
();
dims
[
0
]
=
rows_idx
.
size
();
outs
[
i
]
->
mutable_value
()
->
mutable_data
<
T
>
(
dims
,
x
->
place
());
outs
[
i
]
->
mutable_rows
()
->
clear
();
if
(
rows_idx
.
size
()
>
0
)
{
for
(
auto
idx
:
rows_idx
)
{
outs
[
i
]
->
mutable_rows
()
->
push_back
(
idx
-
abs_sections
[
i
]);
}
...
...
@@ -98,6 +99,8 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
}
}
}
PADDLE_ENFORCE_EQ
(
rows_idx
.
size
(),
outs
[
i
]
->
rows
().
size
(),
"rows should has the same size with tensor dim 0"
);
}
}
};
...
...
paddle/fluid/operators/squared_l2_distance_op.cu
浏览文件 @
50fce879
...
...
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/squared_l2_distance_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/squared_l2_norm_op.cu
浏览文件 @
50fce879
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/squared_l2_norm_op.h"
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/sum_op.cu
浏览文件 @
50fce879
...
...
@@ -8,8 +8,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/sum_op.h"
#include "paddle/fluid/platform/float16.h"
...
...
paddle/fluid/platform/cuda_helper_test.cu
浏览文件 @
50fce879
...
...
@@ -93,7 +93,7 @@ TEST(CudaAtomic, float16) {
// unalignment of uint8
void
TestUnalign
(
size_t
num
,
const
int
shift_bit
)
{
PADDLE_ENFORCE
(
num
%
2
==
0
,
"must be a multiple of 2"
);
ASSERT_EQ
(
num
%
2
,
0
);
float16
*
in1
,
*
in2
,
*
out
;
float16
*
d_in1
,
*
d_in2
;
size_t
size
=
sizeof
(
uint8_t
)
*
(
num
+
shift_bit
);
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
50fce879
...
...
@@ -21,7 +21,6 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/gpu_info.h"
#define EIGEN_USE_GPU
#endif
#ifdef PADDLE_WITH_MKLDNN
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
50fce879
...
...
@@ -62,22 +62,34 @@ inline std::string demangle(std::string name) { return name; }
#endif
struct
EnforceNotMet
:
public
std
::
exception
{
std
::
exception_ptr
exp_
;
std
::
string
err_str_
;
EnforceNotMet
(
std
::
exception_ptr
e
,
const
char
*
f
,
int
l
)
:
exp_
(
e
)
{
static
constexpr
int
TRACE_STACK_LIMIT
=
100
;
EnforceNotMet
(
std
::
exception_ptr
e
,
const
char
*
f
,
int
l
)
{
try
{
std
::
rethrow_exception
(
exp_
);
}
catch
(
const
std
::
exception
&
exp
)
{
std
::
rethrow_exception
(
e
);
}
catch
(
std
::
exception
&
e
)
{
Init
(
e
.
what
(),
f
,
l
);
}
}
template
<
typename
...
ARGS
>
EnforceNotMet
(
const
char
*
f
,
int
l
,
ARGS
...
args
)
{
Init
(
string
::
Sprintf
(
args
...),
f
,
l
);
}
const
char
*
what
()
const
noexcept
override
{
return
err_str_
.
c_str
();
}
private:
template
<
typename
StrType
>
inline
void
Init
(
StrType
what
,
const
char
*
f
,
int
l
)
{
static
constexpr
int
TRACE_STACK_LIMIT
=
100
;
std
::
ostringstream
sout
;
sout
<<
string
::
Sprintf
(
"%s at [%s:%d]"
,
exp
.
what
()
,
f
,
l
)
<<
std
::
endl
;
sout
<<
string
::
Sprintf
(
"%s at [%s:%d]"
,
what
,
f
,
l
)
<<
std
::
endl
;
sout
<<
"PaddlePaddle Call Stacks: "
<<
std
::
endl
;
#if !defined(_WIN32)
void
*
call_stack
[
TRACE_STACK_LIMIT
];
auto
size
=
backtrace
(
call_stack
,
TRACE_STACK_LIMIT
);
auto
symbols
=
backtrace_symbols
(
call_stack
,
size
);
Dl_info
info
;
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
if
(
dladdr
(
call_stack
[
i
],
&
info
)
&&
info
.
dli_sname
)
{
...
...
@@ -85,8 +97,8 @@ struct EnforceNotMet : public std::exception {
auto
addr_offset
=
static_cast
<
char
*>
(
call_stack
[
i
])
-
static_cast
<
char
*>
(
info
.
dli_saddr
);
sout
<<
string
::
Sprintf
(
"%-3d %*0p %s + %zd
\n
"
,
i
,
2
+
sizeof
(
void
*
)
*
2
,
call_stack
[
i
]
,
demangled
,
addr_offset
);
2
+
sizeof
(
void
*
)
*
2
,
call_stack
[
i
],
demangled
,
addr_offset
);
}
else
{
sout
<<
string
::
Sprintf
(
"%-3d %*0p
\n
"
,
i
,
2
+
sizeof
(
void
*
)
*
2
,
call_stack
[
i
]);
...
...
@@ -98,9 +110,6 @@ struct EnforceNotMet : public std::exception {
#endif
err_str_
=
sout
.
str
();
}
}
const
char
*
what
()
const
noexcept
{
return
err_str_
.
c_str
();
}
};
struct
EOFException
:
public
std
::
exception
{
...
...
@@ -243,12 +252,7 @@ inline void throw_on_error(T e) {
}
#define PADDLE_THROW(...) \
do { \
throw ::paddle::platform::EnforceNotMet( \
std::make_exception_ptr( \
std::runtime_error(paddle::string::Sprintf(__VA_ARGS__))), \
__FILE__, __LINE__); \
} while (false)
throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)
#ifndef REPLACE_ENFORCE_GLOG
#define PADDLE_ENFORCE(...) \
...
...
paddle/fluid/platform/float16.h
浏览文件 @
50fce879
...
...
@@ -71,9 +71,6 @@ struct float16;
}
// namespace platform
}
// namespace paddle
// NOTE():
// Do not move the eigen.h header, otherwise the eigen_vector<bool> will failed.
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "unsupported/Eigen/CXX11/Tensor"
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
50fce879
...
...
@@ -336,6 +336,8 @@ PYBIND11_MODULE(core, m) {
.
def
(
"get_tensor"
,
[](
SelectedRows
&
self
)
{
return
self
.
mutable_value
();
},
py
::
return_value_policy
::
reference
)
.
def
(
"numel"
,
[](
SelectedRows
&
self
)
->
int64_t
{
return
self
.
value
().
numel
();
})
.
def
(
"set_height"
,
&
SelectedRows
::
set_height
)
.
def
(
"height"
,
&
SelectedRows
::
height
)
.
def
(
"set_rows"
,
...
...
python/paddle/fluid/__init__.py
浏览文件 @
50fce879
...
...
@@ -127,7 +127,8 @@ def __bootstrap__():
'use_ngraph'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
]
if
'Darwin'
not
in
sysstr
:
read_env_flags
.
append
(
'use_pinned_memory'
)
...
...
python/paddle/fluid/average.py
浏览文件 @
50fce879
...
...
@@ -48,6 +48,7 @@ class WeightedAverage(object):
Examples:
.. code-block:: python
avg = fluid.average.WeightedAverage()
avg.add(value=2.0, weight=1)
avg.add(value=4.0, weight=2)
...
...
python/paddle/fluid/tests/unittests/dist_mnist.py
浏览文件 @
50fce879
...
...
@@ -93,7 +93,7 @@ class TestDistMnist2x2(TestDistRunnerBase):
# TODO(typhoonzero): fix distributed adam optimizer
# opt = fluid.optimizer.AdamOptimizer(
# learning_rate=0.001, beta1=0.9, beta2=0.999)
opt
=
fluid
.
optimizer
.
Momentum
(
learning_rate
=
0.001
,
momentum
=
0.9
)
opt
=
fluid
.
optimizer
.
Momentum
(
learning_rate
=
self
.
lr
,
momentum
=
0.9
)
# Reader
train_reader
=
paddle
.
batch
(
...
...
python/paddle/fluid/tests/unittests/test_concat_mkldnn_op.py
0 → 100644
浏览文件 @
50fce879
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
from
test_concat_op
import
TestConcatOp
,
TestConcatOp2
,
TestConcatOp3
class TestMKLDNNConcatOp(TestConcatOp):
    """Re-run the TestConcatOp case with the ``use_mkldnn`` attribute set."""

    def setUp(self):
        # Build the base fixture first, then flip it over to MKLDNN.
        super(TestMKLDNNConcatOp, self).setUp()
        self._cpu_only = True
        self.attrs["use_mkldnn"] = True

    def init_kernel_type(self):
        # NOTE(review): presumably a hook consulted by the base test case;
        # confirm against test_concat_op.
        self.use_mkldnn = True

    def test_check_grad(self):
        # Intentionally empty: replaces any inherited gradient check with a
        # no-op for this variant.
        pass
class TestMKLDNNConcatOp2(TestConcatOp2):
    """Re-run the TestConcatOp2 case with the ``use_mkldnn`` attribute set."""

    def setUp(self):
        # Build the base fixture first, then flip it over to MKLDNN.
        super(TestMKLDNNConcatOp2, self).setUp()
        self._cpu_only = True
        self.attrs["use_mkldnn"] = True

    def init_kernel_type(self):
        # NOTE(review): presumably a hook consulted by the base test case;
        # confirm against test_concat_op.
        self.use_mkldnn = True

    def test_check_grad(self):
        # Intentionally empty: replaces any inherited gradient check with a
        # no-op for this variant.
        pass
class TestMKLDNNConcatOp3(TestConcatOp3):
    """Re-run the TestConcatOp3 case with the ``use_mkldnn`` attribute set."""

    def setUp(self):
        # Build the base fixture first, then flip it over to MKLDNN.
        super(TestMKLDNNConcatOp3, self).setUp()
        self._cpu_only = True
        self.attrs["use_mkldnn"] = True

    def init_kernel_type(self):
        # NOTE(review): presumably a hook consulted by the base test case;
        # confirm against test_concat_op.
        self.use_mkldnn = True

    def test_check_grad(self):
        # Intentionally empty: replaces any inherited gradient check with a
        # no-op for this variant.
        pass
# Run the MKLDNN concat test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
50fce879
...
...
@@ -32,7 +32,7 @@ DEFAULT_BATCH_SIZE = 2
class
TestDistRunnerBase
(
object
):
def
get_model
(
self
,
batch_size
=
DEFAULT_BATCH_SIZE
):
def
get_model
(
self
,
batch_size
=
DEFAULT_BATCH_SIZE
,
lr
=
0.1
):
raise
NotImplementedError
(
"get_model should be implemented by child classes."
)
...
...
@@ -56,6 +56,7 @@ class TestDistRunnerBase(object):
return
t
def
run_pserver
(
self
,
args
):
self
.
lr
=
args
.
lr
self
.
get_model
(
batch_size
=
args
.
batch_size
)
# NOTE: pserver should not call memory optimize
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
...
...
@@ -71,6 +72,7 @@ class TestDistRunnerBase(object):
exe
.
run
(
pserver_prog
)
def
run_trainer
(
self
,
args
):
self
.
lr
=
args
.
lr
test_program
,
avg_cost
,
train_reader
,
test_reader
,
batch_acc
,
predict
=
\
self
.
get_model
(
batch_size
=
args
.
batch_size
)
...
...
@@ -189,6 +191,7 @@ def runtime_main(test_class):
parser
.
add_argument
(
'--use_reader_alloc'
,
action
=
'store_true'
,
required
=
False
)
parser
.
add_argument
(
'--batch_size'
,
required
=
False
,
type
=
int
,
default
=
2
)
parser
.
add_argument
(
'--lr'
,
required
=
False
,
type
=
float
,
default
=
0.001
)
parser
.
add_argument
(
'--batch_merge_repeat'
,
required
=
False
,
type
=
int
,
default
=
1
)
...
...
@@ -234,6 +237,7 @@ class TestDistBase(unittest.TestCase):
self
.
_dc_asgd
=
False
# must use with async mode
self
.
_use_reader_alloc
=
True
self
.
_nccl2_mode
=
False
self
.
_lr
=
0.001
self
.
_setup_config
()
self
.
_after_setup_config
()
...
...
@@ -284,7 +288,8 @@ class TestDistBase(unittest.TestCase):
batch_size
=
DEFAULT_BATCH_SIZE
,
batch_merge_repeat
=
1
):
cmd
=
"%s %s --role trainer"
%
(
self
.
_python_interp
,
model
)
cmd
=
"%s %s --role trainer --lr %f"
%
(
self
.
_python_interp
,
model
,
self
.
_lr
)
if
batch_size
!=
DEFAULT_BATCH_SIZE
:
cmd
+=
" --batch_size %d"
%
batch_size
if
batch_merge_repeat
>
1
:
...
...
@@ -330,13 +335,13 @@ class TestDistBase(unittest.TestCase):
ps0_ep
,
ps1_ep
=
self
.
_ps_endpoints
.
split
(
","
)
tr_cmd
=
"%s %s --role trainer --endpoints %s --trainer_id %d --current_endpoint %s --trainers %d --update_method pserver"
tr_cmd
=
"%s %s --role trainer --endpoints %s --trainer_id %d --current_endpoint %s --trainers %d --update_method pserver
--lr %f
"
tr0_cmd
=
tr_cmd
%
\
(
self
.
_python_interp
,
model
,
self
.
_ps_endpoints
,
0
,
ps0_ep
,
self
.
_trainers
)
0
,
ps0_ep
,
self
.
_trainers
,
self
.
_lr
)
tr1_cmd
=
tr_cmd
%
\
(
self
.
_python_interp
,
model
,
self
.
_ps_endpoints
,
1
,
ps1_ep
,
self
.
_trainers
)
1
,
ps1_ep
,
self
.
_trainers
,
self
.
_lr
)
if
self
.
_sync_mode
:
tr0_cmd
+=
" --sync_mode"
...
...
@@ -425,13 +430,13 @@ class TestDistBase(unittest.TestCase):
worker_endpoints
=
self
.
_ps_endpoints
.
split
(
","
)
w0_ep
,
w1_ep
=
worker_endpoints
tr_cmd
=
"%s %s --role trainer --endpoints %s --trainer_id %d --current_endpoint %s --update_method nccl2"
tr_cmd
=
"%s %s --role trainer --endpoints %s --trainer_id %d --current_endpoint %s --update_method nccl2
--lr %f
"
tr0_cmd
=
tr_cmd
%
\
(
self
.
_python_interp
,
model
,
self
.
_ps_endpoints
,
0
,
w0_ep
)
0
,
w0_ep
,
self
.
_lr
/
2
)
tr1_cmd
=
tr_cmd
%
\
(
self
.
_python_interp
,
model
,
self
.
_ps_endpoints
,
1
,
w1_ep
)
1
,
w1_ep
,
self
.
_lr
/
2
)
if
self
.
_mem_opt
:
tr0_cmd
+=
" --mem_opt"
...
...
python/paddle/fluid/tests/unittests/test_dist_mnist.py
浏览文件 @
50fce879
...
...
@@ -36,7 +36,7 @@ class TestDistMnistNCCL2(TestDistBase):
def
test_dist_train
(
self
):
import
paddle.fluid
as
fluid
if
fluid
.
core
.
is_compiled_with_cuda
():
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
1
)
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
1
e-5
)
class
TestDistMnist2x2Lars
(
TestDistBase
):
...
...
python/paddle/fluid/tests/unittests/test_regularizer.py
浏览文件 @
50fce879
...
...
@@ -15,7 +15,12 @@
from
__future__
import
print_function
import
unittest
from
functools
import
partial
import
contextlib
import
numpy
as
np
import
paddle
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
import
paddle.fluid.framework
as
framework
import
paddle.fluid.optimizer
as
optimizer
import
paddle.fluid.regularizer
as
regularizer
...
...
@@ -97,5 +102,134 @@ class TestL1DecayRegularizer(unittest.TestCase):
self
.
assertEqual
(
block
.
ops
[
-
3
].
type
,
'sign'
)
def bow_net(data,
            label,
            dict_dim,
            is_sparse=False,
            emb_dim=128,
            hid_dim=128,
            hid_dim2=96,
            class_dim=2):
    """Build a bag-of-words classifier and return its mean cross-entropy cost.

    Layer order: embedding -> sum sequence-pool -> tanh -> fc(tanh, hid_dim)
    -> fc(tanh, hid_dim2) -> fc(softmax, class_dim), followed by cross entropy
    against ``label`` reduced with ``mean``.

    This model is from https://github.com/PaddlePaddle/models:
    fluid/PaddleNLP/text_classification/nets.py
    """
    layers = fluid.layers

    embedded = layers.embedding(
        input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim])
    pooled = layers.sequence_pool(input=embedded, pool_type='sum')

    # tanh activation followed by the two hidden tanh fc layers, in order.
    hidden = layers.tanh(pooled)
    for width in (hid_dim, hid_dim2):
        hidden = layers.fc(input=hidden, size=width, act="tanh")

    prediction = layers.fc(input=[hidden], size=class_dim, act="softmax")
    per_sample_cost = layers.cross_entropy(input=prediction, label=label)
    return layers.mean(x=per_sample_cost)
class
TestRegularizer
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
word_dict
=
paddle
.
dataset
.
imdb
.
word_dict
()
reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
train
(
self
.
word_dict
),
batch_size
=
8
)()
self
.
train_data
=
[
next
(
reader
)
for
_
in
range
(
5
)]
def
get_places
(
self
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
return
places
@
contextlib
.
contextmanager
def
scope_prog_guard
(
self
,
main_prog
,
startup_prog
):
scope
=
fluid
.
core
.
Scope
()
with
fluid
.
unique_name
.
guard
():
with
fluid
.
scope_guard
(
scope
):
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
yield
def
run_program
(
self
,
place
,
feed_list
):
exe
=
fluid
.
Executor
(
place
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feed_list
,
place
=
place
)
exe
.
run
(
fluid
.
default_startup_program
())
main_prog
=
fluid
.
default_main_program
()
param_list
=
[
var
.
name
for
var
in
main_prog
.
block
(
0
).
all_parameters
()]
param_sum
=
[]
for
data
in
self
.
train_data
:
out
=
exe
.
run
(
main_prog
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
param_list
)
p_sum
=
0
for
v
in
out
:
p_sum
+=
np
.
sum
(
np
.
abs
(
v
))
param_sum
.
append
(
p_sum
)
return
param_sum
def
check_l2decay_regularizer
(
self
,
place
,
model
):
main_prog
=
fluid
.
framework
.
Program
()
startup_prog
=
fluid
.
framework
.
Program
()
startup_prog
.
random_seed
=
1
with
self
.
scope_prog_guard
(
main_prog
=
main_prog
,
startup_prog
=
startup_prog
):
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
avg_cost
=
model
(
data
,
label
,
len
(
self
.
word_dict
))
optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
0.1
,
regularization
=
fluid
.
regularizer
.
L2Decay
(
1.0
))
optimizer
.
minimize
(
avg_cost
)
param_sum
=
self
.
run_program
(
place
,
[
data
,
label
])
return
param_sum
def
check_l2decay
(
self
,
place
,
model
):
main_prog
=
fluid
.
framework
.
Program
()
startup_prog
=
fluid
.
framework
.
Program
()
startup_prog
.
random_seed
=
1
with
self
.
scope_prog_guard
(
main_prog
=
main_prog
,
startup_prog
=
startup_prog
):
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
avg_cost_l2
=
model
(
data
,
label
,
len
(
self
.
word_dict
))
param_list
=
fluid
.
default_main_program
().
block
(
0
).
all_parameters
()
para_sum
=
[]
for
para
in
param_list
:
para_mul
=
fluid
.
layers
.
square
(
x
=
para
)
para_sum
.
append
(
fluid
.
layers
.
reduce_sum
(
input
=
para_mul
))
avg_cost_l2
+=
fluid
.
layers
.
sums
(
para_sum
)
*
.
5
optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
0.1
)
optimizer
.
minimize
(
avg_cost_l2
)
param_sum
=
self
.
run_program
(
place
,
[
data
,
label
])
return
param_sum
def
test_l2
(
self
):
for
place
in
self
.
get_places
():
dense_sparse_p_sum
=
[]
for
sparse
in
[
True
,
False
]:
model
=
partial
(
bow_net
,
is_sparse
=
sparse
)
framework_l2
=
self
.
check_l2decay_regularizer
(
place
,
model
)
l2
=
self
.
check_l2decay
(
place
,
model
)
assert
len
(
l2
)
==
len
(
framework_l2
)
for
i
in
range
(
len
(
l2
)):
assert
np
.
isclose
(
a
=
framework_l2
[
i
],
b
=
l2
[
i
],
rtol
=
5e-5
)
dense_sparse_p_sum
.
append
(
framework_l2
)
assert
len
(
dense_sparse_p_sum
[
0
])
==
len
(
dense_sparse_p_sum
[
1
])
for
i
in
range
(
len
(
dense_sparse_p_sum
[
0
])):
assert
np
.
isclose
(
a
=
dense_sparse_p_sum
[
0
][
i
],
b
=
dense_sparse_p_sum
[
1
][
i
],
rtol
=
5e-5
)
# Run the regularizer test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py
浏览文件 @
50fce879
...
...
@@ -63,6 +63,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
# expected output selected rows
expected_out0_rows
=
[
0
,
4
]
expected_out1_rows
=
[
0
,
2
]
expected_out2_rows
=
[]
expected_out4_rows
=
[
0
]
op
=
Operator
(
...
...
@@ -75,6 +76,7 @@ class TestSpliteSelectedRows(unittest.TestCase):
self
.
assertEqual
(
outs
[
0
].
rows
(),
expected_out0_rows
)
self
.
assertEqual
(
outs
[
1
].
rows
(),
expected_out1_rows
)
self
.
assertEqual
(
outs
[
2
].
rows
(),
expected_out2_rows
)
self
.
assertEqual
(
outs
[
4
].
rows
(),
expected_out4_rows
)
self
.
assertEqual
(
outs
[
0
].
height
(),
height_sections
[
0
])
...
...
@@ -84,6 +86,9 @@ class TestSpliteSelectedRows(unittest.TestCase):
self
.
assertAlmostEqual
(
4.0
,
np
.
array
(
outs
[
1
].
get_tensor
())[
1
,
1
])
self
.
assertAlmostEqual
(
8.0
,
np
.
array
(
outs
[
4
].
get_tensor
())[
0
,
1
])
self
.
assertEqual
(
outs
[
2
].
numel
(),
0
)
self
.
assertEqual
(
outs
[
3
].
numel
(),
0
)
def
check_grad_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
height
=
10
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录