提交 883c7860 编写于 作者: Z Zhang, Guoming

Merge branch 'develop' into prv-calibration

...@@ -72,6 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON) ...@@ -72,6 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON)
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF) option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
option(WITH_SYSTEM_BLAS "Use system blas library" OFF) option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION}) option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_FAST_MATH "Make use of fast math library" OFF)
# PY_VERSION # PY_VERSION
if(NOT PY_VERSION) if(NOT PY_VERSION)
......
...@@ -24,6 +24,7 @@ COPY ./paddle/scripts/docker/root/ /root/ ...@@ -24,6 +24,7 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --allow-downgrades patchelf \ apt-get install -y --allow-downgrades patchelf \
python3 python3-dev python3-pip \
git python-pip python-dev python-opencv openssh-server bison \ git python-pip python-dev python-opencv openssh-server bison \
libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \ libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
...@@ -70,24 +71,33 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8 ...@@ -70,24 +71,33 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U # specify sphinx version as 1.5.6 and remove -U option for [pip install -U
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest # sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
# version(1.7.1 for now), which causes building documentation failed. # version(1.7.1 for now), which causes building documentation failed.
RUN easy_install -U pip && \ RUN pip3 install -U wheel && \
pip3 install -U docopt PyYAML sphinx==1.5.6 && \
pip3 install sphinx-rtd-theme==0.1.9 recommonmark && \
easy_install -U pip && \
pip install -U wheel && \ pip install -U wheel && \
pip install -U docopt PyYAML sphinx==1.5.6 && \ pip install -U docopt PyYAML sphinx==1.5.6 && \
pip install sphinx-rtd-theme==0.1.9 recommonmark pip install sphinx-rtd-theme==0.1.9 recommonmark
RUN pip install pre-commit 'ipython==5.3.0' && \ RUN pip3 install pre-commit 'ipython==5.3.0' && \
pip3 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip3 install opencv-python && \
pip install pre-commit 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip install opencv-python pip install opencv-python
#For docstring checker #For docstring checker
RUN pip3 install pylint pytest astroid isort
RUN pip install pylint pytest astroid isort LinkChecker RUN pip install pylint pytest astroid isort LinkChecker
COPY ./python/requirements.txt /root/ COPY ./python/requirements.txt /root/
RUN pip3 install -r /root/requirements.txt
RUN pip install -r /root/requirements.txt RUN pip install -r /root/requirements.txt
# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
RUN apt-get install -y libssl-dev libffi-dev RUN apt-get install -y libssl-dev libffi-dev
RUN pip3 install certifi urllib3[secure]
RUN pip install certifi urllib3[secure] RUN pip install certifi urllib3[secure]
......
...@@ -40,7 +40,7 @@ set(OPENBLAS_LIB_SEARCH_PATHS ...@@ -40,7 +40,7 @@ set(OPENBLAS_LIB_SEARCH_PATHS
/usr/local/opt/openblas/lib) /usr/local/opt/openblas/lib)
find_path(OPENBLAS_INC_DIR NAMES cblas.h find_path(OPENBLAS_INC_DIR NAMES cblas.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS} NO_DEFAULT_PATH)
find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_library(OPENBLAS_LIB NAMES openblas find_library(OPENBLAS_LIB NAMES openblas
......
...@@ -62,8 +62,26 @@ if(NOT CMAKE_CROSSCOMPILING) ...@@ -62,8 +62,26 @@ if(NOT CMAKE_CROSSCOMPILING)
endif() endif()
if(WIN32) if(WIN32)
# windows stupid compile option for all targets. # windows header option for all targets.
add_definitions(-D_XKEYCHECK_H) add_definitions(-D_XKEYCHECK_H)
# Use symbols instead of absolute path, reduce the cmake link command length.
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1)
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1)
SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@")
SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@")
# Specify the program to use when building static libraries
SET(CMAKE_C_CREATE_STATIC_LIBRARY "<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>")
SET(CMAKE_CXX_CREATE_STATIC_LIBRARY "<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>")
# set defination for the dll export
if (NOT MSVC)
message(FATAL "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.")
endif(NOT MSVC)
endif(WIN32) endif(WIN32)
if(NOT WITH_GOLANG) if(NOT WITH_GOLANG)
......
...@@ -175,7 +175,10 @@ list(APPEND CUDA_NVCC_FLAGS "-std=c++11") ...@@ -175,7 +175,10 @@ list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC") list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
endif(NOT WIN32) endif(NOT WIN32)
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math") if(WITH_FAST_MATH)
# Make use of fast math library. https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
endif()
# in cuda9, suppress cuda warning on eigen # in cuda9, suppress cuda warning on eigen
list(APPEND CUDA_NVCC_FLAGS "-w") list(APPEND CUDA_NVCC_FLAGS "-w")
# Set :expt-relaxed-constexpr to suppress Eigen warnings # Set :expt-relaxed-constexpr to suppress Eigen warnings
......
...@@ -52,6 +52,7 @@ ExternalProject_Add( ...@@ -52,6 +52,7 @@ ExternalProject_Add(
PREFIX ${ANAKIN_SOURCE_DIR} PREFIX ${ANAKIN_SOURCE_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS ${CMAKE_ARGS_PREFIX} CMAKE_ARGS ${CMAKE_ARGS_PREFIX}
-DUSE_LOGGER=YES
-DUSE_X86_PLACE=YES -DUSE_X86_PLACE=YES
-DBUILD_WITH_UNIT_TEST=NO -DBUILD_WITH_UNIT_TEST=NO
-DPROTOBUF_ROOT=${THIRD_PARTY_PATH}/install/protobuf -DPROTOBUF_ROOT=${THIRD_PARTY_PATH}/install/protobuf
......
...@@ -3,6 +3,14 @@ INCLUDE(ExternalProject) ...@@ -3,6 +3,14 @@ INCLUDE(ExternalProject)
SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3)
SET(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}/src/extern_eigen3) SET(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}/src/extern_eigen3)
INCLUDE_DIRECTORIES(${EIGEN_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${EIGEN_INCLUDE_DIR})
if(NOT WITH_FAST_MATH)
# EIGEN_FAST_MATH: https://eigen.tuxfamily.org/dox/TopicPreprocessorDirectives.html
# enables some optimizations which might affect the accuracy of the result.
# This currently enables the SSE vectorization of sin() and cos(),
# and speedups sqrt() for single precision.
# Defined to 1 by default. Define it to 0 to disable.
add_definitions(-DEIGEN_FAST_MATH=0)
endif()
if(WITH_AMD_GPU) if(WITH_AMD_GPU)
ExternalProject_Add( ExternalProject_Add(
......
...@@ -27,7 +27,7 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -27,7 +27,7 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas) SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_INCLUDE_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE) SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
SET(CBLAS_LIBRARIES SET(CBLAS_LIBRARIES
"${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
...@@ -96,7 +96,7 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -96,7 +96,7 @@ IF(NOT ${CBLAS_FOUND})
ENDIF(NOT WIN32) ENDIF(NOT WIN32)
SET(CBLAS_PROVIDER openblas) SET(CBLAS_PROVIDER openblas)
IF(WITH_C_API) IF(WITH_C_API)
INSTALL(DIRECTORY ${CBLAS_INCLUDE_DIR} DESTINATION third_party/openblas) INSTALL(DIRECTORY ${CBLAS_INC_DIR} DESTINATION third_party/openblas)
# Because libopenblas.a is a symbolic link of another library, thus need to # Because libopenblas.a is a symbolic link of another library, thus need to
# install the whole directory. # install the whole directory.
IF(ANDROID) IF(ANDROID)
...@@ -117,8 +117,8 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -117,8 +117,8 @@ IF(NOT ${CBLAS_FOUND})
ENDIF(NOT ${CBLAS_FOUND}) ENDIF(NOT ${CBLAS_FOUND})
MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}") MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}")
MESSAGE(STATUS "BLAS Include: ${CBLAS_INCLUDE_DIR}") MESSAGE(STATUS "BLAS Include: ${CBLAS_INC_DIR}")
INCLUDE_DIRECTORIES(${CBLAS_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
# FIXME(gangliao): generate cblas target to track all high performance # FIXME(gangliao): generate cblas target to track all high performance
# linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas) # linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas)
......
...@@ -27,7 +27,6 @@ endfunction() ...@@ -27,7 +27,6 @@ endfunction()
CheckCompilerCXX11Flag() CheckCompilerCXX11Flag()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# safe_set_flag # safe_set_flag
# #
# Set a compile flag only if compiler is support # Set a compile flag only if compiler is support
...@@ -71,6 +70,20 @@ macro(safe_set_nvflag flag_name) ...@@ -71,6 +70,20 @@ macro(safe_set_nvflag flag_name)
endif() endif()
endmacro() endmacro()
macro(safe_set_static_flag) # set c_flags and cxx_flags to static or shared
if (BUILD_SHARED_LIBS)
return() # if build shared libs, the flags keep same with '/MD'
endif(BUILD_SHARED_LIBS)
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endmacro()
CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS) CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS)
if(NOT UINT64_MAX_EXISTS) if(NOT UINT64_MAX_EXISTS)
...@@ -97,9 +110,13 @@ SET(CMAKE_EXTRA_INCLUDE_FILES "") ...@@ -97,9 +110,13 @@ SET(CMAKE_EXTRA_INCLUDE_FILES "")
# Common flags. the compiler flag used for C/C++ sources whenever release or debug # Common flags. the compiler flag used for C/C++ sources whenever release or debug
# Do not care if this flag is support for gcc. # Do not care if this flag is support for gcc.
# https://github.com/PaddlePaddle/Paddle/issues/12773
if (NOT WIN32)
set(COMMON_FLAGS set(COMMON_FLAGS
-fPIC -fPIC
-fno-omit-frame-pointer -fno-omit-frame-pointer
-Werror
-Wall -Wall
-Wextra -Wextra
-Wnon-virtual-dtor -Wnon-virtual-dtor
...@@ -114,11 +131,6 @@ set(COMMON_FLAGS ...@@ -114,11 +131,6 @@ set(COMMON_FLAGS
-Wno-error=terminate # Warning in PADDLE_ENFORCE -Wno-error=terminate # Warning in PADDLE_ENFORCE
) )
# https://github.com/PaddlePaddle/Paddle/issues/12773
if (NOT WIN32)
list(APPEND COMMON_FLAGS -Werror)
endif()
set(GPU_COMMON_FLAGS set(GPU_COMMON_FLAGS
-fPIC -fPIC
-fno-omit-frame-pointer -fno-omit-frame-pointer
...@@ -133,30 +145,55 @@ set(GPU_COMMON_FLAGS ...@@ -133,30 +145,55 @@ set(GPU_COMMON_FLAGS
-Wno-error=array-bounds # Warnings in Eigen::array -Wno-error=array-bounds # Warnings in Eigen::array
) )
else(NOT WIN32)
set(COMMON_FLAGS
"/w") #disable all warnings.
set(GPU_COMMON_FLAGS
"/w") #disable all warnings
endif(NOT WIN32)
if (APPLE) if (APPLE)
if(NOT CMAKE_CROSSCOMPILING) if(NOT CMAKE_CROSSCOMPILING)
# On Mac OS X build fat binaries with x86_64 architectures by default. # On Mac OS X build fat binaries with x86_64 architectures by default.
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
endif() endif()
else() # On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0
set (COMMON_FLAGS -Wno-deprecated-register)
endif(APPLE)
if(LINUX)
set(GPU_COMMON_FLAGS set(GPU_COMMON_FLAGS
-Wall -Wall
-Wextra -Wextra
-Werror -Werror
${GPU_COMMON_FLAGS}) ${GPU_COMMON_FLAGS})
endif() endif(LINUX)
if(UNIX AND NOT APPLE) if(UNIX AND NOT APPLE)
# except apple from nix*Os family # except apple from nix*Os family
set(LINUX TRUE) set(LINUX TRUE)
endif(UNIX AND NOT APPLE) endif(UNIX AND NOT APPLE)
foreach(flag ${COMMON_FLAGS}) foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag}) safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach() endforeach()
foreach(flag ${GPU_COMMON_FLAGS}) foreach(flag ${GPU_COMMON_FLAGS})
safe_set_nvflag(${flag}) safe_set_nvflag(${flag})
endforeach() endforeach()
if(WIN32)
# windows build turn off warnings.
safe_set_static_flag()
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/W3")
string(REGEX REPLACE "/W3" "/w" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/W3")
endforeach(flag_var)
endif(WIN32)
add_custom_target(paddle_apis ALL add_custom_target(paddle_apis ALL
DEPENDS paddle_v2_apis paddle_fluid_apis) DEPENDS paddle_v2_apis)
add_custom_target(paddle_docs ALL add_custom_target(paddle_docs ALL
DEPENDS paddle_v2_docs paddle_v2_docs_cn DEPENDS paddle_v2_docs paddle_v2_docs_cn
paddle_fluid_docs paddle_fluid_docs_cn
paddle_mobile_docs paddle_mobile_docs_cn) paddle_mobile_docs paddle_mobile_docs_cn)
add_subdirectory(v2) add_subdirectory(v2)
add_subdirectory(fluid)
add_subdirectory(mobile) add_subdirectory(mobile)
...@@ -102,7 +102,7 @@ class Float16Transpiler: ...@@ -102,7 +102,7 @@ class Float16Transpiler:
continue continue
for input_arg in current_op.input_arg_names: for input_arg in current_op.input_arg_names:
if input_arg in self.input_map: if input_arg in self.input_map:
current_op.rename_input(input_arg, current_op._rename_input(input_arg,
self.input_map[input_arg]) self.input_map[input_arg])
def _remove_unused_var(self): def _remove_unused_var(self):
...@@ -187,7 +187,7 @@ class Float16Transpiler: ...@@ -187,7 +187,7 @@ class Float16Transpiler:
shape=var.shape, shape=var.shape,
persistable=var.persistable) persistable=var.persistable)
find_op(var) find_op(var)
var.op.rename_output(var_name, tmp_var_name) var.op._rename_output(var_name, tmp_var_name)
self.block._insert_op( self.block._insert_op(
i, i,
type="cast", type="cast",
......
此差异已折叠。
...@@ -13,3 +13,5 @@ if(WITH_INFERENCE) ...@@ -13,3 +13,5 @@ if(WITH_INFERENCE)
# NOTE: please add subdirectory inference at last. # NOTE: please add subdirectory inference at last.
add_subdirectory(inference) add_subdirectory(inference)
endif() endif()
add_subdirectory(train)
# windows treat symbolic file as a real file, which is different with unix # windows treat symbolic file as a real file, which is different with unix
# We create a hidden file and compile it instead of origin source file. # We create a hidden file and compile it instead of origin source file.
function(windows_symbolic TARGET) function(windows_symbolic TARGET)
...@@ -9,10 +10,22 @@ function(windows_symbolic TARGET) ...@@ -9,10 +10,22 @@ function(windows_symbolic TARGET)
if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cu) if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cu)
message(FATAL " ${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file.") message(FATAL " ${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file.")
endif() endif()
# only copy the xx.cu to .xx.cu when the content are modified
set(copy_flag 1)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc SOURCE_STR)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu TARGET_STR)
if (SOURCE_STR STREQUAL TARGET_STR)
set(copy_flag 0)
endif()
endif()
if (copy_flag)
add_custom_command(OUTPUT .${src}.cu add_custom_command(OUTPUT .${src}.cu
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc" "${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu" COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc" "${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu"
COMMENT "create hidden file of ${src}.cu") COMMENT "create hidden file of ${src}.cu")
endif(copy_flag)
add_custom_target(${TARGET} ALL DEPENDS .${src}.cu) add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
endforeach() endforeach()
endfunction() endfunction()
...@@ -56,9 +69,9 @@ else() ...@@ -56,9 +69,9 @@ else()
cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor) cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
endif() endif()
if (NOT WIN32) if (NOT WIN32)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version)
else() else()
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
endif (NOT WIN32) endif (NOT WIN32)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
...@@ -81,6 +94,8 @@ nv_test(data_device_transform_test SRCS data_device_transform_test.cu ...@@ -81,6 +94,8 @@ nv_test(data_device_transform_test SRCS data_device_transform_test.cu
if(WITH_GPU) if(WITH_GPU)
if (WIN32) if (WIN32)
# windows treat symbolic file as a real file, which is different with unix
# We create a hidden file and compile it instead of origin source file.
windows_symbolic(hidden_file SRCS data_type_transform.cu) windows_symbolic(hidden_file SRCS data_type_transform.cu)
nv_library(data_type_transform SRCS .data_type_transform.cu DEPS tensor) nv_library(data_type_transform SRCS .data_type_transform.cu DEPS tensor)
add_dependencies(data_type_transform hidden_file) add_dependencies(data_type_transform hidden_file)
...@@ -141,20 +156,22 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) ...@@ -141,20 +156,22 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass) cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass)
set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
else() else()
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass) cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
endif() endif()
if (NOT WIN32) if (NOT WIN32)
cc_library(parallel_executor SRCS parallel_executor.cc DEPS cc_library(parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
graph graph_viz_pass multi_devices_graph_pass graph build_strategy
multi_devices_graph_print_pass multi_devices_graph_check_pass fast_threaded_ssa_graph_executor)
fast_threaded_ssa_graph_executor fuse_elewise_add_act_pass)
endif() # NOT WIN32 endif() # NOT WIN32
cc_library(prune SRCS prune.cc DEPS framework_proto) cc_library(prune SRCS prune.cc DEPS framework_proto)
...@@ -167,15 +184,8 @@ cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows) ...@@ -167,15 +184,8 @@ cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto) cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto)
cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
# cc_test(channel_test SRCS channel_test.cc)
cc_test(tuple_test SRCS tuple_test.cc ) cc_test(tuple_test SRCS tuple_test.cc )
if (NOT WIN32) if (NOT WIN32)
cc_test(rw_lock_test SRCS rw_lock_test.cc) cc_test(rw_lock_test SRCS rw_lock_test.cc)
endif (NOT WIN32) endif (NOT WIN32)
# disable test temporarily.
# TODO https://github.com/PaddlePaddle/Paddle/issues/11971
# cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op
# channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op
# conditional_block_op while_op assign_op print_op executor proto_desc)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <condition_variable> // NOLINT
#include <typeindex>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
enum class ChannelAction {
SEND = 0,
RECEIVE = 1,
CLOSE = 2,
};
// Channel is the abstract class of buffered and un-buffered channels.
template <typename T>
class Channel {
public:
virtual bool CanSend() = 0;
virtual bool CanReceive() = 0;
virtual void Send(T*) = 0;
virtual bool Receive(T*) = 0;
virtual size_t Cap() = 0;
virtual void Lock() = 0;
virtual void Unlock() = 0;
virtual bool IsClosed() = 0;
virtual void Close() = 0;
virtual ~Channel() {}
virtual void AddToSendQ(const void* referrer, T* data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) = 0;
virtual void AddToReceiveQ(const void* referrer, T* data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) = 0;
virtual void RemoveFromSendQ(const void* referrer) = 0;
virtual void RemoveFromReceiveQ(const void* referrer) = 0;
};
// Forward declaration of channel implementations.
template <typename T>
class ChannelImpl;
template <typename T>
Channel<T>* MakeChannel(size_t buffer_size) {
return new ChannelImpl<T>(buffer_size);
}
template <typename T>
void CloseChannel(Channel<T>* ch) {
ch->Close();
}
/*
* The ChannelHolder class serves two main purposes:
* 1. It acts as a unified wrapper for the different kinds of
* channels, i.e. Buffered and Unbuffered channels. This is
* similar to the ReaderHolder class.
* 2. It also helps us in TypeHiding. This is similar to the
* PlaceHolder implementations in variable.h and tensor.h.
*/
class ChannelHolder {
public:
template <typename T>
void Reset(size_t buffer_size) {
holder_.reset(new PlaceholderImpl<T>(buffer_size));
}
template <typename T>
void Send(T* data) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
PADDLE_ENFORCE_EQ(
holder_->Type(), std::type_index(typeid(T)),
"Channel type is not same as the type of the data being sent");
// Static cast should be safe because we have ensured that types are same
Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
PADDLE_ENFORCE_EQ(channel != nullptr, true, "Channel should not be null.");
channel->Send(data);
}
template <typename T>
bool Receive(T* data) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
PADDLE_ENFORCE_EQ(
holder_->Type(), std::type_index(typeid(T)),
"Channel type is not same as the type of the data being sent");
Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
PADDLE_ENFORCE_EQ(channel != nullptr, true, "Channel should not be null.");
return channel->Receive(data);
}
bool IsClosed() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->IsClosed();
}
bool CanSend() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->CanSend();
}
bool CanReceive() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->CanReceive();
}
void close() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->Close();
}
size_t Cap() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->Cap();
}
void Lock() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->Lock();
}
void Unlock() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->Unlock();
}
template <typename T>
void AddToSendQ(const void* referrer, T* data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
if (channel != nullptr) {
channel->AddToSendQ(referrer, data, cond, cb);
}
}
template <typename T>
void AddToReceiveQ(const void* referrer, T* data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
if (channel != nullptr) {
channel->AddToReceiveQ(referrer, data, cond, cb);
}
}
void RemoveFromSendQ(const void* referrer) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->RemoveFromSendQ(referrer);
}
void RemoveFromReceiveQ(const void* referrer) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->RemoveFromReceiveQ(referrer);
}
inline bool IsInitialized() const { return holder_ != nullptr; }
inline const std::type_index Type() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->Type();
}
private:
/**
* @note Placeholder hides type T, so it doesn't appear as a template
* parameter of ChannelHolder.
*/
struct Placeholder {
virtual ~Placeholder() {}
virtual const std::type_index Type() const = 0;
virtual void* Ptr() const = 0;
virtual bool IsClosed() = 0;
virtual bool CanSend() = 0;
virtual bool CanReceive() = 0;
virtual void RemoveFromSendQ(const void* referrer) = 0;
virtual void RemoveFromReceiveQ(const void* referrer) = 0;
virtual void Close() = 0;
virtual void Lock() = 0;
virtual void Unlock() = 0;
virtual size_t Cap() = 0;
};
template <typename T>
struct PlaceholderImpl : public Placeholder {
explicit PlaceholderImpl(size_t buffer_size)
: type_(std::type_index(typeid(T))) {
channel_.reset(MakeChannel<T>(buffer_size));
}
virtual const std::type_index Type() const { return type_; }
virtual void* Ptr() const { return static_cast<void*>(channel_.get()); }
virtual bool IsClosed() {
if (channel_) {
return channel_->IsClosed();
}
return false;
}
virtual bool CanSend() {
if (channel_) {
return channel_->CanSend();
}
return false;
}
virtual bool CanReceive() {
if (channel_) {
return channel_->CanReceive();
}
return false;
}
virtual void RemoveFromSendQ(const void* referrer) {
if (channel_) {
channel_->RemoveFromSendQ(referrer);
}
}
virtual void RemoveFromReceiveQ(const void* referrer) {
if (channel_) {
channel_->RemoveFromReceiveQ(referrer);
}
}
virtual void Close() {
if (channel_) channel_->Close();
}
virtual size_t Cap() {
if (channel_)
return channel_->Cap();
else
return -1;
}
virtual void Lock() {
if (channel_) channel_->Lock();
}
virtual void Unlock() {
if (channel_) channel_->Unlock();
}
std::unique_ptr<Channel<T>> channel_;
const std::type_index type_;
};
// Pointer to a PlaceholderImpl object
std::unique_ptr<Placeholder> holder_;
};
} // namespace framework
} // namespace paddle
#include "paddle/fluid/framework/channel_impl.h"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <atomic>
#include <condition_variable> // NOLINT
#include <deque>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
template <typename T>
class ChannelImpl : public paddle::framework::Channel<T> {
friend Channel<T> *paddle::framework::MakeChannel<T>(size_t);
friend void paddle::framework::CloseChannel<T>(Channel<T> *);
public:
virtual bool CanSend();
virtual bool CanReceive();
virtual void Send(T *);
virtual bool Receive(T *);
virtual size_t Cap() { return cap_; }
virtual void Lock();
virtual void Unlock();
virtual bool IsClosed();
virtual void Close();
explicit ChannelImpl(size_t);
virtual ~ChannelImpl();
virtual void AddToSendQ(const void *referrer, T *data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb);
virtual void AddToReceiveQ(const void *referrer, T *data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb);
virtual void RemoveFromSendQ(const void *referrer);
virtual void RemoveFromReceiveQ(const void *referrer);
private:
struct QueueMessage {
T *data;
std::shared_ptr<std::condition_variable_any> cond;
bool chan_closed = false;
bool completed = false;
const void *referrer; // TODO(thuan): figure out better way to do this
std::function<bool(ChannelAction)> callback;
explicit QueueMessage(T *item)
: data(item), cond(std::make_shared<std::condition_variable_any>()) {}
QueueMessage(T *item, std::shared_ptr<std::condition_variable_any> cond)
: data(item), cond(cond) {}
void Wait(std::unique_lock<std::recursive_mutex> &lock) {
cond->wait(lock, [this]() { return completed; });
}
void Notify() {
completed = true;
cond->notify_all();
}
};
void send_return() {
send_ctr--;
destructor_cond_.notify_all();
}
bool recv_return(bool value) {
recv_ctr--;
destructor_cond_.notify_all();
return value;
}
std::shared_ptr<QueueMessage> get_first_message(
std::deque<std::shared_ptr<QueueMessage>> *queue, ChannelAction action) {
while (!queue->empty()) {
// Check whether this message was added by Select
// If this was added by Select then execute the callback
// to check if you can execute this message. The callback
// can return false if some other case was executed in Select.
// In that case just discard this QueueMessage and process next.
std::shared_ptr<QueueMessage> m = queue->front();
queue->pop_front();
if (m->callback == nullptr || m->callback(action)) return m;
}
return nullptr;
}
size_t cap_;
std::recursive_mutex mu_;
bool closed_;
std::deque<T> buf_;
std::deque<std::shared_ptr<QueueMessage>> recvq;
std::deque<std::shared_ptr<QueueMessage>> sendq;
std::atomic<unsigned> send_ctr{0};
std::atomic<unsigned> recv_ctr{0};
std::condition_variable_any destructor_cond_;
};
template <typename T>
ChannelImpl<T>::ChannelImpl(size_t capacity)
: cap_(capacity), closed_(false), send_ctr(0), recv_ctr(0) {
PADDLE_ENFORCE_GE(capacity, 0);
}
template <typename T>
bool ChannelImpl<T>::CanSend() {
std::lock_guard<std::recursive_mutex> lock{mu_};
return !closed_ && (!recvq.empty() || buf_.size() < cap_);
}
template <typename T>
bool ChannelImpl<T>::CanReceive() {
std::lock_guard<std::recursive_mutex> lock{mu_};
return !(closed_ && buf_.empty()) && (!sendq.empty() || buf_.size() > 0);
}
template <typename T>
void ChannelImpl<T>::Send(T *item) {
send_ctr++;
std::unique_lock<std::recursive_mutex> lock{mu_};
// If channel is closed, throw exception
if (closed_) {
send_return();
lock.unlock();
PADDLE_THROW("Cannot send on closed channel");
}
// If there is a receiver, directly pass the value we want
// to send to the receiver, bypassing the channel buffer if any
if (!recvq.empty()) {
std::shared_ptr<QueueMessage> m =
get_first_message(&recvq, ChannelAction::SEND);
if (m != nullptr) {
*(m->data) = std::move(*item);
m->Notify();
send_return();
return;
} else {
Send(item);
send_return();
return;
}
}
// Unbuffered channel will always bypass this
// If buffered channel has space in buffer,
// write the element to the buffer.
if (buf_.size() < cap_) {
// Copy to buffer
buf_.push_back(std::move(*item));
send_return();
return;
}
// Block on channel, because some receiver will complete
// the operation for us
auto m = std::make_shared<QueueMessage>(item);
sendq.push_back(m);
m->Wait(lock);
if (m->chan_closed) {
send_return();
lock.unlock();
PADDLE_THROW("Cannot send on closed channel");
}
send_return();
}
template <typename T>
bool ChannelImpl<T>::Receive(T *item) {
recv_ctr++;
std::unique_lock<std::recursive_mutex> lock{mu_};
// If channel is closed and buffer is empty or
// channel is unbuffered
if (closed_ && buf_.empty()) return recv_return(false);
// If there is a sender, directly receive the value we want
// from the sender. In case of a buffered channel, read from
// buffer and move front of send queue to the buffer
if (!sendq.empty()) {
std::shared_ptr<QueueMessage> m =
get_first_message(&sendq, ChannelAction::RECEIVE);
if (buf_.size() > 0) {
// Case 1 : Channel is Buffered
// Do Data transfer from front of buffer
// and add a QueueMessage to the buffer
*item = std::move(buf_.front());
buf_.pop_front();
// If first message from sendq is not null
// add it to the buffer and notify it
if (m != nullptr) {
// Copy to buffer
buf_.push_back(std::move(*(m->data)));
m->Notify();
} // Ignore if there is no first message
} else {
// Case 2: Channel is Unbuffered
// Do data transfer from front of SendQ
// If front is nullptr, then recursively call itself
if (m != nullptr) {
*item = std::move(*(m->data));
m->Notify();
} else {
return recv_return(Receive(item));
}
}
return recv_return(true);
}
// If this is a buffered channel and there are items in buffer
if (buf_.size() > 0) {
// Directly read from buffer
*item = std::move(buf_.front());
buf_.pop_front();
// return true
return recv_return(true);
}
// No sender available, block on this channel
// Some receiver will complete the option for us
auto m = std::make_shared<QueueMessage>(item);
recvq.push_back(m);
m->Wait(lock);
return recv_return(!m->chan_closed);
}
template <typename T>
void ChannelImpl<T>::Lock() {
mu_.lock();
}
template <typename T>
void ChannelImpl<T>::Unlock() {
mu_.unlock();
}
template <typename T>
bool ChannelImpl<T>::IsClosed() {
std::lock_guard<std::recursive_mutex> lock{mu_};
return closed_;
}
template <typename T>
void ChannelImpl<T>::Close() {
std::unique_lock<std::recursive_mutex> lock{mu_};
if (closed_) {
// TODO(abhinavarora): closing an already closed channel should panic
lock.unlock();
return;
}
closed_ = true;
// Empty the readers
while (!recvq.empty()) {
std::shared_ptr<QueueMessage> m = recvq.front();
recvq.pop_front();
m->chan_closed = true;
// Execute callback function (if any)
if (m->callback != nullptr) {
m->callback(ChannelAction::CLOSE);
}
m->Notify();
}
// Empty the senders
while (!sendq.empty()) {
std::shared_ptr<QueueMessage> m = sendq.front();
sendq.pop_front();
m->chan_closed = true;
// Execute callback function (if any)
if (m->callback != nullptr) {
m->callback(ChannelAction::CLOSE);
}
m->Notify();
}
}
template <typename T>
void ChannelImpl<T>::AddToSendQ(
const void *referrer, T *data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) {
std::lock_guard<std::recursive_mutex> lock{mu_};
auto m = std::make_shared<QueueMessage>(data, cond);
m->referrer = referrer;
m->callback = cb;
sendq.push_back(m);
}
template <typename T>
void ChannelImpl<T>::AddToReceiveQ(
const void *referrer, T *data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) {
std::lock_guard<std::recursive_mutex> lock{mu_};
auto m = std::make_shared<QueueMessage>(data, cond);
m->referrer = referrer;
m->callback = cb;
recvq.push_back(m);
}
template <typename T>
void ChannelImpl<T>::RemoveFromSendQ(const void *referrer) {
std::lock_guard<std::recursive_mutex> lock{mu_};
for (auto it = sendq.begin(); it != sendq.end();) {
std::shared_ptr<QueueMessage> sendMsg = (std::shared_ptr<QueueMessage>)*it;
if (sendMsg->referrer == referrer) {
it = sendq.erase(it);
} else {
++it;
}
}
}
template <typename T>
void ChannelImpl<T>::RemoveFromReceiveQ(const void *referrer) {
std::lock_guard<std::recursive_mutex> lock{mu_};
for (auto it = recvq.begin(); it != recvq.end();) {
std::shared_ptr<QueueMessage> recvMsg = (std::shared_ptr<QueueMessage>)*it;
if (recvMsg->referrer == referrer) {
it = recvq.erase(it);
} else {
++it;
}
}
}
template <typename T>
ChannelImpl<T>::~ChannelImpl() {
Close();
// The destructor must wait for all readers and writers to complete their task
// The channel has been closed, so we will not accept new readers and writers
std::unique_lock<std::recursive_mutex> lock{mu_};
destructor_cond_.wait(lock,
[this]() { return send_ctr == 0 && recv_ctr == 0; });
}
} // namespace framework
} // namespace paddle
此差异已折叠。
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <thread> // NOLINT
#include "gtest/gtest.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
USE_NO_KERNEL_OP(go);
USE_NO_KERNEL_OP(channel_close);
USE_NO_KERNEL_OP(channel_create);
USE_NO_KERNEL_OP(channel_recv);
USE_NO_KERNEL_OP(channel_send);
USE_NO_KERNEL_OP(elementwise_add);
USE_NO_KERNEL_OP(select);
USE_NO_KERNEL_OP(conditional_block);
USE_NO_KERNEL_OP(equal);
USE_NO_KERNEL_OP(assign);
USE_NO_KERNEL_OP(while);
USE_NO_KERNEL_OP(print);
namespace f = paddle::framework;
namespace p = paddle::platform;
namespace paddle {
namespace framework {
template <typename T>
LoDTensor *CreateVariable(Scope *scope, const p::CPUPlace &place,
std::string name, T value) {
// Create LoDTensor<int> of dim [1]
auto var = scope->Var(name);
auto tensor = var->GetMutable<LoDTensor>();
tensor->Resize({1});
T *expect = tensor->mutable_data<T>(place);
expect[0] = value;
return tensor;
}
void AddOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, AttributeMap attrs,
BlockDesc *block) {
// insert op
auto op = block->AppendOp();
op->SetType(type);
for (auto &kv : inputs) {
op->SetInput(kv.first, kv.second);
}
for (auto &kv : outputs) {
op->SetOutput(kv.first, kv.second);
}
op->SetAttrMap(attrs);
}
void AddCase(ProgramDesc *program, Scope *scope, p::CPUPlace *place,
BlockDesc *casesBlock, int caseId, int caseType,
std::string caseChannel, std::string caseVarName,
std::function<void(BlockDesc *, Scope *)> func) {
std::string caseCondName = std::string("caseCond") + std::to_string(caseId);
std::string caseCondXVarName =
std::string("caseCondX") + std::to_string(caseId);
BlockDesc *caseBlock = program->AppendBlock(*casesBlock);
func(caseBlock, scope);
CreateVariable(scope, *place, caseCondName, false);
CreateVariable(scope, *place, caseCondXVarName, caseId);
CreateVariable(scope, *place, caseVarName, caseId);
scope->Var("step_scope");
AddOp("equal", {{"X", {caseCondXVarName}}, {"Y", {"caseToExecute"}}},
{{"Out", {caseCondName}}}, {}, casesBlock);
AddOp("conditional_block", {{"X", {caseCondName}}, {"Params", {}}},
{{"Out", {}}, {"Scope", {"step_scope"}}},
{{"sub_block", caseBlock}, {"is_scalar_condition", true}}, casesBlock);
}
void AddFibonacciSelect(Scope *scope, p::CPUPlace *place, ProgramDesc *program,
BlockDesc *parentBlock, std::string dataChanName,
std::string quitChanName) {
BlockDesc *whileBlock = program->AppendBlock(*parentBlock);
CreateVariable(scope, *place, "whileExitCond", true);
CreateVariable(scope, *place, "caseToExecute", -1);
CreateVariable(scope, *place, "case1var", 0);
CreateVariable(scope, *place, "xtemp", 0);
// TODO(thuan): Need to create fibXToSend, since channel send moves the actual
// data,
// which causes the data to be no longer accessible to do the fib calculation
// TODO(abhinav): Change channel send to do a copy instead of a move!
CreateVariable(scope, *place, "fibXToSend", 0);
CreateVariable(scope, *place, "fibX", 0);
CreateVariable(scope, *place, "fibY", 1);
CreateVariable(scope, *place, "quitVar", 0);
BlockDesc *casesBlock = program->AppendBlock(*whileBlock);
std::function<void(BlockDesc * caseBlock)> f = [](BlockDesc *caseBlock) {};
// TODO(thuan): Remove this once we change channel send to do a copy instead
// of move
AddOp("assign", {{"X", {"fibX"}}}, {{"Out", {"fibXToSend"}}}, {}, whileBlock);
// Case 0: Send to dataChanName
std::function<void(BlockDesc * caseBlock, Scope * scope)> case0Func = [&](
BlockDesc *caseBlock, Scope *scope) {
AddOp("assign", {{"X", {"fibX"}}}, {{"Out", {"xtemp"}}}, {}, caseBlock);
AddOp("assign", {{"X", {"fibY"}}}, {{"Out", {"fibX"}}}, {}, caseBlock);
AddOp("elementwise_add", {{"X", {"xtemp"}}, {"Y", {"fibY"}}},
{{"Out", {"fibY"}}}, {}, caseBlock);
};
AddCase(program, scope, place, casesBlock, 0, 1, dataChanName, "fibXToSend",
case0Func);
std::string case0Config =
std::string("0,1,") + dataChanName + std::string(",fibXToSend");
// Case 1: Receive from quitChanName
std::function<void(BlockDesc * caseBlock, Scope * scope)> case2Func = [&](
BlockDesc *caseBlock, Scope *scope) {
// Exit the while loop after we receive from quit channel.
// We assign a false to "whileExitCond" variable, which will
// break out of while_op loop
CreateVariable(scope, *place, "whileFalse", false);
AddOp("assign", {{"X", {"whileFalse"}}}, {{"Out", {"whileExitCond"}}}, {},
caseBlock);
};
AddCase(program, scope, place, casesBlock, 1, 2, quitChanName, "quitVar",
case2Func);
std::string case1Config =
std::string("1,2,") + quitChanName + std::string(",quitVar");
// Select block
AddOp("select", {{"X", {dataChanName, quitChanName}},
{"case_to_execute", {"caseToExecute"}}},
{{"Out", {}}},
{{"sub_block", casesBlock},
{"cases", std::vector<std::string>{case0Config, case1Config}}},
whileBlock);
scope->Var("stepScopes");
AddOp("while",
{{"X", {dataChanName, quitChanName}}, {"Condition", {"whileExitCond"}}},
{{"Out", {}}, {"StepScopes", {"stepScopes"}}},
{{"sub_block", whileBlock}}, parentBlock);
}
TEST(Concurrency, Go_Op) {
Scope scope;
p::CPUPlace place;
// Initialize scope variables
p::CPUDeviceContext ctx(place);
// Create channel variable
scope.Var("Channel");
// Create Variables, x0 will be put into channel,
// result will be pulled from channel
CreateVariable(&scope, place, "Status", false);
CreateVariable(&scope, place, "x0", 99);
CreateVariable(&scope, place, "result", 0);
framework::Executor executor(place);
ProgramDesc program;
BlockDesc *block = program.MutableBlock(0);
// Create channel OP
AddOp("channel_create", {}, {{"Out", {"Channel"}}},
{{"capacity", 10}, {"data_type", f::proto::VarType::LOD_TENSOR}},
block);
// Create Go Op routine
BlockDesc *goOpBlock = program.AppendBlock(program.Block(0));
AddOp("channel_send", {{"Channel", {"Channel"}}, {"X", {"x0"}}},
{{"Status", {"Status"}}}, {}, goOpBlock);
// Create Go Op
AddOp("go", {{"X", {"Channel", "x0"}}}, {}, {{"sub_block", goOpBlock}},
block);
// Create Channel Receive Op
AddOp("channel_recv", {{"Channel", {"Channel"}}},
{{"Status", {"Status"}}, {"Out", {"result"}}}, {}, block);
// Create Channel Close Op
AddOp("channel_close", {{"Channel", {"Channel"}}}, {}, {}, block);
// Check the result tensor to make sure it is set to 0
const LoDTensor &tensor = (scope.FindVar("result"))->Get<LoDTensor>();
auto *initialData = tensor.data<int>();
EXPECT_EQ(initialData[0], 0);
executor.Run(program, &scope, 0, true, true);
// After we call executor.run, the Go operator should do a channel_send to
// set the "result" variable to 99.
auto *finalData = tensor.data<int>();
EXPECT_EQ(finalData[0], 99);
}
/**
* This test implements the fibonacci function using go_op and select_op
*/
TEST(Concurrency, Select) {
Scope scope;
p::CPUPlace place;
// Initialize scope variables
p::CPUDeviceContext ctx(place);
CreateVariable(&scope, place, "Status", false);
CreateVariable(&scope, place, "result", 0);
CreateVariable(&scope, place, "currentXFib", 0);
framework::Executor executor(place);
ProgramDesc program;
BlockDesc *block = program.MutableBlock(0);
// Create channel OP
std::string dataChanName = "Channel";
scope.Var(dataChanName);
AddOp("channel_create", {}, {{"Out", {dataChanName}}},
{{"capacity", 0}, {"data_type", f::proto::VarType::LOD_TENSOR}}, block);
std::string quitChanName = "Quit";
scope.Var(quitChanName);
AddOp("channel_create", {}, {{"Out", {quitChanName}}},
{{"capacity", 0}, {"data_type", f::proto::VarType::LOD_TENSOR}}, block);
// Create Go Op routine, which loops 10 times over fibonacci sequence
CreateVariable(&scope, place, "xReceiveVar", 0);
BlockDesc *goOpBlock = program.AppendBlock(program.Block(0));
for (int i = 0; i < 10; ++i) {
AddOp("channel_recv", {{"Channel", {dataChanName}}},
{{"Status", {"Status"}}, {"Out", {"currentXFib"}}}, {}, goOpBlock);
AddOp("print", {{"In", {"currentXFib"}}}, {{"Out", {"currentXFib"}}},
{{"first_n", 100},
{"summarize", -1},
{"print_tensor_name", false},
{"print_tensor_type", true},
{"print_tensor_shape", false},
{"print_tensor_lod", false},
{"print_phase", std::string("FORWARD")},
{"message", std::string("X: ")}},
goOpBlock);
}
CreateVariable(&scope, place, "quitSignal", 0);
AddOp("channel_send", {{"Channel", {quitChanName}}, {"X", {"quitSignal"}}},
{{"Status", {"Status"}}}, {}, goOpBlock);
// Create Go Op
AddOp("go", {{"X", {dataChanName, quitChanName}}}, {},
{{"sub_block", goOpBlock}}, block);
AddFibonacciSelect(&scope, &place, &program, block, dataChanName,
quitChanName);
// Create Channel Close Op
AddOp("channel_close", {{"Channel", {dataChanName}}}, {}, {}, block);
AddOp("channel_close", {{"Channel", {quitChanName}}}, {}, {}, block);
executor.Run(program, &scope, 0, true, true);
// After we call executor.run, "result" variable should be equal to 34
// (which is 10 loops through fibonacci sequence)
const LoDTensor &tensor = (scope.FindVar("currentXFib"))->Get<LoDTensor>();
auto *finalData = tensor.data<int>();
EXPECT_EQ(finalData[0], 34);
}
} // namespace framework
} // namespace paddle
...@@ -54,3 +54,8 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu ...@@ -54,3 +54,8 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu
# device_context reduce_op_handle ) # device_context reduce_op_handle )
cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc
DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context) DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context)
cc_library(build_strategy SRCS build_strategy.cc DEPS
graph_viz_pass multi_devices_graph_pass
multi_devices_graph_print_pass multi_devices_graph_check_pass
fuse_elewise_add_act_pass)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace paddle {
namespace framework {
namespace details {
class ParallelExecutorPassBuilder : public ir::PassBuilder {
public:
explicit ParallelExecutorPassBuilder(const BuildStrategy &strategy)
: ir::PassBuilder(), strategy_(strategy) {
// Add a graph viz pass to record a graph.
if (!strategy_.debug_graphviz_path_.empty()) {
auto viz_pass = AppendPass("graph_viz_pass");
const std::string graph_path = string::Sprintf(
"%s%s", strategy_.debug_graphviz_path_.c_str(), "_original_graph");
viz_pass->Set<std::string>("graph_viz_path", new std::string(graph_path));
}
// Add op fusion.
if (strategy.fuse_elewise_add_act_ops_) {
auto fuse_elewise_add_act_pass = AppendPass("fuse_elewise_add_act_pass");
// Add a graph viz pass to record a graph.
if (!strategy.debug_graphviz_path_.empty()) {
auto viz_pass = AppendPass("graph_viz_pass");
const std::string graph_path = string::Sprintf(
"%s%s", strategy.debug_graphviz_path_.c_str(), "_fused_graph");
viz_pass->Set<std::string>("graph_viz_path",
new std::string(graph_path));
}
}
// Convert graph to run on multi-devices.
auto multi_devices_pass = AppendPass("multi_devices_pass");
multi_devices_pass->SetNotOwned<const BuildStrategy>("strategy",
&strategy_);
// Add a graph print pass to record a graph with device info.
if (!strategy_.debug_graphviz_path_.empty()) {
auto multi_devices_print_pass = AppendPass("multi_devices_print_pass");
multi_devices_print_pass->SetNotOwned<const std::string>(
"debug_graphviz_path", &strategy_.debug_graphviz_path_);
multi_devices_print_pass->Set<details::GraphvizSSAGraphPrinter>(
"graph_printer", new details::GraphvizSSAGraphPrinter);
}
// Verify that the graph is correct for multi-device executor.
AppendPass("multi_devices_check_pass");
}
private:
BuildStrategy strategy_;
};
std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy()
const {
pass_builder_.reset(new ParallelExecutorPassBuilder(*this));
return pass_builder_;
}
std::unique_ptr<ir::Graph> BuildStrategy::Apply(
const ProgramDesc &main_program, const std::vector<platform::Place> &places,
const std::string &loss_var_name,
const std::unordered_set<std::string> &param_names,
const std::vector<Scope *> &local_scopes,
#ifdef PADDLE_WITH_CUDA
const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const {
#else
const bool use_cuda) const {
#endif
// Create a default one if not initialized by user.
if (!pass_builder_) {
CreatePassesFromStrategy();
}
std::unique_ptr<ir::Graph> graph(new ir::Graph(main_program));
for (std::shared_ptr<ir::Pass> &pass : pass_builder_->AllPasses()) {
if (pass->Type() == "multi_devices_pass") {
pass->Erase("places");
pass->SetNotOwned<const std::vector<platform::Place>>("places", &places);
pass->Erase("loss_var_name");
pass->SetNotOwned<const std::string>("loss_var_name", &loss_var_name);
pass->Erase("params");
pass->SetNotOwned<const std::unordered_set<std::string>>("params",
&param_names);
pass->Erase("local_scopes");
pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
&local_scopes);
#ifdef PADDLE_WITH_CUDA
platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr;
pass->Erase("nccl_ctxs");
pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
#endif
}
graph = pass->Apply(std::move(graph));
}
return graph;
}
} // namespace details
} // namespace framework
} // namespace paddle
USE_PASS(fuse_elewise_add_act_pass);
USE_PASS(graph_viz_pass);
USE_PASS(multi_devices_pass);
USE_PASS(multi_devices_check_pass);
USE_PASS(multi_devices_print_pass);
...@@ -15,6 +15,17 @@ ...@@ -15,6 +15,17 @@
#pragma once #pragma once
#include <string> #include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/nccl_helper.h"
#endif
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -57,6 +68,30 @@ struct BuildStrategy { ...@@ -57,6 +68,30 @@ struct BuildStrategy {
bool fuse_elewise_add_act_ops_{false}; bool fuse_elewise_add_act_ops_{false};
bool enable_data_balance_{false}; bool enable_data_balance_{false};
// User normally doesn't need to call this API.
// The PassBuilder allows for more customized insert, remove of passes
// from python side.
// A new PassBuilder is created based on configs defined above and
// passes are owned by the PassBuilder.
std::shared_ptr<ir::PassBuilder> CreatePassesFromStrategy() const;
// Apply the passes built by the pass_builder_. The passes will be
// applied to the Program and output an ir::Graph.
std::unique_ptr<ir::Graph> Apply(
const ProgramDesc &main_program,
const std::vector<platform::Place> &places,
const std::string &loss_var_name,
const std::unordered_set<std::string> &param_names,
const std::vector<Scope *> &local_scopes,
#ifdef PADDLE_WITH_CUDA
const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const;
#else
const bool use_cuda) const;
#endif
private:
mutable std::shared_ptr<ir::PassBuilder> pass_builder_;
}; };
} // namespace details } // namespace details
......
...@@ -20,79 +20,37 @@ namespace paddle { ...@@ -20,79 +20,37 @@ namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
// Change it to thread safe flags if needed. template <class T>
class ThreadUnsafeOwnershipFlags { class COWPtr {
public: public:
explicit ThreadUnsafeOwnershipFlags(bool flag) : flag_(flag) {} typedef std::shared_ptr<T> RefPtr;
ThreadUnsafeOwnershipFlags(const ThreadUnsafeOwnershipFlags& other) = delete;
ThreadUnsafeOwnershipFlags& operator=(
const ThreadUnsafeOwnershipFlags& other) = delete;
ThreadUnsafeOwnershipFlags(ThreadUnsafeOwnershipFlags&& other) = default;
void SetOwnership(bool flag) { flag_ = flag; }
// Invoke the callback if it is not owned.
template <typename Callback>
void AcquireOwnershipOnce(Callback acquire) {
if (!flag_) {
acquire();
flag_ = true;
}
}
private: private:
bool flag_; RefPtr m_sp;
};
// Copy-On-Write pointer.
// It will hold a T* pointer, and only copy once when `MutableData` is invoked.
//
// The template parameter OwnershipFlags should have:
// * a constructor takes a bool. True if own.
// * SetOwnership(bool flag).
// * AcquireOwnershipOnce(Callback). It will invoke the callback if it is not
// owned.
//
// https://en.wikipedia.org/wiki/Copy-on-write
template <typename T, typename OwnershipFlags = ThreadUnsafeOwnershipFlags>
class COWPtr {
public: public:
// Ctor from raw pointer. COWPtr() : m_sp(nullptr) {}
explicit COWPtr(T* ptr) : payload_(ptr), ownership_{true} {} explicit COWPtr(T* t) : m_sp(t) {}
// Move methods. Steal ownership from origin const T& Data() const { return *m_sp; }
COWPtr(COWPtr&& other)
: payload_(other.payload_), ownership_{std::move(other.ownership_)} {}
COWPtr& operator=(COWPtr&& origin) = default;
// Copy methods. Not own payload
COWPtr(const COWPtr& other) : payload_(other.payload_), ownership_{false} {}
COWPtr& operator=(const COWPtr& other) {
payload_ = other.payload_;
ownership_.SetOwnership(false);
return *this;
}
// Access read only data.
const T& Data() const { return *payload_; }
// Access mutable data. If the data is not owned, the data will be copied
// before.
T* MutableData() { T* MutableData() {
ownership_.AcquireOwnershipOnce( DetachIfNotUnique();
[this] { payload_.reset(new T(*payload_)); }); return m_sp.get();
return payload_.get();
} }
private: void DetachIfNotUnique() {
// Actual data pointer. T* tmp = m_sp.get();
std::shared_ptr<T> payload_; if (!(tmp == nullptr || m_sp.unique())) {
Detach();
}
}
// Ownership flag. void Detach() {
OwnershipFlags ownership_; T* tmp = m_sp.get();
m_sp = RefPtr(new T(*tmp));
}
}; };
} // namespace details } // namespace details
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -30,6 +30,14 @@ TEST(COWPtr, all) { ...@@ -30,6 +30,14 @@ TEST(COWPtr, all) {
ASSERT_EQ(ptr2.Data(), 10); ASSERT_EQ(ptr2.Data(), 10);
} }
TEST(COWPtr, change_old) {
COWPtr<int> ptr(new int{0});
COWPtr<int> ptr2 = ptr;
*ptr.MutableData() = 10;
ASSERT_EQ(ptr2.Data(), 0);
ASSERT_EQ(ptr.Data(), 10);
}
} // namespace details } // namespace details
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -80,16 +80,16 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl( ...@@ -80,16 +80,16 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
// This is weird but there is really some variables without var_desc // This is weird but there is really some variables without var_desc
// in computation_op // in computation_op
if (var_desc == nullptr) { if (var_desc == nullptr) {
if (compute_op->Node()->Op()->Block()->FindVar(var_name) == nullptr) var_desc = compute_op->Node()->Op()->Block()->FindVar(var_name);
continue; if (var_desc == nullptr) continue;
} else { }
if (var_desc->Persistable()) continue; if (var_desc->Persistable()) continue;
auto var_type = var_desc->Proto()->type().type(); auto var_type = var_desc->Proto()->type().type();
if (var_type != proto::VarType::LOD_TENSOR && if (var_type != proto::VarType::LOD_TENSOR &&
var_type != proto::VarType::SELECTED_ROWS) { var_type != proto::VarType::SELECTED_ROWS) {
continue; continue;
} }
}
// compute op only runs in one device // compute op only runs in one device
if (ref_cnts[place.device]->count(var_name)) if (ref_cnts[place.device]->count(var_name))
......
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/lod_tensor_array.h"
...@@ -76,15 +75,13 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) { ...@@ -76,15 +75,13 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) {
var->GetMutable<platform::PlaceList>(); var->GetMutable<platform::PlaceList>();
} else if (var_type == proto::VarType::READER) { } else if (var_type == proto::VarType::READER) {
var->GetMutable<ReaderHolder>(); var->GetMutable<ReaderHolder>();
} else if (var_type == proto::VarType::CHANNEL) {
var->GetMutable<ChannelHolder>();
} else if (var_type == proto::VarType::RAW) { } else if (var_type == proto::VarType::RAW) {
// GetMutable will be called in operator // GetMutable will be called in operator
} else { } else {
PADDLE_THROW( PADDLE_THROW(
"Variable type %d is not in " "Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, " "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]", "LOD_RANK_TABLE, PLACE_LIST, READER, RAW]",
var_type); var_type);
} }
} }
......
...@@ -126,7 +126,6 @@ message VarType { ...@@ -126,7 +126,6 @@ message VarType {
LOD_TENSOR_ARRAY = 13; LOD_TENSOR_ARRAY = 13;
PLACE_LIST = 14; PLACE_LIST = 14;
READER = 15; READER = 15;
CHANNEL = 16;
// Any runtime decided variable type is raw // Any runtime decided variable type is raw
// raw variables should manage their own allocations // raw variables should manage their own allocations
// in operators like nccl_op // in operators like nccl_op
...@@ -158,12 +157,6 @@ message VarType { ...@@ -158,12 +157,6 @@ message VarType {
message ReaderDesc { repeated LoDTensorDesc lod_tensor = 1; } message ReaderDesc { repeated LoDTensorDesc lod_tensor = 1; }
optional ReaderDesc reader = 5; optional ReaderDesc reader = 5;
message ChannelDesc {
required Type data_type = 1;
required int64 capacity = 2;
}
optional ChannelDesc channel = 6;
message Tuple { repeated Type element_type = 1; } message Tuple { repeated Type element_type = 1; }
optional Tuple tuple = 7; optional Tuple tuple = 7;
} }
......
set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h) set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h)
file(WRITE ${pass_file} "// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!\n\n") file(WRITE ${pass_file} "// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!\n\n")
file(APPEND ${pass_file} "\#pragma once\n")
file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n") file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n")
...@@ -28,12 +29,13 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap ...@@ -28,12 +29,13 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap
pass_library(graph_to_program_pass base) pass_library(graph_to_program_pass base)
pass_library(graph_viz_pass base) pass_library(graph_viz_pass base)
pass_library(fc_fuse_pass inference) pass_library(fc_fuse_pass inference)
if(WITH_MKLDNN) if (WITH_MKLDNN)
pass_library(conv_relu_mkldnn_fuse_pass inference) pass_library(conv_relu_mkldnn_fuse_pass inference)
endif() endif ()
pass_library(attention_lstm_fuse_pass inference) pass_library(attention_lstm_fuse_pass inference)
pass_library(infer_clean_graph_pass inference) pass_library(infer_clean_graph_pass inference)
pass_library(fc_lstm_fuse_pass inference) pass_library(fc_lstm_fuse_pass inference)
pass_library(embedding_fc_lstm_fuse_pass inference)
pass_library(fc_gru_fuse_pass inference) pass_library(fc_gru_fuse_pass inference)
pass_library(seq_concat_fc_fuse_pass inference) pass_library(seq_concat_fc_fuse_pass inference)
...@@ -41,12 +43,14 @@ cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass ...@@ -41,12 +43,14 @@ cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass
set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library") set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library")
cc_library(pass_builder SRCS pass_builder.cc DEPS pass)
cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper) cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper)
cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry) cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry)
cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry) cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry)
cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass) cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass)
cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector) cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector)
cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto) cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
if(WITH_MKLDNN) if (WITH_MKLDNN)
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass) cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
endif() endif ()
...@@ -257,6 +257,22 @@ std::unique_ptr<ir::Graph> AttentionLSTMFusePass::ApplyImpl( ...@@ -257,6 +257,22 @@ std::unique_ptr<ir::Graph> AttentionLSTMFusePass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const { std::unique_ptr<ir::Graph> graph) const {
PDPattern external_pattern, subblock_pattern; PDPattern external_pattern, subblock_pattern;
// Use the following variables to tell whether this model is RNN1.
// This fuse can only works on the RNN1 model.
std::unordered_set<std::string> specified_vars({"data_lod_attention",
"cell_init", "hidden_init",
"data", "week", "minute"});
int count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsVar() && specified_vars.count(node->Name())) {
++count;
}
}
if (count < specified_vars.size()) {
return graph;
}
// Continue to fuse.
FindWhileOp(graph.get()); FindWhileOp(graph.get());
return graph; return graph;
} }
......
...@@ -26,8 +26,6 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl( ...@@ -26,8 +26,6 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
PADDLE_ENFORCE(graph.get()); PADDLE_ENFORCE(graph.get());
FusePassBase::Init("conv_relu_mkldnn_fuse", graph.get()); FusePassBase::Init("conv_relu_mkldnn_fuse", graph.get());
std::unordered_set<Node*> nodes2delete;
GraphPatternDetector gpd; GraphPatternDetector gpd;
auto* conv_input = gpd.mutable_pattern() auto* conv_input = gpd.mutable_pattern()
->NewNode("conv_relu_mkldnn_fuse/conv_input") ->NewNode("conv_relu_mkldnn_fuse/conv_input")
...@@ -43,35 +41,19 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl( ...@@ -43,35 +41,19 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
VLOG(4) << "handle ConvReLU fuse"; VLOG(4) << "handle ConvReLU fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_relu_pattern); // Filter conv_relu_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_bias, conv_bias, conv_relu_pattern); // Bias
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp
GET_IR_NODE_FROM_SUBGRAPH(conv, conv, conv_relu_pattern); // CONV op GET_IR_NODE_FROM_SUBGRAPH(conv, conv, conv_relu_pattern); // CONV op
GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, conv_relu_pattern); // Out GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, conv_relu_pattern); // Out
GET_IR_NODE_FROM_SUBGRAPH(relu, relu, conv_relu_pattern); // ReLU op GET_IR_NODE_FROM_SUBGRAPH(relu, relu, conv_relu_pattern); // ReLU op
// Create an ConvReLU Node. // Transform Conv node into ConvReLU node.
OpDesc desc; OpDesc* desc = conv->Op();
std::string conv_relu_i_in = subgraph.at(conv_input)->Name(); desc->SetOutput("Output", std::vector<std::string>({relu_out->Name()}));
std::string conv_relu_w_in = conv_weight->Name(); desc->SetAttr("fuse_relu", true);
std::string conv_relu_b_in = conv_bias->Name(); GraphSafeRemoveNodes(graph.get(), {relu, conv_out});
std::string conv_relu_out = relu_out->Name();
desc.SetInput("Input", std::vector<std::string>({conv_relu_i_in}));
desc.SetInput("Filter", std::vector<std::string>({conv_relu_w_in}));
desc.SetInput("Bias", std::vector<std::string>({conv_relu_b_in}));
desc.SetOutput("Output", std::vector<std::string>({conv_relu_out}));
desc.SetType("conv2d");
for (auto& attr : conv->Op()->GetAttrMap()) {
desc.SetAttr(attr.first, attr.second);
}
desc.SetAttr("fuse_relu", true);
auto conv_relu_node = g->CreateOpNode(&desc); // OpDesc will be copied.
GraphSafeRemoveNodes(graph.get(), {conv, relu, conv_out});
PADDLE_ENFORCE(subgraph.count(conv_input)); PADDLE_ENFORCE(subgraph.count(conv_input));
IR_NODE_LINK_TO(subgraph.at(conv_input), conv_relu_node); IR_NODE_LINK_TO(conv, relu_out);
IR_NODE_LINK_TO(conv_weight, conv_relu_node);
IR_NODE_LINK_TO(conv_bias, conv_relu_node);
IR_NODE_LINK_TO(conv_relu_node, relu_out);
found_conv_relu_count++; found_conv_relu_count++;
}; };
......
...@@ -85,19 +85,16 @@ TEST(ConvReLUFusePass, basic) { ...@@ -85,19 +85,16 @@ TEST(ConvReLUFusePass, basic) {
for (auto* node : graph->Nodes()) { for (auto* node : graph->Nodes()) {
if (node->IsOp() && node->Op()->Type() == "conv2d") { if (node->IsOp() && node->Op()->Type() == "conv2d") {
if (node->Op()->HasAttr("use_mkldnn")) { auto* op = node->Op();
bool use_mkldnn = boost::get<bool>(node->Op()->GetAttr("use_mkldnn")); ASSERT_TRUE(op->HasAttr("use_mkldnn"));
if (use_mkldnn) { EXPECT_TRUE(boost::get<bool>(op->GetAttr("use_mkldnn")));
if (node->Op()->HasAttr("fuse_relu")) { ASSERT_TRUE(op->HasAttr("fuse_relu"));
bool fuse_relu = boost::get<bool>(node->Op()->GetAttr("fuse_relu")); bool fuse_relu = boost::get<bool>(op->GetAttr("fuse_relu"));
if (fuse_relu) { if (fuse_relu) {
++conv_relu_count; ++conv_relu_count;
} }
} }
} }
}
}
}
EXPECT_EQ(conv_relu_count, 1); EXPECT_EQ(conv_relu_count, 1);
} }
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h"
#include <algorithm>
#include <string>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/fc_compute.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace paddle {
namespace framework {
namespace ir {
static int BuildFusion(Graph* graph, const std::string& name_scope,
Scope* scope, bool with_fc_bias) {
GraphPatternDetector gpd;
auto* pattern = gpd.mutable_pattern();
// Build pattern
PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "x"))
->assert_is_op_input("lookup_table")
->assert_var_not_persistable();
patterns::Embedding embedding_pattern(pattern, name_scope);
// TODO(jczaja): Intermediate can only be for val that are not used anywhere
// but lookup table output may go into other LSTM (for reverse
// direction)
auto* embedding_out = embedding_pattern(x);
patterns::FC fc_pattern(pattern, name_scope);
// fc_out is a tmp var, will be removed after fuse, so marked as intermediate.
auto* fc_out = fc_pattern(embedding_out, with_fc_bias)->AsIntermediate();
patterns::LSTM lstm_pattern(pattern, name_scope);
lstm_pattern(fc_out);
// Create New OpDesc
auto embedding_lstm_creator = [&](Node* embedding, Node* W, Node* lstm,
Node* input, Node* weight_x, Node* weight_h,
Node* bias, Node* hidden, Node* cell,
Node* xx, Node* fc_bias) {
OpDesc op_desc;
op_desc.SetType("fused_embedding_fc_lstm");
#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()});
SET_IN(Ids, input);
SET_IN(WeightH, weight_h);
// Neet to have this passed as We need Wc data for peephole connections
SET_IN(Bias, bias);
#undef SET_IN
// Multiply embeddings with Weights
PADDLE_ENFORCE(scope);
const std::string& embeddings = patterns::UniqueKey("Embeddings");
auto* embeddings_var = scope->Var(embeddings);
PADDLE_ENFORCE(embeddings_var);
auto* embeddings_tensor =
embeddings_var->GetMutable<framework::LoDTensor>();
// Get WeightX size: [single_embedding, fc_size]
// and embedding size: [dict_size, single_embedding]
// and create new size of embeddings eg. [dict_size , hidden_size]
auto* embedding_var = scope->FindVar(W->Name());
PADDLE_ENFORCE(embedding_var);
const auto& embedding_tensor = embedding_var->Get<framework::LoDTensor>();
const auto& weightx_tensor =
scope->FindVar(weight_x->Name())->Get<framework::LoDTensor>();
embeddings_tensor->Resize(
{embedding_tensor.dims()[0], weightx_tensor.dims()[1]});
// Multiplie embeddings via WeightsX and add bias
auto embedding_data = embedding_tensor.data<float>();
auto weightx_data = weightx_tensor.data<float>();
auto embeddings_data =
embeddings_tensor->mutable_data<float>(platform::CPUPlace());
// Adding biases to GEMM result to be
auto* lstm_bias_var = scope->FindVar(bias->Name());
PADDLE_ENFORCE(lstm_bias_var);
const auto& lstm_bias_tensor = lstm_bias_var->Get<framework::LoDTensor>();
auto alpha = 1.0f;
auto beta = 1.0f;
int m = embedding_tensor.dims()[0];
int n = weightx_tensor.dims()[1];
int k = embedding_tensor.dims()[1];
// Copy only gate biases values (only actual bias data, not peephole
// weights)
std::vector<float> combined_biases;
combined_biases.reserve(n);
std::copy_n(lstm_bias_tensor.data<float>(), n,
std::back_inserter(combined_biases));
if (with_fc_bias) {
// Add FC-bias with LSTM-bias (into GEMM result to be)
auto* fc_bias_var = scope->FindVar(fc_bias->Name());
const auto& fc_bias_tensor = fc_bias_var->Get<framework::LoDTensor>();
for (int i = 0; i < fc_bias_tensor.numel(); i++) {
combined_biases[i] += fc_bias_tensor.data<float>()[i];
}
}
// broadcast biases
std::vector<float> ones(m, 1.0f);
paddle::operators::math::CBlas<float>::GEMM(
CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, 1, alpha, &ones[0], 1,
&combined_biases[0], n, 0.0f, embeddings_data, n);
// Wx*embeddings + biases
paddle::operators::math::CBlas<float>::GEMM(
CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha,
embedding_data, k, weightx_data, n, beta, embeddings_data, n);
op_desc.SetInput("Embeddings", {embeddings});
// Create temp variables.
const std::string BatchedInput = patterns::UniqueKey("BatchedInput");
const std::string BatchedCellPreAct =
patterns::UniqueKey("BatchedCellPreAct");
const std::string BatchedGate = patterns::UniqueKey("BatchedGate");
scope->Var(BatchedInput)->GetMutable<framework::LoDTensor>();
scope->Var(BatchedCellPreAct)->GetMutable<framework::LoDTensor>();
scope->Var(BatchedGate)->GetMutable<framework::LoDTensor>();
op_desc.SetInput("H0", {});
op_desc.SetInput("C0", {});
op_desc.SetOutput("Hidden", {hidden->Name()});
op_desc.SetOutput("Cell", {cell->Name()});
op_desc.SetOutput("XX", {xx->Name()});
op_desc.SetOutput("BatchedGate", {BatchedGate});
op_desc.SetOutput("BatchCellPreAct", {BatchedCellPreAct});
op_desc.SetOutput("BatchedInput", {BatchedInput});
op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse"));
op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes"));
// TODO(TJ): get from attr
op_desc.SetAttr("use_seq", true);
PADDLE_ENFORCE(graph->Has(kParamScopeAttr));
auto* scope = graph->Get<Scope*>(kParamScopeAttr);
#define OP_SET_OUT(x) \
const std::string x = patterns::UniqueKey(#x); \
op_desc.SetOutput(#x, {x}); \
scope->Var(x)->GetMutable<LoDTensor>()
OP_SET_OUT(BatchedCell);
OP_SET_OUT(BatchedHidden);
OP_SET_OUT(ReorderedH0);
OP_SET_OUT(ReorderedC0);
#undef OP_SET_OUT
auto* op = graph->CreateOpNode(&op_desc);
IR_NODE_LINK_TO(input, op);
IR_NODE_LINK_TO(weight_x, op);
IR_NODE_LINK_TO(weight_h, op);
IR_NODE_LINK_TO(bias, op);
IR_NODE_LINK_TO(op, hidden);
return op;
};
int fusion_count{0};
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(lstm, lstm, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(Hidden, Hidden, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(lookup_table, lookup_table, embedding_pattern);
GET_IR_NODE_FROM_SUBGRAPH(W, W, embedding_pattern);
GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
// TODO(jczaja): Add support for is_sparse / is_distributed
auto is_sparse = boost::get<bool>(lookup_table->Op()->GetAttr("is_sparse"));
auto is_distributed =
boost::get<bool>(lookup_table->Op()->GetAttr("is_distributed"));
if (is_sparse == true || is_distributed == true) {
return;
}
if (with_fc_bias) {
GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight,
Bias, Hidden, Cell, fc_out, fc_bias);
// Remove unneeded nodes.
// TODO(jczaja): Proper removing of lookup table
std::unordered_set<const Node*> marked_nodes(
//{lookup_table, mul, lstm, elementwise_add, fc_bias, W});
{mul, lstm, elementwise_add, fc_bias});
GraphSafeRemoveNodes(graph, marked_nodes);
} else {
GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern);
embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight,
Bias, Hidden, Cell, fc_out, nullptr);
// Remove unneeded nodes.
// TODO(jczaja): Proper removing of lookup table
// std::unordered_set<const Node*> marked_nodes({lookup_table, W, mul,
// lstm});
std::unordered_set<const Node*> marked_nodes({mul, lstm});
GraphSafeRemoveNodes(graph, marked_nodes);
}
++fusion_count;
};
gpd(graph, handler);
return fusion_count;
}
std::unique_ptr<ir::Graph> EmbeddingFCLSTMFusePass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
FusePassBase::Init(name_scope_, graph.get());
int fusion_count = BuildFusion(graph.get(), name_scope_, param_scope(),
true /*with_fc_bias*/);
AddStatis(fusion_count);
return graph;
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(embedding_fc_lstm_fuse_pass,
paddle::framework::ir::EmbeddingFCLSTMFusePass);
...@@ -11,29 +11,30 @@ ...@@ -11,29 +11,30 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <chrono> // NOLINT #include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace paddle { namespace paddle {
namespace inference { namespace framework {
namespace ir {
// Fusing of Embedding , FC and LSTM op
// Timer for timer // Just FC without bias
class Timer { class EmbeddingFCLSTMFusePass : public FusePassBase {
public: public:
std::chrono::high_resolution_clock::time_point start; virtual ~EmbeddingFCLSTMFusePass() {}
std::chrono::high_resolution_clock::time_point startu;
protected:
void tic() { start = std::chrono::high_resolution_clock::now(); } std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
double toc() {
startu = std::chrono::high_resolution_clock::now(); const std::string name_scope_{"embedding_fc_lstm_fuse"};
std::chrono::duration<double> time_span =
std::chrono::duration_cast<std::chrono::duration<double>>(startu -
start);
double used_time_ms = static_cast<double>(time_span.count()) * 1000.0;
return used_time_ms;
}
}; };
} // namespace inference } // namespace ir
} // namespace framework
} // namespace paddle } // namespace paddle
...@@ -77,10 +77,12 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, ...@@ -77,10 +77,12 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
const std::string BatchedCellPreAct = const std::string BatchedCellPreAct =
patterns::UniqueKey("BatchedCellPreAct"); patterns::UniqueKey("BatchedCellPreAct");
const std::string BatchedGate = patterns::UniqueKey("BatchedGate"); const std::string BatchedGate = patterns::UniqueKey("BatchedGate");
const std::string CheckedCell = patterns::UniqueKey("CheckedCell");
scope->Var(BatchedInput)->GetMutable<framework::LoDTensor>(); scope->Var(BatchedInput)->GetMutable<framework::LoDTensor>();
scope->Var(BatchedCellPreAct)->GetMutable<framework::LoDTensor>(); scope->Var(BatchedCellPreAct)->GetMutable<framework::LoDTensor>();
scope->Var(BatchedGate)->GetMutable<framework::LoDTensor>(); scope->Var(BatchedGate)->GetMutable<framework::LoDTensor>();
scope->Var(CheckedCell)->GetMutable<framework::LoDTensor>();
op_desc.SetInput("H0", {}); op_desc.SetInput("H0", {});
op_desc.SetInput("C0", {}); op_desc.SetInput("C0", {});
...@@ -90,6 +92,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, ...@@ -90,6 +92,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
op_desc.SetOutput("BatchedGate", {BatchedGate}); op_desc.SetOutput("BatchedGate", {BatchedGate});
op_desc.SetOutput("BatchCellPreAct", {BatchedCellPreAct}); op_desc.SetOutput("BatchCellPreAct", {BatchedCellPreAct});
op_desc.SetOutput("BatchedInput", {BatchedInput}); op_desc.SetOutput("BatchedInput", {BatchedInput});
op_desc.SetOutput("CheckedCell", {CheckedCell});
op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse")); op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse"));
op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes")); op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes"));
// TODO(TJ): get from attr // TODO(TJ): get from attr
......
...@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/ir/graph_helper.h"
#include <algorithm> #include <algorithm>
#include <deque>
#include <unordered_set> #include <unordered_set>
#include "paddle/fluid/framework/ir/graph_helper.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace ir { namespace ir {
...@@ -113,6 +113,74 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList( ...@@ -113,6 +113,74 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
return adj_list; return adj_list;
} }
size_t GraphNum(const Graph &graph) {
std::unordered_set<ir::Node *> nodes = graph.Nodes();
std::unordered_set<ir::Node *> visited_nodes;
visited_nodes.reserve(nodes.size());
std::deque<ir::Node *> q_nodes;
std::vector<std::unordered_set<ir::Node *>> graph_nodes;
std::unordered_set<ir::Node *> g_nodes;
size_t graph_count = 0;
auto traverse_nodes = [&visited_nodes,
&q_nodes](const std::vector<ir::Node *> &nodes) {
std::copy_if(
nodes.begin(), nodes.end(), std::back_inserter(q_nodes),
[&visited_nodes](Node *node) { return !visited_nodes.count(node); });
};
while (visited_nodes.size() != nodes.size()) {
if (!q_nodes.empty()) {
auto cur_node = q_nodes.front();
q_nodes.pop_front();
visited_nodes.insert(cur_node);
g_nodes.insert(cur_node);
traverse_nodes(cur_node->inputs);
traverse_nodes(cur_node->outputs);
} else {
++graph_count;
if (g_nodes.size()) {
graph_nodes.emplace_back(g_nodes);
}
g_nodes.clear();
for (auto &n : nodes) {
if (visited_nodes.count(n) == 0) {
q_nodes.push_back(n);
break;
}
}
}
}
if (g_nodes.size()) {
graph_nodes.emplace_back(g_nodes);
}
if (VLOG_IS_ON(10)) {
VLOG(10) << "graph_num: " << graph_nodes.size();
for (auto &g_n : graph_nodes) {
VLOG(10) << "graph_nodes: " << g_n.size();
if (g_n.size() < 10) {
std::stringstream out;
for (auto &node : g_n) {
out << "\nNode: " << node->Name() << " in [";
for (auto &n : node->inputs) {
out << n->Name() << ", ";
}
out << "], out[";
for (auto &n : node->outputs) {
out << n->Name() << ", ";
}
out << "]";
}
VLOG(10) << out.str();
}
}
}
return graph_count;
}
} // namespace ir } // namespace ir
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -27,6 +27,8 @@ namespace ir { ...@@ -27,6 +27,8 @@ namespace ir {
// Test if the graph contains circle. // Test if the graph contains circle.
bool HasCircle(const Graph &graph); bool HasCircle(const Graph &graph);
size_t GraphNum(const Graph &graph);
// Topology Sort the operations in the graph from inputs to outputs. // Topology Sort the operations in the graph from inputs to outputs.
// `graph` cannot contain circle. // `graph` cannot contain circle.
std::vector<ir::Node *> TopologySortOperations(const Graph &graph); std::vector<ir::Node *> TopologySortOperations(const Graph &graph);
......
...@@ -120,6 +120,97 @@ TEST(GraphHelperTest, Basic) { ...@@ -120,6 +120,97 @@ TEST(GraphHelperTest, Basic) {
ASSERT_EQ(node_map.at("op2"), 1UL); ASSERT_EQ(node_map.at("op2"), 1UL);
ASSERT_TRUE(node_map.at("op3") < node_map.at("op5")); ASSERT_TRUE(node_map.at("op3") < node_map.at("op5"));
} }
void BuildZeroGraph(Graph* g) {}
void BuildOneGraph(Graph* g) {
ir::Node* o1 = g->CreateEmptyNode("op1", Node::Type::kOperation);
ir::Node* o2 = g->CreateEmptyNode("op2", Node::Type::kOperation);
ir::Node* o3 = g->CreateEmptyNode("op3", Node::Type::kOperation);
ir::Node* o4 = g->CreateEmptyNode("op4", Node::Type::kOperation);
ir::Node* o5 = g->CreateEmptyNode("op5", Node::Type::kOperation);
ir::Node* v1 = g->CreateEmptyNode("var1", Node::Type::kVariable);
ir::Node* v2 = g->CreateEmptyNode("var2", Node::Type::kVariable);
ir::Node* v3 = g->CreateEmptyNode("var3", Node::Type::kVariable);
ir::Node* v4 = g->CreateEmptyNode("var4", Node::Type::kVariable);
// o1->v1->o2
o1->outputs.push_back(v1);
o2->inputs.push_back(v1);
v1->inputs.push_back(o1);
v1->outputs.push_back(o2);
// o2->v2->o3
// o2->v2->o4
o2->outputs.push_back(v2);
o3->inputs.push_back(v2);
o4->inputs.push_back(v2);
v2->inputs.push_back(o2);
v2->outputs.push_back(o3);
v2->outputs.push_back(o4);
// o2->v3->o5
o2->outputs.push_back(v3);
o5->inputs.push_back(v3);
v3->inputs.push_back(o2);
v3->outputs.push_back(o5);
// o3-v4->o5
o3->outputs.push_back(v4);
o5->inputs.push_back(v4);
v4->inputs.push_back(o3);
v4->outputs.push_back(o5);
}
void BuildTwoGraphs(Graph* g) {
ir::Node* o1 = g->CreateEmptyNode("op1", Node::Type::kOperation);
ir::Node* o2 = g->CreateEmptyNode("op2", Node::Type::kOperation);
ir::Node* o3 = g->CreateEmptyNode("op3", Node::Type::kOperation);
ir::Node* o4 = g->CreateEmptyNode("op4", Node::Type::kOperation);
ir::Node* o5 = g->CreateEmptyNode("op5", Node::Type::kOperation);
ir::Node* v1 = g->CreateEmptyNode("var1", Node::Type::kVariable);
ir::Node* v2 = g->CreateEmptyNode("var2", Node::Type::kVariable);
ir::Node* v3 = g->CreateEmptyNode("var3", Node::Type::kVariable);
ir::Node* v4 = g->CreateEmptyNode("var4", Node::Type::kVariable);
// o1->v1->o2
o1->outputs.push_back(v1);
o2->inputs.push_back(v1);
v1->inputs.push_back(o1);
v1->outputs.push_back(o2);
// o2->v2->o3
// o2->v2->o4
o2->outputs.push_back(v2);
o3->inputs.push_back(v2);
o4->inputs.push_back(v2);
v2->inputs.push_back(o2);
v2->outputs.push_back(o3);
v2->outputs.push_back(o4);
// o2->v3->o5
// o2->outputs.push_back(v3);
o5->inputs.push_back(v3);
// v3->inputs.push_back(o2);
v3->outputs.push_back(o5);
// o3-v4->o5
o3->outputs.push_back(v4);
// o5->inputs.push_back(v4);
v4->inputs.push_back(o3);
// v4->outputs.push_back(o5);
}
TEST(GraphHelperTest, GraphNum) {
ProgramDesc prog;
Graph g(prog);
BuildZeroGraph(&g);
ASSERT_EQ(GraphNum(g), 0);
Graph g2(prog);
BuildOneGraph(&g2);
ASSERT_EQ(GraphNum(g2), 1);
Graph g3(prog);
BuildTwoGraphs(&g3);
ASSERT_EQ(GraphNum(g3), 2);
}
} // namespace ir } // namespace ir
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -638,11 +638,6 @@ PDNode *patterns::ConvReLU::operator()( ...@@ -638,11 +638,6 @@ PDNode *patterns::ConvReLU::operator()(
->AsInput() ->AsInput()
->assert_is_persistable_var() ->assert_is_persistable_var()
->assert_is_op_input("conv2d", "Filter"); ->assert_is_op_input("conv2d", "Filter");
// Bias
auto *conv_bias_var = pattern->NewNode(conv_bias_repr())
->AsInput()
->assert_is_persistable_var()
->assert_is_op_input("conv2d", "Bias");
// intermediate variable, will be removed in the IR after fuse. // intermediate variable, will be removed in the IR after fuse.
auto *conv_out_var = pattern->NewNode(conv_out_repr()) auto *conv_out_var = pattern->NewNode(conv_out_repr())
->AsIntermediate() ->AsIntermediate()
...@@ -653,8 +648,7 @@ PDNode *patterns::ConvReLU::operator()( ...@@ -653,8 +648,7 @@ PDNode *patterns::ConvReLU::operator()(
->AsOutput() ->AsOutput()
->assert_is_op_output("relu"); ->assert_is_op_output("relu");
conv_op->LinksFrom({conv_input, conv_weight_var, conv_bias_var}) conv_op->LinksFrom({conv_input, conv_weight_var}).LinksTo({conv_out_var});
.LinksTo({conv_out_var});
relu_op->LinksFrom({conv_out_var}).LinksTo({relu_out_var}); relu_op->LinksFrom({conv_out_var}).LinksTo({relu_out_var});
return relu_out_var; return relu_out_var;
} }
...@@ -698,6 +692,24 @@ PDNode *patterns::FC::operator()(paddle::framework::ir::PDNode *x, ...@@ -698,6 +692,24 @@ PDNode *patterns::FC::operator()(paddle::framework::ir::PDNode *x,
} }
} }
PDNode *patterns::Embedding::operator()(PDNode *x) {
x->assert_is_op_input("lookup_table", "Ids");
auto *lookup_table_op =
pattern->NewNode(lookup_table_repr())->assert_is_op("lookup_table");
#define NEW_NODE(arg__, io__) \
auto *arg__ = pattern->NewNode(arg__##_repr()) \
->assert_is_op_##io__("lookup_table", #arg__);
NEW_NODE(W, input);
NEW_NODE(Out, output);
#undef NEW_NODE
lookup_table_op->LinksFrom({x, W});
lookup_table_op->LinksTo({Out});
return Out;
}
PDNode *patterns::LSTM::operator()(PDNode *x) { PDNode *patterns::LSTM::operator()(PDNode *x) {
x->assert_is_op_input("lstm", "Input"); x->assert_is_op_input("lstm", "Input");
auto *lstm_op = pattern->NewNode(lstm_repr())->assert_is_op("lstm"); auto *lstm_op = pattern->NewNode(lstm_repr())->assert_is_op("lstm");
......
...@@ -379,7 +379,7 @@ struct PatternBase { ...@@ -379,7 +379,7 @@ struct PatternBase {
// op: conv + relu // op: conv + relu
// named nodes: // named nodes:
// conv_input, conv_weight, // conv_input, conv_weight,
// conv_bias, conv_out, conv, // conv_out, conv,
// relu_out, relu // relu_out, relu
struct ConvReLU : public PatternBase { struct ConvReLU : public PatternBase {
ConvReLU(PDPattern* pattern, const std::string& name_scope) ConvReLU(PDPattern* pattern, const std::string& name_scope)
...@@ -392,7 +392,6 @@ struct ConvReLU : public PatternBase { ...@@ -392,7 +392,6 @@ struct ConvReLU : public PatternBase {
PATTERN_DECL_NODE(relu); PATTERN_DECL_NODE(relu);
// declare variable node's name // declare variable node's name
PATTERN_DECL_NODE(conv_weight); PATTERN_DECL_NODE(conv_weight);
PATTERN_DECL_NODE(conv_bias);
PATTERN_DECL_NODE(conv_out); PATTERN_DECL_NODE(conv_out);
PATTERN_DECL_NODE(relu_out); PATTERN_DECL_NODE(relu_out);
}; };
...@@ -419,6 +418,23 @@ struct FC : public PatternBase { ...@@ -419,6 +418,23 @@ struct FC : public PatternBase {
PATTERN_DECL_NODE(Out); PATTERN_DECL_NODE(Out);
}; };
// Embedding
struct Embedding : public PatternBase {
Embedding(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "embedding") {}
PDNode* operator()(PDNode* x);
// declare operator node's name
PATTERN_DECL_NODE(lookup_table);
// Inputs
//
PATTERN_DECL_NODE(Ids);
PATTERN_DECL_NODE(W); // embeddings
// Outputs
PATTERN_DECL_NODE(Out);
};
struct LSTM : public PatternBase { struct LSTM : public PatternBase {
LSTM(PDPattern* pattern, const std::string& name_scope) LSTM(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "lstm") {} : PatternBase(pattern, name_scope, "lstm") {}
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/ir/graph_traits.h"
#include <vector>
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace ir { namespace ir {
......
...@@ -19,7 +19,6 @@ namespace paddle { ...@@ -19,7 +19,6 @@ namespace paddle {
namespace framework { namespace framework {
namespace ir { namespace ir {
std::unique_ptr<Graph> Pass::Apply(std::unique_ptr<Graph> graph) const { std::unique_ptr<Graph> Pass::Apply(std::unique_ptr<Graph> graph) const {
PADDLE_ENFORCE(!applied_, "Pass can only Apply() once.");
PADDLE_ENFORCE(graph.get(), "graph passed to Pass::Apply() cannot be empty."); PADDLE_ENFORCE(graph.get(), "graph passed to Pass::Apply() cannot be empty.");
for (const std::string& attr : required_pass_attrs_) { for (const std::string& attr : required_pass_attrs_) {
PADDLE_ENFORCE(attrs_.find(attr) != attrs_.end(), PADDLE_ENFORCE(attrs_.find(attr) != attrs_.end(),
......
...@@ -42,6 +42,8 @@ class Pass { ...@@ -42,6 +42,8 @@ class Pass {
attr_dels_.clear(); attr_dels_.clear();
} }
std::string Type() const { return type_; }
std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const; std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph) const;
// Get a reference to the attributed previously set. // Get a reference to the attributed previously set.
...@@ -52,6 +54,21 @@ class Pass { ...@@ -52,6 +54,21 @@ class Pass {
return *boost::any_cast<AttrType *>(attrs_.at(attr_name)); return *boost::any_cast<AttrType *>(attrs_.at(attr_name));
} }
bool Has(const std::string &attr_name) const {
return attrs_.find(attr_name) != attrs_.end();
}
void Erase(const std::string &attr_name) {
if (!Has(attr_name)) {
return;
}
if (attr_dels_.find(attr_name) != attr_dels_.end()) {
attr_dels_[attr_name]();
attr_dels_.erase(attr_name);
}
attrs_.erase(attr_name);
}
// Set a pointer to the attribute. Pass takes ownership of the attribute. // Set a pointer to the attribute. Pass takes ownership of the attribute.
template <typename AttrType> template <typename AttrType>
void Set(const std::string &attr_name, AttrType *attr) { void Set(const std::string &attr_name, AttrType *attr) {
...@@ -68,13 +85,15 @@ class Pass { ...@@ -68,13 +85,15 @@ class Pass {
// should delete the attribute. // should delete the attribute.
template <typename AttrType> template <typename AttrType>
void SetNotOwned(const std::string &attr_name, AttrType *attr) { void SetNotOwned(const std::string &attr_name, AttrType *attr) {
PADDLE_ENFORCE(attrs_.count(attr_name) == 0); PADDLE_ENFORCE(attrs_.count(attr_name) == 0, "%s already set in the pass",
attr_name);
attrs_[attr_name] = attr; attrs_[attr_name] = attr;
} }
protected: protected:
virtual std::unique_ptr<Graph> ApplyImpl( virtual std::unique_ptr<Graph> ApplyImpl(std::unique_ptr<Graph> graph) const {
std::unique_ptr<Graph> graph) const = 0; LOG(FATAL) << "Calling virtual Pass not implemented.";
}
private: private:
template <typename PassType> template <typename PassType>
...@@ -89,7 +108,10 @@ class Pass { ...@@ -89,7 +108,10 @@ class Pass {
required_graph_attrs_.insert(attrs.begin(), attrs.end()); required_graph_attrs_.insert(attrs.begin(), attrs.end());
} }
void RegisterType(const std::string &type) { type_ = type; }
mutable bool applied_{false}; mutable bool applied_{false};
std::string type_;
std::unordered_set<std::string> required_pass_attrs_; std::unordered_set<std::string> required_pass_attrs_;
std::unordered_set<std::string> required_graph_attrs_; std::unordered_set<std::string> required_graph_attrs_;
std::map<std::string, boost::any> attrs_; std::map<std::string, boost::any> attrs_;
...@@ -143,10 +165,11 @@ struct PassRegistrar : public Registrar { ...@@ -143,10 +165,11 @@ struct PassRegistrar : public Registrar {
PADDLE_ENFORCE(!PassRegistry::Instance().Has(pass_type), PADDLE_ENFORCE(!PassRegistry::Instance().Has(pass_type),
"'%s' is registered more than once.", pass_type); "'%s' is registered more than once.", pass_type);
PassRegistry::Instance().Insert( PassRegistry::Instance().Insert(
pass_type, [this]() -> std::unique_ptr<Pass> { pass_type, [this, pass_type]() -> std::unique_ptr<Pass> {
std::unique_ptr<Pass> pass(new PassType()); std::unique_ptr<Pass> pass(new PassType());
pass->RegisterRequiredPassAttrs(this->required_pass_attrs_); pass->RegisterRequiredPassAttrs(this->required_pass_attrs_);
pass->RegisterRequiredGraphAttrs(this->required_graph_attrs_); pass->RegisterRequiredGraphAttrs(this->required_graph_attrs_);
pass->RegisterType(pass_type);
return pass; return pass;
}); });
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/pass_builder.h"
namespace paddle {
namespace framework {
namespace ir {
std::shared_ptr<Pass> PassBuilder::AppendPass(const std::string& pass_type) {
auto pass = ir::PassRegistry::Instance().Get(pass_type);
passes_.emplace_back(pass.release());
return passes_.back();
}
void PassBuilder::RemovePass(size_t idx) {
PADDLE_ENFORCE(passes_.size() > idx);
passes_.erase(passes_.begin() + idx);
}
std::shared_ptr<Pass> PassBuilder::InsertPass(size_t idx,
const std::string& pass_type) {
PADDLE_ENFORCE(passes_.size() >= idx);
std::shared_ptr<Pass> pass(
ir::PassRegistry::Instance().Get(pass_type).release());
passes_.insert(passes_.begin() + idx, std::move(pass));
return passes_[idx];
}
} // namespace ir
} // namespace framework
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
class PassBuilder {
public:
PassBuilder() {}
virtual ~PassBuilder() {}
// Append a new pass to the end.
std::shared_ptr<Pass> AppendPass(const std::string& pass_type);
// Insert a new pass after `idx`.
std::shared_ptr<Pass> InsertPass(size_t idx, const std::string& pass_type);
// Remove a new pass at `idx`.
void RemovePass(size_t idx);
// Returns a list of all passes.
std::vector<std::shared_ptr<Pass>> AllPasses() const { return passes_; }
protected:
std::vector<std::shared_ptr<Pass>> passes_;
};
} // namespace ir
} // namespace framework
} // namespace paddle
...@@ -82,12 +82,10 @@ TEST(PassTest, TestPassAttrCheck) { ...@@ -82,12 +82,10 @@ TEST(PassTest, TestPassAttrCheck) {
ASSERT_EQ(graph->Get<int>("copy_test_pass_attr"), 2); ASSERT_EQ(graph->Get<int>("copy_test_pass_attr"), 2);
ASSERT_EQ(graph->Get<int>("copy_test_graph_attr"), 2); ASSERT_EQ(graph->Get<int>("copy_test_graph_attr"), 2);
try { // Allow apply more than once.
graph.reset(new Graph(prog));
graph->Set<int>("test_graph_attr", new int);
graph = pass->Apply(std::move(graph)); graph = pass->Apply(std::move(graph));
} catch (paddle::platform::EnforceNotMet e) {
exception = std::string(e.what());
}
ASSERT_TRUE(exception.find("Pass can only Apply() once") != exception.npos);
pass = PassRegistry::Instance().Get("test_pass"); pass = PassRegistry::Instance().Get("test_pass");
pass->SetNotOwned<int>("test_pass_attr", &val); pass->SetNotOwned<int>("test_pass_attr", &val);
......
...@@ -17,10 +17,13 @@ ...@@ -17,10 +17,13 @@
#include <algorithm> #include <algorithm>
#include <initializer_list> #include <initializer_list>
#include <memory> #include <memory>
#include <mutex> // NOLINT
#include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/memcpy.h"
#include "glog/logging.h" #include "glog/logging.h"
...@@ -28,173 +31,167 @@ namespace paddle { ...@@ -28,173 +31,167 @@ namespace paddle {
namespace framework { namespace framework {
#if defined(PADDLE_WITH_CUDA) #if defined(PADDLE_WITH_CUDA)
// Vector<T> implements the std::vector interface, and can get Data or namespace details {
// MutableData from any place. The data will be synced implicitly inside. struct CUDABuffer {
template <typename T> void *data_{nullptr};
class Vector { size_t size_{0};
public: platform::CUDAPlace place_;
using value_type = T;
// Default ctor. Create empty Vector CUDABuffer() {}
Vector() { InitEmpty(); } CUDABuffer(platform::Place place, size_t size)
: size_(size), place_(boost::get<platform::CUDAPlace>(place)) {
// Fill vector with value. The vector size is `count`. data_ = memory::Alloc(place_, size);
explicit Vector(size_t count, const T &value = T()) {
InitEmpty();
if (count != 0) {
resize(count);
T *ptr = begin();
for (size_t i = 0; i < count; ++i) {
ptr[i] = value;
}
}
} }
// Ctor with init_list ~CUDABuffer() { ClearMemory(); }
Vector(std::initializer_list<T> init) {
if (init.size() == 0) { CUDABuffer(const CUDABuffer &o) = delete;
InitEmpty(); CUDABuffer &operator=(const CUDABuffer &o) = delete;
} else {
InitByIter(init.size(), init.begin(), init.end()); void Resize(platform::Place place, size_t size) {
ClearMemory();
place_ = boost::get<platform::CUDAPlace>(place);
data_ = memory::Alloc(place_, size);
PADDLE_ENFORCE_NOT_NULL(data_);
size_ = size;
} }
void Swap(CUDABuffer &o) {
std::swap(data_, o.data_);
std::swap(place_, o.place_);
std::swap(size_, o.size_);
} }
// implicit cast from std::vector. private:
template <typename U> void ClearMemory() const {
Vector(const std::vector<U> &dat) { // NOLINT if (data_ != nullptr) {
if (dat.size() == 0) { memory::Free(place_, data_);
InitEmpty();
} else {
InitByIter(dat.size(), dat.begin(), dat.end());
} }
} }
};
} // namespace details
// Copy ctor // Vector<T> implements the std::vector interface, and can get Data or
Vector(const Vector<T> &other) { this->operator=(other); } // MutableData from any place. The data will be synced implicitly inside.
template <typename T>
class Vector {
public:
using value_type = T;
using iterator = typename std::vector<T>::iterator;
using const_iterator = typename std::vector<T>::const_iterator;
// Copy operator private:
Vector<T> &operator=(const Vector<T> &other) { // The actual class to implement vector logic
if (other.size() != 0) { class VectorData {
this->InitByIter(other.size(), other.begin(), other.end()); public:
} else { VectorData() : flag_(kDataInCPU) {}
InitEmpty(); VectorData(size_t count, const T &value)
} : cpu_(count, value), flag_(kDataInCPU) {}
return *this; VectorData(std::initializer_list<T> init) : cpu_(init), flag_(kDataInCPU) {}
} template <typename U>
explicit VectorData(const std::vector<U> &dat)
: cpu_(dat), flag_(kDataInCPU) {}
~VectorData() {}
// Move ctor VectorData(const VectorData &o) {
Vector(Vector<T> &&other) { o.ImmutableCPU();
this->size_ = other.size_; cpu_ = o.cpu_;
this->flag_ = other.flag_; flag_ = kDataInCPU;
if (other.cuda_vec_.memory_size()) {
this->cuda_vec_.ShareDataWith(other.cuda_vec_);
}
if (other.cpu_vec_.memory_size()) {
this->cpu_vec_.ShareDataWith(other.cpu_vec_);
} }
VectorData &operator=(const VectorData &o) {
o.ImmutableCPU();
cpu_ = o.cpu_;
flag_ = kDataInCPU;
details::CUDABuffer null;
gpu_.Swap(null);
return *this;
} }
// CPU data access method. Mutable.
T &operator[](size_t i) { T &operator[](size_t i) {
MutableCPU(); MutableCPU();
return const_cast<T *>(cpu_vec_.data<T>())[i]; return cpu_[i];
} }
// CPU data access method. Immutable.
const T &operator[](size_t i) const { const T &operator[](size_t i) const {
ImmutableCPU(); ImmutableCPU();
return cpu_vec_.data<T>()[i]; return cpu_[i];
} }
// std::vector iterator methods. Based on CPU data access method size_t size() const { return cpu_.size(); }
size_t size() const { return size_; }
T *begin() { return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); } iterator begin() {
MutableCPU();
return cpu_.begin();
}
T *end() { iterator end() {
return capacity() == 0 ? &EmptyDummy() : &this->operator[](size()); MutableCPU();
return cpu_.end();
} }
T &front() { return *begin(); } T &front() {
MutableCPU();
return cpu_.front();
}
T &back() { T &back() {
auto it = end(); MutableCPU();
--it; return cpu_.back();
return *it;
} }
const T *begin() const { const_iterator begin() const {
return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); ImmutableCPU();
return cpu_.begin();
} }
const T *end() const { const_iterator end() const {
return capacity() == 0 ? &EmptyDummy() : &this->operator[](size()); ImmutableCPU();
return cpu_.end();
} }
const T *cbegin() const { return begin(); }
const T *cend() const { return end(); }
const T &back() const { const T &back() const {
auto it = end(); ImmutableCPU();
--it; return cpu_.back();
return *it;
} }
T *data() { return begin(); } T *data() { return &(*this)[0]; }
const T *data() const { return begin(); } const T *data() const { return &(*this)[0]; }
const T &front() const { return *begin(); } const T &front() const {
// end of std::vector iterator methods ImmutableCPU();
return cpu_.front();
}
// assign this from iterator. // assign this from iterator.
// NOTE: the iterator must support `end-begin` // NOTE: the iterator must support `end-begin`
template <typename Iter> template <typename Iter>
void assign(Iter begin, Iter end) { void assign(Iter begin, Iter end) {
InitByIter(end - begin, begin, end); MutableCPU();
cpu_.assign(begin, end);
} }
// push_back. If the previous capacity is not enough, the memory will // push_back. If the previous capacity is not enough, the memory will
// double. // double.
void push_back(T elem) { void push_back(T elem) {
if (size_ + 1 > capacity()) { MutableCPU();
reserve((size_ + 1) << 1); cpu_.push_back(elem);
}
*end() = elem;
++size_;
} }
// extend a vector by iterator. // extend a vector by iterator.
// NOTE: the iterator must support end-begin // NOTE: the iterator must support end-begin
template <typename It> template <typename It>
void Extend(It begin, It end) { void Extend(It begin, It end) {
size_t pre_size = size_; MutableCPU();
resize(pre_size + (end - begin)); auto out_it = std::back_inserter<std::vector<T>>(this->cpu_);
T *ptr = this->begin() + pre_size; std::copy(begin, end, out_it);
for (; begin < end; ++begin, ++ptr) {
*ptr = *begin;
}
} }
// resize the vector // resize the vector
void resize(size_t size) { void resize(size_t size) {
if (size + 1 <= capacity()) {
size_ = size;
} else {
MutableCPU(); MutableCPU();
Tensor cpu_tensor; cpu_.resize(size);
platform::Place cpu = platform::CPUPlace();
T *ptr = cpu_tensor.mutable_data<T>(
framework::make_ddim({static_cast<int64_t>(size)}), cpu);
const T *old_ptr =
cpu_vec_.memory_size() == 0 ? nullptr : cpu_vec_.data<T>();
if (old_ptr != nullptr) {
std::copy(old_ptr, old_ptr + size_, ptr);
}
size_ = size;
cpu_vec_.ShareDataWith(cpu_tensor);
}
} }
// get cuda ptr. immutable // get cuda ptr. immutable
...@@ -202,7 +199,7 @@ class Vector { ...@@ -202,7 +199,7 @@ class Vector {
PADDLE_ENFORCE(platform::is_gpu_place(place), PADDLE_ENFORCE(platform::is_gpu_place(place),
"CUDA Data must on CUDA place"); "CUDA Data must on CUDA place");
ImmutableCUDA(place); ImmutableCUDA(place);
return cuda_vec_.data<T>(); return reinterpret_cast<T *>(gpu_.data_);
} }
// get cuda ptr. mutable // get cuda ptr. mutable
...@@ -214,77 +211,39 @@ class Vector { ...@@ -214,77 +211,39 @@ class Vector {
// clear // clear
void clear() { void clear() {
size_ = 0; cpu_.clear();
flag_ = kDirty | kDataInCPU; flag_ = kDirty | kDataInCPU;
} }
size_t capacity() const { size_t capacity() const { return cpu_.capacity(); }
return cpu_vec_.memory_size() / SizeOfType(typeid(T));
}
// reserve data // reserve data
void reserve(size_t size) { void reserve(size_t size) const { cpu_.reserve(size); }
size_t pre_size = size_;
resize(size);
resize(pre_size);
}
// the unify method to access CPU or CUDA data. immutable.
const T *Data(platform::Place place) const {
if (platform::is_gpu_place(place)) {
return CUDAData(place);
} else {
return data();
}
}
// the unify method to access CPU or CUDA data. mutable.
T *MutableData(platform::Place place) {
if (platform::is_gpu_place(place)) {
return CUDAMutableData(place);
} else {
return data();
}
}
// implicit cast operator. Vector can be cast to std::vector implicitly. // implicit cast operator. Vector can be cast to std::vector implicitly.
operator std::vector<T>() const { operator std::vector<T>() const {
std::vector<T> result; ImmutableCPU();
result.resize(size()); return cpu_;
std::copy(begin(), end(), result.begin());
return result;
} }
bool operator==(const Vector<T> &other) const { bool operator==(const VectorData &other) const {
if (size() != other.size()) return false; ImmutableCPU();
auto it1 = cbegin(); other.ImmutableCPU();
auto it2 = other.cbegin(); return cpu_ == other.cpu_;
for (; it1 < cend(); ++it1, ++it2) {
if (*it1 != *it2) {
return false;
}
}
return true;
} }
private: std::mutex &Mutex() const { return mtx_; }
void InitEmpty() {
size_ = 0;
flag_ = kDataInCPU;
}
template <typename Iter> std::unique_ptr<platform::CUDAPlace> CUDAPlace() const {
void InitByIter(size_t size, Iter begin, Iter end) { if (gpu_.data_ == nullptr) {
platform::Place cpu = platform::CPUPlace(); return nullptr;
T *ptr = this->cpu_vec_.template mutable_data<T>( } else {
framework::make_ddim({static_cast<int64_t>(size)}), cpu); return std::unique_ptr<platform::CUDAPlace>(
for (size_t i = 0; i < size; ++i) { new platform::CUDAPlace(gpu_.place_));
*ptr++ = *begin++; }
}
flag_ = kDataInCPU | kDirty;
size_ = size;
} }
private:
enum DataFlag { enum DataFlag {
kDataInCPU = 0x01, kDataInCPU = 0x01,
kDataInCUDA = 0x02, kDataInCUDA = 0x02,
...@@ -294,8 +253,15 @@ class Vector { ...@@ -294,8 +253,15 @@ class Vector {
void CopyToCPU() const { void CopyToCPU() const {
// COPY GPU Data To CPU // COPY GPU Data To CPU
TensorCopy(cuda_vec_, platform::CPUPlace(), &cpu_vec_); auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
WaitPlace(cuda_vec_.place()); platform::DeviceContextPool::Instance().Get(
platform::Place(gpu_.place_)));
auto stream = dev_ctx->stream();
void *src = gpu_.data_;
void *dst = cpu_.data();
memory::Copy(platform::CPUPlace(), dst, gpu_.place_, src, gpu_.size_,
stream);
dev_ctx->Wait();
} }
void MutableCPU() { void MutableCPU() {
...@@ -308,16 +274,12 @@ class Vector { ...@@ -308,16 +274,12 @@ class Vector {
void ImmutableCUDA(platform::Place place) const { void ImmutableCUDA(platform::Place place) const {
if (IsDirty()) { if (IsDirty()) {
if (IsInCPU()) { if (IsInCPU()) {
TensorCopy(cpu_vec_, boost::get<platform::CUDAPlace>(place), CopyCPUDataToCUDA(place);
&cuda_vec_);
WaitPlace(place);
UnsetFlag(kDirty); UnsetFlag(kDirty);
SetFlag(kDataInCUDA); SetFlag(kDataInCUDA);
} else if (IsInCUDA() && !(place == cuda_vec_.place())) { } else if (IsInCUDA() &&
framework::Tensor tmp; !(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
TensorCopy(cuda_vec_, boost::get<platform::CUDAPlace>(place), &tmp); PADDLE_THROW("This situation should not happen");
WaitPlace(cuda_vec_.place());
cuda_vec_.ShareDataWith(tmp);
// Still dirty // Still dirty
} else { } else {
// Dirty && DataInCUDA && Device is same // Dirty && DataInCUDA && Device is same
...@@ -326,17 +288,10 @@ class Vector { ...@@ -326,17 +288,10 @@ class Vector {
} else { } else {
if (!IsInCUDA()) { if (!IsInCUDA()) {
// Even data is not dirty. However, data is not in CUDA. Copy data. // Even data is not dirty. However, data is not in CUDA. Copy data.
TensorCopy(cpu_vec_, boost::get<platform::CUDAPlace>(place), CopyCPUDataToCUDA(place);
&cuda_vec_);
WaitPlace(place);
SetFlag(kDataInCUDA); SetFlag(kDataInCUDA);
} else if (!(place == cuda_vec_.place())) { } else if (!(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
framework::Tensor tmp; PADDLE_THROW("This situation should not happen.");
WaitPlace(cuda_vec_.place());
TensorCopy(cuda_vec_, boost::get<platform::CUDAPlace>(place), &tmp);
WaitPlace(cuda_vec_.place());
WaitPlace(place);
cuda_vec_.ShareDataWith(tmp);
} else { } else {
// Not Dirty && DataInCUDA && Device is same // Not Dirty && DataInCUDA && Device is same
// Do nothing. // Do nothing.
...@@ -344,9 +299,20 @@ class Vector { ...@@ -344,9 +299,20 @@ class Vector {
} }
} }
void CopyCPUDataToCUDA(const platform::Place &place) const {
void *src = cpu_.data();
gpu_.Resize(place, cpu_.size() * sizeof(T));
void *dst = gpu_.data_;
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place));
auto stream = dev_ctx->stream();
memory::Copy(gpu_.place_, dst, platform::CPUPlace(), src, gpu_.size_,
stream);
}
void ImmutableCPU() const { void ImmutableCPU() const {
if (IsDirty() && if (IsDirty() && !IsInCPU()) { // If data has been changed in CUDA, or
!IsInCPU()) { // If data has been changed in CUDA, or CPU has no data. // CPU has no data.
CopyToCPU(); CopyToCPU();
UnsetFlag(kDirty); UnsetFlag(kDirty);
} }
...@@ -362,23 +328,178 @@ class Vector { ...@@ -362,23 +328,178 @@ class Vector {
bool IsInCPU() const { return flag_ & kDataInCPU; } bool IsInCPU() const { return flag_ & kDataInCPU; }
static void WaitPlace(const platform::Place place) { mutable std::vector<T> cpu_;
mutable details::CUDABuffer gpu_;
mutable int flag_;
mutable std::mutex mtx_;
};
public:
// Default ctor. Create empty Vector
Vector() : m_(new VectorData()) {}
// Fill vector with value. The vector size is `count`.
explicit Vector(size_t count, const T &value = T())
: m_(new VectorData(count, value)) {}
// Ctor with init_list
Vector(std::initializer_list<T> init) : m_(new VectorData(init)) {}
// implicit cast from std::vector.
template <typename U>
Vector(const std::vector<U> &dat) : m_(new VectorData(dat)) { // NOLINT
}
// Copy ctor
Vector(const Vector<T> &other) { m_ = other.m_; }
// Copy operator
Vector<T> &operator=(const Vector<T> &other) {
m_ = other.m_;
return *this;
}
// Move ctor
Vector(Vector<T> &&other) { m_ = std::move(other.m_); }
// CPU data access method. Mutable.
T &operator[](size_t i) { return (*m_.MutableData())[i]; }
// CPU data access method. Immutable.
const T &operator[](size_t i) const { return m_.Data()[i]; }
// std::vector iterator methods. Based on CPU data access method
size_t size() const { return m_.Data().size(); }
iterator begin() { return m_.MutableData()->begin(); }
iterator end() { return m_.MutableData()->end(); }
T &front() { return m_.MutableData()->front(); }
T &back() { return m_.MutableData()->back(); }
const_iterator begin() const { return m_.Data().begin(); }
const_iterator end() const { return m_.Data().end(); }
const_iterator cbegin() const { return begin(); }
const_iterator cend() const { return end(); }
const T &back() const { return m_.Data().back(); }
T *data() { return m_.MutableData()->data(); }
const T *data() const { return m_.Data().data(); }
const T &front() const { return m_.Data().front(); }
// end of std::vector iterator methods
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
template <typename Iter>
void assign(Iter begin, Iter end) {
m_.MutableData()->assign(begin, end);
}
// push_back. If the previous capacity is not enough, the memory will
// double.
void push_back(T elem) { m_.MutableData()->push_back(elem); }
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
template <typename It>
void Extend(It begin, It end) {
m_.MutableData()->Extend(begin, end);
}
// resize the vector
void resize(size_t size) {
if (m_.Data().size() != size) {
m_.MutableData()->resize(size);
}
}
// get cuda ptr. immutable
const T *CUDAData(platform::Place place) const {
{
auto &mtx = m_.Data().Mutex();
std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_.Data().CUDAPlace();
if (cuda_place == nullptr ||
*cuda_place == boost::get<platform::CUDAPlace>(place)) {
return m_.Data().CUDAData(place);
}
}
// If m_ contains CUDAData in a different place. Detach manually.
m_.Detach();
return CUDAData(place);
}
// get cuda ptr. mutable
T *CUDAMutableData(platform::Place place) {
{
auto &mtx = m_.Data().Mutex();
std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_.Data().CUDAPlace();
if (cuda_place == nullptr ||
*cuda_place == boost::get<platform::CUDAPlace>(place)) {
return m_.MutableData()->CUDAMutableData(place);
}
}
// If m_ contains CUDAData in a different place. Detach manually.
m_.Detach();
return CUDAMutableData(place);
}
// clear
void clear() { m_.MutableData()->clear(); }
size_t capacity() const { return m_.Data().capacity(); }
// reserve data
void reserve(size_t size) { m_.Data().reserve(size); }
// the unify method to access CPU or CUDA data. immutable.
const T *Data(platform::Place place) const {
if (platform::is_gpu_place(place)) {
return CUDAData(place);
} else {
return data();
}
}
// the unify method to access CPU or CUDA data. mutable.
T *MutableData(platform::Place place) {
if (platform::is_gpu_place(place)) { if (platform::is_gpu_place(place)) {
platform::DeviceContextPool::Instance() return CUDAMutableData(place);
.Get(boost::get<platform::CUDAPlace>(place)) } else {
->Wait(); return data();
} }
} }
static T &EmptyDummy() { // implicit cast operator. Vector can be cast to std::vector implicitly.
static T dummy = T(); operator std::vector<T>() const { return m_.Data(); }
return dummy;
bool operator==(const Vector<T> &other) const {
if (size() != other.size()) return false;
auto it1 = cbegin();
auto it2 = other.cbegin();
for (; it1 < cend(); ++it1, ++it2) {
if (*it1 != *it2) {
return false;
}
}
return true;
} }
mutable int flag_; const void *Handle() const { return &m_.Data(); }
mutable Tensor cpu_vec_;
mutable Tensor cuda_vec_; private:
size_t size_; // Vector is an COW object.
mutable details::COWPtr<VectorData> m_;
}; };
#else // PADDLE_WITH_CUDA #else // PADDLE_WITH_CUDA
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
// These code can be shared with Executor.
static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
if (var_type == proto::VarType::LOD_TENSOR) {
var->GetMutable<LoDTensor>();
} else if (var_type == proto::VarType::SELECTED_ROWS) {
var->GetMutable<SelectedRows>();
} else if (var_type == proto::VarType::FEED_MINIBATCH) {
var->GetMutable<FeedFetchList>();
} else if (var_type == proto::VarType::FETCH_LIST) {
var->GetMutable<FeedFetchList>();
} else if (var_type == proto::VarType::STEP_SCOPES) {
var->GetMutable<std::vector<framework::Scope>>();
} else if (var_type == proto::VarType::LOD_RANK_TABLE) {
var->GetMutable<LoDRankTable>();
} else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
var->GetMutable<LoDTensorArray>();
} else if (var_type == proto::VarType::PLACE_LIST) {
var->GetMutable<platform::PlaceList>();
} else if (var_type == proto::VarType::READER) {
var->GetMutable<ReaderHolder>();
} else if (var_type == proto::VarType::RAW) {
// GetMutable will be called in operator
} else {
PADDLE_THROW(
"Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]",
var_type);
}
}
void NaiveExecutor::Prepare(Scope *parent_scope,
const ProgramDesc &program_desc, int block_id,
bool with_feed_fetch_ops) {
if (!parent_scope) {
scope_ = new framework::Scope;
} else {
scope_ = &parent_scope->NewScope();
}
CreateVariables(program_desc, scope_, block_id);
CreateOps(program_desc, block_id, with_feed_fetch_ops);
}
void NaiveExecutor::Run() {
for (auto &op : ops_) {
VLOG(4) << "run " << op->Type();
op->Run(*scope_, place_);
}
}
void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
int block_id) {
PADDLE_ENFORCE(scope);
auto &global_block = desc.Block(block_id);
const Scope *ancestor_scope = scope;
while (ancestor_scope->parent()) {
ancestor_scope = ancestor_scope->parent();
}
if (ancestor_scope != scope) {
for (auto &var : global_block.AllVars()) {
if (var->Name() == framework::kEmptyVarName) {
continue;
}
// Create persistable vars in ancestor scope.
if (var->Persistable()) {
auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
} else { // Create temporary variables in local scope.
auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
}
}
} else {
for (auto &var : global_block.AllVars()) {
auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
}
}
}
void NaiveExecutor::CreateOps(const ProgramDesc &desc, int block_id,
bool with_feed_fetch_ops) {
for (const auto &op_desc : desc.Block(block_id).AllOps()) {
if (!with_feed_fetch_ops &&
(op_desc->Type() == "feed" || op_desc->Type() == "fetch")) {
string::PrettyLogEndl(string::Style::detail(), "--- skip [%s], %s -> %s",
op_desc->Input("X")[0], op_desc->Type(),
op_desc->Output("Out")[0]);
continue;
}
ops_.emplace_back(OpRegistry::CreateOp(*op_desc));
}
}
LoDTensor *NaiveExecutor::FindTensor(const std::string &name) {
PADDLE_ENFORCE(scope_, "Need to init scope first");
auto *var = scope_->FindVar(name);
PADDLE_ENFORCE(var, "No variable [%s] in the scope");
auto *tensor = const_cast<LoDTensor *>(&var->Get<LoDTensor>());
return tensor;
}
void NaiveExecutor::CleanFeedFetchOps() {
std::vector<std::unique_ptr<OperatorBase>> ops;
for (auto &op : ops_) {
if (op->Type() != "feed" && op->Type() != "fetch") {
ops.emplace_back(std::move(op));
}
}
ops_.swap(ops);
}
void NaiveExecutor::EnableMKLDNN(const ProgramDesc &program) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(3) << "use_mkldnn=True";
for (size_t block_id = 0; block_id < program.Size(); ++block_id) {
auto *block = const_cast<ProgramDesc &>(program).MutableBlock(block_id);
for (auto *op : block->AllOps()) {
if (op->HasAttr("use_mkldnn")) {
op->SetAttr("use_mkldnn", true);
}
}
}
#else
LOG(WARNING)
<< "'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option";
#endif
}
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace framework {
/*
* Simple, intuitive and effective. Only single thread is supported, and
* currently designed for inference.
*/
class NaiveExecutor {
public:
explicit NaiveExecutor(const platform::Place& place) : place_(place) {}
// Create child scope.
// Create variables.
// @with_feed_fetch_ops: whether to work with the feed and fetch operators.
void Prepare(Scope* parent_scope, const ProgramDesc& program_desc,
int block_id, bool with_feed_fetch_ops);
// Run all the operators.
void Run();
// Get an tensor to operating directly, without the need for feed_ops.
LoDTensor* FindTensor(const std::string& name);
Scope* scope() { return scope_; }
void CleanFeedFetchOps();
void EnableMKLDNN(const ProgramDesc& program);
protected:
void CreateVariables(const ProgramDesc& desc, Scope* scope, int block_id);
void CreateOps(const ProgramDesc& desc, int block_id,
bool with_feed_fetch_ops);
private:
const platform::Place place_;
// Catch the required resource to avoid recreate.
std::vector<std::unique_ptr<OperatorBase>> ops_;
Scope* scope_;
};
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/naive_executor.h"
#include <gtest/gtest.h>
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
namespace paddle {
namespace framework {
TEST(NaiveExecutor, Basic) {
ProgramDesc program;
auto* main_block = program.MutableBlock(0);
auto* a = main_block->Var("a"); // input
auto* b = main_block->Var("b"); // input
auto* c = main_block->Var("c"); // input
a->SetType(proto::VarType::LOD_TENSOR);
b->SetType(proto::VarType::LOD_TENSOR);
c->SetType(proto::VarType::LOD_TENSOR);
auto* add = main_block->AppendOp();
add->SetType("elementwise_add");
add->SetInput("X", {"a"});
add->SetInput("Y", {"b"});
add->SetOutput("Out", {"c"});
auto place = platform::CPUPlace();
NaiveExecutor exe(place);
exe.Prepare(nullptr, program, 0, false /*with feed fetch ops*/);
auto* a_tensor = exe.FindTensor("a");
auto* b_tensor = exe.FindTensor("b");
auto* c_tensor = exe.FindTensor("c");
a_tensor->Resize({1, 4});
b_tensor->Resize({1, 4});
c_tensor->Resize({1, 4});
b_tensor->mutable_data<float>(place);
a_tensor->mutable_data<float>(place);
float a_arr[] = {0, 1, 2, 3};
float b_arr[] = {0.0, .1, .2, .3};
std::copy_n(a_arr, 4, a_tensor->mutable_data<float>(place));
std::copy_n(b_arr, 4, b_tensor->mutable_data<float>(place));
exe.Run();
auto* c_data = c_tensor->mutable_data<float>(place);
for (int i = 0; i < 4; i++) {
EXPECT_NEAR(c_data[i], 1.1 * i, 1e-3);
}
}
} // namespace framework
} // namespace paddle
USE_OP(elementwise_add);
...@@ -13,21 +13,19 @@ See the License for the specific language governing permissions and ...@@ -13,21 +13,19 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/parallel_executor.h"
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h" #include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h" #include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
...@@ -35,80 +33,6 @@ limitations under the License. */ ...@@ -35,80 +33,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
std::unique_ptr<ir::Graph> ApplyParallelExecutorPass(
const ProgramDesc &main_program, const std::vector<platform::Place> &places,
const std::string &loss_var_name,
const std::unordered_set<std::string> &param_names,
const std::vector<Scope *> &local_scopes, const bool use_cuda,
#ifdef PADDLE_WITH_CUDA
const BuildStrategy &strategy, platform::NCCLContextMap *nccl_ctxs) {
#else
const BuildStrategy &strategy) {
#endif
// Convert the program to graph.
std::unique_ptr<ir::Graph> graph(new ir::Graph(main_program));
// Apply a graph viz pass to record a graph.
if (!strategy.debug_graphviz_path_.empty()) {
auto viz_pass = ir::PassRegistry::Instance().Get("graph_viz_pass");
const std::string graph_path = string::Sprintf(
"%s%s", strategy.debug_graphviz_path_.c_str(), "_original_graph");
viz_pass->Set<std::string>("graph_viz_path", new std::string(graph_path));
graph = viz_pass->Apply(std::move(graph));
}
// Apply op fusion.
if (strategy.fuse_elewise_add_act_ops_) {
auto fuse_elewise_add_act_pass =
ir::PassRegistry::Instance().Get("fuse_elewise_add_act_pass");
graph = fuse_elewise_add_act_pass->Apply(std::move(graph));
// Apply a graph viz pass to record a graph.
if (!strategy.debug_graphviz_path_.empty()) {
auto viz_pass = ir::PassRegistry::Instance().Get("graph_viz_pass");
const std::string graph_path = string::Sprintf(
"%s%s", strategy.debug_graphviz_path_.c_str(), "_fused_graph");
viz_pass->Set<std::string>("graph_viz_path", new std::string(graph_path));
graph = viz_pass->Apply(std::move(graph));
}
}
// Convert graph to run on multi-devices.
auto multi_devices_pass =
ir::PassRegistry::Instance().Get("multi_devices_pass");
multi_devices_pass->SetNotOwned<const std::vector<platform::Place>>("places",
&places);
multi_devices_pass->SetNotOwned<const std::string>("loss_var_name",
&loss_var_name);
multi_devices_pass->SetNotOwned<const std::unordered_set<std::string>>(
"params", &param_names);
multi_devices_pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
&local_scopes);
multi_devices_pass->SetNotOwned<const BuildStrategy>("strategy", &strategy);
#ifdef PADDLE_WITH_CUDA
platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr;
multi_devices_pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
#endif
graph = multi_devices_pass->Apply(std::move(graph));
// Apply a graph print pass to record a graph with device info.
if (!strategy.debug_graphviz_path_.empty()) {
auto multi_devices_print_pass =
ir::PassRegistry::Instance().Get("multi_devices_print_pass");
multi_devices_print_pass->SetNotOwned<const std::string>(
"debug_graphviz_path", &strategy.debug_graphviz_path_);
multi_devices_print_pass->Set<details::GraphvizSSAGraphPrinter>(
"graph_printer", new details::GraphvizSSAGraphPrinter);
graph = multi_devices_print_pass->Apply(std::move(graph));
}
// Verify that the graph is correct for multi-device executor.
auto multi_devices_check_pass =
ir::PassRegistry::Instance().Get("multi_devices_check_pass");
graph = multi_devices_check_pass->Apply(std::move(graph));
return graph;
}
class ParallelExecutorPrivate { class ParallelExecutorPrivate {
public: public:
explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places) explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
...@@ -199,10 +123,9 @@ ParallelExecutor::ParallelExecutor( ...@@ -199,10 +123,9 @@ ParallelExecutor::ParallelExecutor(
// Step 3. Convert main_program to SSA form and dependency graph. Also, insert // Step 3. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp // ncclOp
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
std::unique_ptr<ir::Graph> graph = ApplyParallelExecutorPass( std::unique_ptr<ir::Graph> graph = build_strategy.Apply(
main_program, member_->places_, loss_var_name, params, main_program, member_->places_, loss_var_name, params,
member_->local_scopes_, member_->use_cuda_, build_strategy, member_->local_scopes_, member_->use_cuda_, member_->nccl_ctxs_.get());
member_->nccl_ctxs_.get());
auto max_memory_size = GetEagerDeletionThreshold(); auto max_memory_size = GetEagerDeletionThreshold();
if (max_memory_size >= 0) { if (max_memory_size >= 0) {
...@@ -228,11 +151,19 @@ ParallelExecutor::ParallelExecutor( ...@@ -228,11 +151,19 @@ ParallelExecutor::ParallelExecutor(
} }
} }
#else #else
std::unique_ptr<ir::Graph> graph = ApplyParallelExecutorPass( std::unique_ptr<ir::Graph> graph =
main_program, member_->places_, loss_var_name, params, build_strategy.Apply(main_program, member_->places_, loss_var_name,
member_->local_scopes_, member_->use_cuda_, build_strategy); params, member_->local_scopes_, member_->use_cuda_);
#endif #endif
if (VLOG_IS_ON(5)) {
// If the loss_var_name is given, the number of graph should be only one.
if (loss_var_name.size()) {
PADDLE_ENFORCE_EQ(ir::GraphNum(*graph), 1,
"The number of graph should be only one");
}
}
if (exec_strategy.type_ == ExecutionStrategy::kDefault) { if (exec_strategy.type_ == ExecutionStrategy::kDefault) {
member_->executor_.reset(new details::ThreadedSSAGraphExecutor( member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
exec_strategy, member_->local_scopes_, places, std::move(graph))); exec_strategy, member_->local_scopes_, places, std::move(graph)));
...@@ -319,6 +250,13 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors, ...@@ -319,6 +250,13 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (!gcs_.empty()) { if (!gcs_.empty()) {
ResetReferenceCount(); ResetReferenceCount();
for (auto &pair : cur_ref_cnts_) {
auto &name_map = *(pair.second);
for (auto &fetch_name : fetch_tensors) {
name_map.erase(fetch_name);
}
name_map.erase(fetched_var_name);
}
} }
#endif #endif
auto fetch_data = member_->executor_->Run(fetch_tensors); auto fetch_data = member_->executor_->Run(fetch_tensors);
...@@ -373,12 +311,6 @@ ParallelExecutor::~ParallelExecutor() { ...@@ -373,12 +311,6 @@ ParallelExecutor::~ParallelExecutor() {
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
USE_PASS(fuse_elewise_add_act_pass);
USE_PASS(graph_viz_pass);
USE_PASS(multi_devices_pass);
USE_PASS(multi_devices_check_pass);
USE_PASS(multi_devices_print_pass);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
USE_PASS(reference_count_pass); USE_PASS(reference_count_pass);
#endif #endif
...@@ -14,14 +14,14 @@ limitations under the License. */ ...@@ -14,14 +14,14 @@ limitations under the License. */
#pragma once #pragma once
#include <paddle/fluid/framework/details/build_strategy.h>
#include <atomic> #include <atomic>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/execution_strategy.h" #include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_graph_pass.h"
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
......
...@@ -46,6 +46,7 @@ struct RWLock { ...@@ -46,6 +46,7 @@ struct RWLock {
private: private:
pthread_rwlock_t lock_; pthread_rwlock_t lock_;
}; };
// TODO(paddle-dev): Support RWLock for WIN32 for correctness.
#else #else
// https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive // https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive
// In windows, rw_lock seems like a hack. Use empty object and do nothing. // In windows, rw_lock seems like a hack. Use empty object and do nothing.
......
...@@ -27,8 +27,11 @@ class SelectedRowsTester : public ::testing::Test { ...@@ -27,8 +27,11 @@ class SelectedRowsTester : public ::testing::Test {
selected_rows_.reset(new SelectedRows(rows, height)); selected_rows_.reset(new SelectedRows(rows, height));
Tensor* value = selected_rows_->mutable_value(); Tensor* value = selected_rows_->mutable_value();
value->mutable_data<float>( auto* data = value->mutable_data<float>(
make_ddim({static_cast<int64_t>(rows.size()), row_numel}), place_); make_ddim({static_cast<int64_t>(rows.size()), row_numel}), place_);
for (int64_t i = 0; i < value->numel(); ++i) {
data[i] = static_cast<float>(i);
}
} }
protected: protected:
...@@ -60,6 +63,10 @@ TEST_F(SelectedRowsTester, SerializeAndDeseralize) { ...@@ -60,6 +63,10 @@ TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
ASSERT_EQ(selected_rows_->height(), dst_tensor.height()); ASSERT_EQ(selected_rows_->height(), dst_tensor.height());
ASSERT_EQ(selected_rows_->value().dims(), dst_tensor.value().dims()); ASSERT_EQ(selected_rows_->value().dims(), dst_tensor.value().dims());
ASSERT_EQ(selected_rows_->GetCompleteDims(), dst_tensor.GetCompleteDims()); ASSERT_EQ(selected_rows_->GetCompleteDims(), dst_tensor.GetCompleteDims());
auto* dst_data = dst_tensor.value().data<float>();
for (int64_t i = 0; i < dst_tensor.value().numel(); ++i) {
ASSERT_EQ(dst_data[i], static_cast<float>(i));
}
} }
TEST(SelectedRows, SparseTable) { TEST(SelectedRows, SparseTable) {
......
...@@ -17,7 +17,6 @@ limitations under the License. */ ...@@ -17,7 +17,6 @@ limitations under the License. */
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/framework/var_desc.h"
......
此差异已折叠。
...@@ -87,8 +87,6 @@ class VarDesc { ...@@ -87,8 +87,6 @@ class VarDesc {
void SetDataTypes( void SetDataTypes(
const std::vector<proto::VarType::Type> &multiple_data_type); const std::vector<proto::VarType::Type> &multiple_data_type);
void SetCapacity(int64_t capacity);
proto::VarType::Type GetDataType() const; proto::VarType::Type GetDataType() const;
std::vector<proto::VarType::Type> GetDataTypes() const; std::vector<proto::VarType::Type> GetDataTypes() const;
...@@ -110,10 +108,8 @@ class VarDesc { ...@@ -110,10 +108,8 @@ class VarDesc {
void SetPersistable(bool persistable) { desc_.set_persistable(persistable); } void SetPersistable(bool persistable) { desc_.set_persistable(persistable); }
private: private:
const proto::VarType::ChannelDesc &channel_desc() const;
const proto::VarType::TensorDesc &tensor_desc() const; const proto::VarType::TensorDesc &tensor_desc() const;
std::vector<proto::VarType::TensorDesc> tensor_descs() const; std::vector<proto::VarType::TensorDesc> tensor_descs() const;
proto::VarType::ChannelDesc *mutable_channel_desc();
proto::VarType::TensorDesc *mutable_tensor_desc(); proto::VarType::TensorDesc *mutable_tensor_desc();
std::vector<proto::VarType::TensorDesc *> mutable_tensor_descs(); std::vector<proto::VarType::TensorDesc *> mutable_tensor_descs();
......
...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
...@@ -41,8 +40,6 @@ inline proto::VarType::Type ToVarType(std::type_index type) { ...@@ -41,8 +40,6 @@ inline proto::VarType::Type ToVarType(std::type_index type) {
return proto::VarType_Type_SELECTED_ROWS; return proto::VarType_Type_SELECTED_ROWS;
} else if (IsType<ReaderHolder>(type)) { } else if (IsType<ReaderHolder>(type)) {
return proto::VarType_Type_READER; return proto::VarType_Type_READER;
} else if (IsType<ChannelHolder>(type)) {
return proto::VarType_Type_CHANNEL;
} else { } else {
PADDLE_THROW("ToVarType:Unsupported type %s", type.name()); PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
} }
...@@ -66,9 +63,6 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) { ...@@ -66,9 +63,6 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
case proto::VarType_Type_READER: case proto::VarType_Type_READER:
visitor(var.Get<ReaderHolder>()); visitor(var.Get<ReaderHolder>());
return; return;
case proto::VarType_Type_CHANNEL:
visitor(var.Get<ChannelHolder>());
return;
default: default:
PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type())); PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
} }
......
...@@ -66,6 +66,7 @@ class Analyzer : public OrderedRegistry<PassManager> { ...@@ -66,6 +66,7 @@ class Analyzer : public OrderedRegistry<PassManager> {
// Manual update the passes here. // Manual update the passes here.
"infer_clean_graph_pass", // "infer_clean_graph_pass", //
"attention_lstm_fuse_pass", // "attention_lstm_fuse_pass", //
"embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass", // "fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", // "mul_lstm_fuse_pass", //
"fc_gru_fuse_pass", // "fc_gru_fuse_pass", //
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册