Commit eff92d04, authored by: T tangwei12

merge develop

@@ -25,7 +25,6 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
 message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
         "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
-find_package(Sphinx)
 if(NOT CMAKE_CROSSCOMPILING)
     find_package(CUDA QUIET)
 endif(NOT CMAKE_CROSSCOMPILING)
@@ -226,5 +225,7 @@ if(WITH_PYTHON)
 endif()

 if(WITH_DOC)
+    find_package(Sphinx REQUIRED)
+    find_python_module(recommonmark REQUIRED)
     add_subdirectory(doc)
 endif()
@@ -62,9 +62,9 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
 ## Installation

 It is recommended to check out the
-[Docker installation guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/docker_install_en.html)
+[Docker installation guide](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/docker_install_en.html)
 before looking into the
-[build from source guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/build_from_source_en.html).
+[build from source guide](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/build_from_source_en.html).

 ## Documentation
......
@@ -159,6 +159,7 @@ def run_benchmark(model, args):
         paddle.dataset.mnist.train(), batch_size=args.batch_size)

     accuracy = fluid.metrics.Accuracy()
+    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
     iters, num_samples, start_time = 0, 0, time.time()
     for pass_id in range(args.pass_num):
         accuracy.reset()
@@ -175,17 +176,20 @@ def run_benchmark(model, args):
             y_data = np.array(map(lambda x: x[1], data)).astype("int64")
             y_data = y_data.reshape([len(y_data), 1])

-            outs = exe.run(
-                fluid.default_main_program(),
+            outs = train_exe.run(
                 feed={"pixel": img_data,
                       "label": y_data},
-                fetch_list=[avg_cost, batch_acc, batch_size_tensor]
+                fetch_list=[
+                    avg_cost.name, batch_acc.name, batch_size_tensor.name
+                ]
             )  # The accuracy is the accumulation of batches, but not the current batch.
-            accuracy.update(value=outs[1], weight=outs[2])
+            accuracy.update(
+                value=np.array(np.mean(outs[1])),
+                weight=np.mean(np.array(outs[2])))
             iters += 1
             num_samples += len(y_data)
-            loss = np.array(outs[0])
-            acc = np.array(outs[1])
+            loss = np.mean(np.array(outs[0]))
+            acc = np.mean(np.array(outs[1]))
             train_losses.append(loss)
             train_accs.append(acc)
             print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
......
@@ -241,6 +241,7 @@ def run_benchmark(model, args):
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
     accuracy = fluid.average.WeightedAverage()
+    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
     if args.use_fake_data:
         data = train_reader().next()
         image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype(
@@ -264,14 +265,17 @@ def run_benchmark(model, args):
                 data)).astype('float32')
             label = np.array(map(lambda x: x[1], data)).astype('int64')
             label = label.reshape([-1, 1])
-            loss, acc, weight = exe.run(
-                fluid.default_main_program(),
+            loss, acc, weight = train_exe.run(
                 feed={'data': image,
                       'label': label},
-                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
+                fetch_list=[
+                    avg_cost.name, batch_acc.name, batch_size_tensor.name
+                ])
             iters += 1
             num_samples += len(label)
-            accuracy.add(value=acc, weight=weight)
+            accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
+            loss = np.mean(np.array(loss))
+            acc = np.mean(np.array(acc))
             train_losses.append(loss)
             train_accs.append(acc)
             print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
......
@@ -169,6 +169,7 @@ def main():
     iters, num_samples, start_time = 0, 0, time.time()
     accuracy = fluid.average.WeightedAverage()
+    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
     for pass_id in range(args.pass_num):
         accuracy.reset()
         train_accs = []
@@ -184,14 +185,17 @@ def main():
             y_data = np.array(map(lambda x: x[1], data)).astype("int64")
             y_data = y_data.reshape([-1, 1])

-            loss, acc, weight = exe.run(
-                fluid.default_main_program(),
+            loss, acc, weight = train_exe.run(
                 feed={"pixel": img_data,
                       "label": y_data},
-                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
-            accuracy.add(value=acc, weight=weight)
+                fetch_list=[
+                    avg_cost.name, batch_acc.name, batch_size_tensor.name
+                ])
+            accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
             iters += 1
             num_samples += len(y_data)
+            loss = np.mean(np.array(loss))
+            acc = np.mean(np.array(acc))
             print(
                 "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                 (pass_id, iters, loss, acc)
......
@@ -24,7 +24,7 @@ set(BOOST_PROJECT "extern_boost")
 # So we use 1.41.0 here.
 set(BOOST_VER "1.41.0")
 set(BOOST_TAR "boost_1_41_0")
-set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz")
+set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz")
 set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
 set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
 set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
......
@@ -21,11 +21,12 @@ else()
 ExternalProject_Add(
     extern_eigen3
     ${EXTERNAL_PROJECT_LOG_ARGS}
-    GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
+    GIT_REPOSITORY "https://github.com/eigenteam/eigen-git-mirror"
     # eigen on cuda9.1 missing header of math_funtions.hpp
    # https://stackoverflow.com/questions/43113508/math-functions-hpp-not-found-when-using-cuda-with-eigen
     GIT_TAG 917060c364181f33a735dc023818d5a54f60e54c
     PREFIX ${EIGEN_SOURCE_DIR}
+    DOWNLOAD_NAME "eigen"
     UPDATE_COMMAND ""
     CONFIGURE_COMMAND ""
     BUILD_COMMAND ""
......
@@ -45,15 +45,15 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
 ELSE()
     MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
 ENDIF()
-SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} -Wno-error=strict-overflow")
-SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} -Wno-error=strict-overflow")
+SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result")
+SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
+SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
 ExternalProject_Add(
     ${MKLDNN_PROJECT}
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DEPENDS ${MKLDNN_DEPENDS}
     GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
-    GIT_TAG "v0.14"
+    GIT_TAG "db3424ad44901513c03a1ea31ccaacdf633fbe9f"
     PREFIX ${MKLDNN_SOURCES_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
@@ -61,6 +61,7 @@ ExternalProject_Add(
     CMAKE_ARGS -DMKLROOT=${MKLML_ROOT}
     CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CFLAG}
     CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}
+    CMAKE_ARGS -DWITH_TEST=OFF -DWITH_EXAMPLE=OFF
     CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
                      -DMKLROOT:PATH=${MKLML_ROOT}
 )
......
@@ -27,8 +27,8 @@ ENDIF()
 INCLUDE(ExternalProject)

 SET(MKLML_PROJECT "extern_mklml")
-SET(MKLML_VER "mklml_lnx_2018.0.1.20171007")
-SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz")
+SET(MKLML_VER "mklml_lnx_2018.0.3.20180406")
+SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz")
 SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
 SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
 SET(MKLML_DST_DIR "mklml")
......
@@ -47,8 +47,6 @@ ExternalProject_Add(
                -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib
                -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
                -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-    BUILD_COMMAND make -j8
-    INSTALL_COMMAND make install
 )

 add_library(snappy STATIC IMPORTED GLOBAL)
......
@@ -46,8 +46,6 @@ ExternalProject_Add(
                -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPYSTREAM_INSTALL_DIR}
                -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPYSTREAM_INSTALL_DIR}/lib
                -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-    BUILD_COMMAND make -j8
-    INSTALL_COMMAND make install
     DEPENDS snappy
 )
......
@@ -70,6 +70,12 @@ copy(glog_lib
   DSTS ${dst_dir} ${dst_dir}/lib
 )

+set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/boost/")
+copy(boost_lib
+  SRCS ${BOOST_INCLUDE_DIR}/boost
+  DSTS ${dst_dir}
+)
+
 if(NOT PROTOBUF_FOUND)
     set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/protobuf")
     copy(protobuf_lib
@@ -92,6 +98,14 @@ elseif (WITH_MKLML)
     )
 endif()

+if(WITH_MKLDNN)
+  set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/mkldnn")
+  copy(mkldnn_lib
+    SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
+    DSTS ${dst_dir} ${dst_dir}/lib
+  )
+endif()
+
 if(NOT MOBILE_INFERENCE AND NOT RPI)
     set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy")
     copy(snappy_lib
@@ -142,4 +156,10 @@ copy(string_lib
   DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat
 )

+set(module "pybind")
+copy(pybind_lib
+  SRCS ${CMAKE_CURRENT_BINARY_DIR}/paddle/fluid/${module}/pybind.h
+  DSTS ${dst_dir}/${module}
+)
+
 add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep})
@@ -40,7 +40,7 @@ template <typename T>
 class FCOp : public OperatorBase {
  public:
   void Run(...) {
-    add(mul(Input<T>("X"), Input<T>("W")), Input<T>("b");
+    add(mul(Input<T>("X"), Input<T>("W")), Input<T>("b"));
   }
 };
 REGISTER_OP(FCOp, "fc");
......
@@ -155,7 +155,7 @@ into offsets
 3 2+3 4+5 1+9 2+10 3+12
 ```
-so we know that the first sentence is from word 0 to word 3, and the second sentence from work 3 to word 5.
+so we know that the first sentence is from word 0 to word 3, and the second sentence from word 3 to word 5.

 Similarly, the lengths in the top level LoD
......
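To make the lengths-to-offsets conversion above concrete, here is a minimal standalone C++ sketch (not the framework's own implementation): each offset is the running sum of the preceding lengths, so lengths `{3, 2}` become offsets `{0, 3, 5}`.

```cpp
#include <numeric>
#include <vector>

// Convert per-sequence lengths into offsets: {3, 2} -> {0, 3, 5},
// i.e. sentence 0 spans words [0, 3) and sentence 1 spans words [3, 5).
std::vector<int> LengthsToOffsets(const std::vector<int>& lengths) {
  std::vector<int> offsets(lengths.size() + 1, 0);
  std::partial_sum(lengths.begin(), lengths.end(), offsets.begin() + 1);
  return offsets;
}
```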
@@ -4,34 +4,37 @@
 For the typical synchronous distributed training, some significant steps are as follows:

-1. A Trainer will compute the gradients and SEND them to the Parameter Server(PServer) nodes.
+1. A trainer process will compute the gradients and **send** them to the parameter server (PS) nodes.
-1. After the PServer node received gradients came from all the Trainers, It will aggregate the
+1. After the PS node has received the gradients from all the trainers, it will aggregate the
 gradient variables for the same parameter into one gradient variable and then apply the aggregated
 gradient to the respective parameter, finally using an optimization algorithm (SGD, Momentum, ...)
 to update the parameters.
-1. The Trainer would wait for the PServers finished the optimize stage, and GET the parameters from PServer,
+1. The trainer will wait for the PS to finish the optimization stage, and then **get** the parameters from the PS,
 so that all the trainers end up with the same parameters.

-In the synchronously distributed training, there should be a `Barrier` to synchronise the
-parameters after the optimizing stage. The performance of a distributed training job would
-depend on the slowest node if there were hundreds or thousands of training nodes in a
-Job, the performance of synchronously distributed training might be very poor because of
-the slow node. So this design doc would introduce an approach to implement
-*asynchronously* distributed training in PaddlePaddle Fluid.
+In synchronous distributed training, there is a **barrier** on each PS that waits until all trainer
+processes have completed the current mini-batch; only then can all trainers continue with the next
+mini-batch. The overall performance of synchronous distributed training therefore depends on the
+slowest node.
+
+In asynchronous distributed training, there is no need to wait for a global mini-batch: the optimizer
+on the PS runs immediately once a gradient is uploaded to the PS by any one trainer. This mode scales
+better and achieves higher throughput. In this design doc, we will introduce how to implement
+asynchronous distributed training based on PaddlePaddle Fluid.

 ## Design

 <img src="./src/async_update.png" width="600"/>

-As the figure above, we describe a global view of asynchronously update process and use
+As shown in the figure above, we describe a global view of the asynchronous update process and use
 the parameter `w1` as an example to introduce the steps:
 1. Each gradient variable may be distributed across different GPU cards; aggregate
 them once they have all been calculated.
-1. Split the gradient variable into multiple blocks according to the number of PServer
+1. Split the gradient variable into multiple blocks according to the number of PS
 instances and then send them.
-1. PServer would run an `Optimize Block` using a specified optimize algorithm to update
+1. The PS will run an `Optimize Block` using a specified optimization algorithm to update
 the specified parameter.
-1. The trainer will fetch latest parameter from PServer before running forward Op which depends
+1. The trainer will fetch the latest parameter from the PS before running the forward op that depends
 on the specified parameter.
 1. Broadcast the received variable into multiple GPU cards and continue to run the next
 mini-batch.
@@ -40,8 +43,8 @@ mini-batch.

 - For multi-device distributed training, we need to first aggregate the gradient
 variables placed on different devices and then schedule a `SendVars` operator to
-send the gradient variables to the multiple PServer instances.
+send the gradient variables to the multiple PS instances.
-- Schedule `FetchVars` operator to fetch the latest parameter from PServer before running
+- Schedule a `FetchVars` operator to fetch the latest parameter from the PS before running
 the forward ops.
 - There could be a large number of gradient variables to be sent, so we need to use another
 thread pool (IO thread pool) whose number of schedulable threads is larger than the
......
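As a sketch only (none of this is PaddlePaddle API), the asynchronous mode described above can be illustrated like this: the PS applies each trainer's gradient the moment it arrives, guarded by a per-parameter lock rather than a global barrier.

```cpp
#include <mutex>
#include <vector>

struct Param {
  std::vector<float> w;
  std::mutex mu;
};

// Hypothetical handler invoked whenever one trainer uploads a gradient.
// There is no global barrier: the update runs immediately, so trainers
// never wait for each other.
void OnGradientReceived(Param* p, const std::vector<float>& grad, float lr) {
  std::lock_guard<std::mutex> guard(p->mu);  // per-parameter, not global
  for (size_t i = 0; i < p->w.size() && i < grad.size(); ++i) {
    p->w[i] -= lr * grad[i];
  }
}
```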
@@ -19,8 +19,9 @@
 ----------------

 PaddlePaddle must be built inside a Docker environment, which avoids installing the build dependencies separately. Docker images for the different optional build environments
-can be found `here <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_. Alternatively, refer to
-the optional steps below to build, from source, a Docker image for compiling PaddlePaddle.
+can be found `here <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_; you can also
+find how to build and use the paddle_manylinux_devel
+image `here <https://github.com/PaddlePaddle/Paddle/tree/develop/tools/manylinux1/>`_. Alternatively, refer to the optional steps below to build, from source, a Docker image for compiling PaddlePaddle.

 If you choose not to use a Docker image, you need to install the `编译依赖`_ (build dependencies) listed in the section below on your machine before starting the build steps.
......
@@ -22,6 +22,8 @@ How To Build
 You need to use Docker to build PaddlePaddle
 to avoid installing dependencies by yourself. We have several pre-built
 Docker images `here <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_ ,
+and you can also find how to build and use the paddle_manylinux_devel Docker image
+`here <https://github.com/PaddlePaddle/Paddle/tree/develop/tools/manylinux1/>`_ .
 Or you can build your own image from source as the optional step below:

 .. code-block:: bash
......
@@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY)
 endif()

 add_subdirectory(testing)
-if(NOT MOBILE_INFERENCE AND NOT RPI)
+if(NOT MOBILE_INFERENCE AND NOT RPI AND NOT WITH_C_API)
   add_subdirectory(fluid)
 endif()
@@ -5,11 +5,11 @@ proto_library(framework_proto SRCS framework.proto)
 cc_library(ddim SRCS ddim.cc DEPS eigen3 boost)
 cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
 nv_test(dim_test SRCS dim_test.cu DEPS ddim)
+cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context)
 if(WITH_GPU)
-  nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS ddim place memory device_context framework_proto)
+  nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type)
 else()
-  cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS ddim place memory device_context framework_proto)
+  cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type)
 endif()
 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
@@ -57,7 +57,7 @@ cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor
 cc_library(attribute SRCS attribute.cc DEPS framework_proto boost)
 cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc
 device_context)
-cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
+cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute glog)
 cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
 cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
......
@@ -36,9 +36,11 @@ void TransDataDevice(const Tensor& in, const platform::Place& dst_place,
   VLOG(3) << "DeviceTransform in, src_place " << in.place()
           << " dst_place: " << dst_place;
   auto* dev_ctx = GetDeviceContext(in.place(), dst_place);
+  dev_ctx->Wait();
   TensorCopy(in, dst_place, *dev_ctx, out);
-  dev_ctx->Wait();
+  if (platform::is_gpu_place(in.place()) && platform::is_cpu_place(dst_place)) {
+    dev_ctx->Wait();
+  }
 }

 }  // namespace framework
......
@@ -32,8 +32,7 @@ struct AddFunctor {
 class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
  public:
-  OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() {
     AddInput("input", "input1 of test op");
     AddOutput("output", "output of test op");
     AddAttr<bool>("use_gpu", "force to use gpu kernel").SetDefault(false);
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/data_type.h"
#include <stdint.h>
#include <string>
#include <unordered_map>
namespace paddle {
namespace framework {
struct DataTypeMap {
std::unordered_map<std::type_index, proto::VarType::Type> cpp_to_proto_;
std::unordered_map<int, std::type_index> proto_to_cpp_;
std::unordered_map<int, std::string> proto_to_str_;
std::unordered_map<std::type_index, size_t> cpp_to_size_;
};
static DataTypeMap* InitDataTypeMap();
static DataTypeMap& gDataTypeMap() {
static DataTypeMap* g_data_type_map_ = InitDataTypeMap();
return *g_data_type_map_;
}
template <typename T>
static inline void RegisterType(DataTypeMap* map,
proto::VarType::Type proto_type,
const std::string& name) {
map->proto_to_cpp_.emplace(static_cast<int>(proto_type), typeid(T));
map->cpp_to_proto_.emplace(typeid(T), proto_type);
map->proto_to_str_.emplace(static_cast<int>(proto_type), name);
map->cpp_to_size_.emplace(typeid(T), sizeof(T));
}
static DataTypeMap* InitDataTypeMap() {
auto retv = new DataTypeMap();
#define RegType(cc_type, proto_type) \
RegisterType<cc_type>(retv, proto_type, #cc_type)
// NOTE: Add your customize type here.
RegType(platform::float16, proto::VarType::FP16);
RegType(float, proto::VarType::FP32);
RegType(double, proto::VarType::FP64);
RegType(int, proto::VarType::INT32);
RegType(int64_t, proto::VarType::INT64);
RegType(bool, proto::VarType::BOOL);
RegType(size_t, proto::VarType::SIZE_T);
RegType(int16_t, proto::VarType::INT16);
RegType(uint8_t, proto::VarType::UINT8);
#undef RegType
return retv;
}
proto::VarType::Type ToDataType(std::type_index type) {
auto it = gDataTypeMap().cpp_to_proto_.find(type);
if (it != gDataTypeMap().cpp_to_proto_.end()) {
return it->second;
}
PADDLE_THROW("Not support %s as tensor type", type.name());
}
std::type_index ToTypeIndex(proto::VarType::Type type) {
auto it = gDataTypeMap().proto_to_cpp_.find(static_cast<int>(type));
if (it != gDataTypeMap().proto_to_cpp_.end()) {
return it->second;
}
PADDLE_THROW("Not support proto::VarType::Type(%d) as tensor type",
static_cast<int>(type));
}
std::string DataTypeToString(const proto::VarType::Type type) {
auto it = gDataTypeMap().proto_to_str_.find(static_cast<int>(type));
if (it != gDataTypeMap().proto_to_str_.end()) {
return it->second;
}
PADDLE_THROW("Not support proto::VarType::Type(%d) as tensor type",
static_cast<int>(type));
}
size_t SizeOfType(std::type_index type) {
auto it = gDataTypeMap().cpp_to_size_.find(type);
if (it != gDataTypeMap().cpp_to_size_.end()) {
return it->second;
}
PADDLE_THROW("Not support %s as tensor type", type.name());
}
} // namespace framework
} // namespace paddle
@@ -17,51 +17,14 @@ limitations under the License. */
 #include <typeindex>
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/float16.h"

 namespace paddle {
 namespace framework {

-inline proto::VarType::Type ToDataType(std::type_index type) {
-  if (typeid(platform::float16).hash_code() == type.hash_code()) {
-    return proto::VarType::FP16;
-  } else if (typeid(const float).hash_code() == type.hash_code()) {
-    // CPPLint complains Using C-style cast. Use static_cast<float>() instead
-    // One fix to this is to replace float with const float because
-    // typeid(T) == typeid(const T)
-    // http://en.cppreference.com/w/cpp/language/typeid
-    return proto::VarType::FP32;
-  } else if (typeid(const double).hash_code() == type.hash_code()) {
-    return proto::VarType::FP64;
-  } else if (typeid(const int).hash_code() == type.hash_code()) {
-    return proto::VarType::INT32;
-  } else if (typeid(const int64_t).hash_code() == type.hash_code()) {
-    return proto::VarType::INT64;
-  } else if (typeid(const bool).hash_code() == type.hash_code()) {
-    return proto::VarType::BOOL;
-  } else {
-    PADDLE_THROW("Not supported");
-  }
-}
-
-inline std::type_index ToTypeIndex(proto::VarType::Type type) {
-  switch (type) {
-    case proto::VarType::FP16:
-      return typeid(platform::float16);
-    case proto::VarType::FP32:
-      return typeid(float);
-    case proto::VarType::FP64:
-      return typeid(double);
-    case proto::VarType::INT32:
-      return typeid(int);
-    case proto::VarType::INT64:
-      return typeid(int64_t);
-    case proto::VarType::BOOL:
-      return typeid(bool);
-    default:
-      PADDLE_THROW("Not support type %d", type);
-  }
-}
+extern proto::VarType::Type ToDataType(std::type_index type);
+extern std::type_index ToTypeIndex(proto::VarType::Type type);

 template <typename Visitor>
 inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
@@ -84,37 +47,23 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
     case proto::VarType::BOOL:
       visitor.template operator()<bool>();
       break;
+    case proto::VarType::UINT8:
+      visitor.template operator()<uint8_t>();
+      break;
+    case proto::VarType::INT16:
+      visitor.template operator()<int16_t>();
+      break;
     default:
-      PADDLE_THROW("Not supported");
-  }
-}
-
-inline std::string DataTypeToString(const proto::VarType::Type type) {
-  switch (type) {
-    case proto::VarType::FP16:
-      return "float16";
-    case proto::VarType::FP32:
-      return "float32";
-    case proto::VarType::FP64:
-      return "float64";
-    case proto::VarType::INT16:
-      return "int16";
-    case proto::VarType::INT32:
-      return "int32";
-    case proto::VarType::INT64:
-      return "int64";
-    case proto::VarType::BOOL:
-      return "bool";
-    default:
-      PADDLE_THROW("Not support type %d", type);
+      PADDLE_THROW("Not supported %d", type);
   }
 }

+extern std::string DataTypeToString(const proto::VarType::Type type);
+extern size_t SizeOfType(std::type_index type);
+
 inline std::ostream& operator<<(std::ostream& out,
                                 const proto::VarType::Type& type) {
   out << DataTypeToString(type);
   return out;
 }
 }  // namespace framework
 }  // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace paddle {
namespace framework {
namespace details {
struct BuildStrategy {
enum class ReduceStrategy { kAllReduce = 0, kReduce = 1 };
enum class GradientScaleStrategy {
kCoeffNumDevice = 0,
kOne = 1,
kCustomized = 2,
};
ReduceStrategy reduce_{ReduceStrategy::kAllReduce};
GradientScaleStrategy gradient_scale_{GradientScaleStrategy::kCoeffNumDevice};
};
} // namespace details
} // namespace framework
} // namespace paddle
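A hedged sketch of how a caller might fill in this new struct, based only on the `MultiDevSSAGraphBuilder` constructor changed later in this commit; the header path in the include is an assumption.

```cpp
#include "paddle/fluid/framework/details/build_strategy.h"

using paddle::framework::details::BuildStrategy;

// Pick the round-robin reduce strategy instead of the default all-reduce,
// and keep the default 1/num_devices loss-gradient scaling.
BuildStrategy MakeReduceStrategy() {
  BuildStrategy strategy;
  strategy.reduce_ = BuildStrategy::ReduceStrategy::kReduce;
  strategy.gradient_scale_ =
      BuildStrategy::GradientScaleStrategy::kCoeffNumDevice;
  return strategy;  // passed to MultiDevSSAGraphBuilder's constructor
}
```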
@@ -36,7 +36,7 @@ struct ComputationOpHandle : public OpHandleBase {
  protected:
   void RunImpl() override;

-  virtual bool NeedWait(VarHandleBase *in_var);
+  bool NeedWait(VarHandleBase *in_var) override;

  private:
   std::unique_ptr<OperatorBase> op_;
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace paddle {
namespace framework {
namespace details {
struct ExecutionStrategy {
size_t num_threads_{0};
bool use_event_{true};
bool allow_op_delay_{false};
};
} // namespace details
} // namespace framework
} // namespace paddle
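A hedged sketch of configuring this strategy, mirroring how `ThreadedSSAGraphExecutor` (changed later in this commit) consumes each field; the header path is an assumption.

```cpp
#include "paddle/fluid/framework/details/execution_strategy.h"

using paddle::framework::details::ExecutionStrategy;

ExecutionStrategy MakeExecutionStrategy() {
  ExecutionStrategy strategy;
  strategy.num_threads_ = 4;         // >= 2 creates the internal ThreadPool
  strategy.use_event_ = true;        // forwarded to each op handle's Run()
  strategy.allow_op_delay_ = false;  // do not postpone multi-device transfers
  return strategy;  // passed to ThreadedSSAGraphExecutor's constructor
}
```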
@@ -48,17 +48,18 @@ void FetchOpHandle::RunImpl() {
   WaitInputVarGenerated(platform::CPUPlace());

   tensors_.resize(inputs_.size());
-  auto *var_handle = static_cast<VarHandle *>(inputs_[0]);
-  auto &var_name = var_handle->name_;
   platform::CPUPlace cpu;
   auto &scopes = *local_scopes_;

-  for (size_t i = 0; i < scopes.size(); ++i) {
-    auto &scope = scopes[i];
-    auto *var =
-        scope->FindVar(kLocalExecScopeName)->Get<Scope *>()->FindVar(var_name);
+  for (size_t i = 0; i < inputs_.size(); ++i) {
+    auto *var_handle = static_cast<VarHandle *>(inputs_[i]);
+    auto &scope = scopes.at(var_handle->scope_idx_);
+    auto *var = scope->FindVar(kLocalExecScopeName)
+                    ->Get<Scope *>()
+                    ->FindVar(var_handle->name_);
     PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope",
-                            var_name);
+                            var_handle->name_);

     auto &t = var->Get<framework::LoDTensor>();
     if (platform::is_gpu_place(t.place())) {
 #ifdef PADDLE_WITH_CUDA
......
@@ -42,7 +42,7 @@ struct FetchOpHandle : public OpHandleBase {
  protected:
   void RunImpl() override;

-  virtual void WaitInputVarGenerated(const platform::Place &place);
+  void WaitInputVarGenerated(const platform::Place &place) override;

  private:
   FeedFetchList *data_;
......
@@ -37,25 +37,26 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &params,
     const std::vector<Scope *> &local_scopes,
-    platform::NCCLContextMap *nccl_ctxs, bool use_default_grad_scale)
+    platform::NCCLContextMap *nccl_ctxs, const BuildStrategy &strategy)
     : loss_var_name_(loss_var_name),
       places_(places),
       local_scopes_(local_scopes),
-      nccl_ctxs_(nccl_ctxs) {
+      nccl_ctxs_(nccl_ctxs),
+      strategy_(strategy) {
 #else
 MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
     const std::vector<platform::Place> &places,
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &params,
-    const std::vector<Scope *> &local_scopes, bool use_default_grad_scale)
+    const std::vector<Scope *> &local_scopes, const BuildStrategy &strategy)
     : loss_var_name_(loss_var_name),
       places_(places),
-      local_scopes_(local_scopes) {
+      local_scopes_(local_scopes),
+      strategy_(strategy) {
 #endif
   for (auto &p : params) {
     grad_names_.insert(GradVarName(p));
   }
-  use_default_grad_scale_ = use_default_grad_scale;
 }

 void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result,
@@ -124,6 +125,12 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
   // Find "send" op first for split is in front of send.
   OpDesc *send_op = GetSendOpDesc(program);

+  size_t cur_device_id = 0;
+  std::vector<std::unordered_set<std::string>> var_name_on_devices;
+  std::vector<std::unordered_set<std::string>> bcast_var_name_set;
+  var_name_on_devices.resize(places_.size());
+  bcast_var_name_set.resize(places_.size());
+
   bool is_forwarding = true;
   for (auto *op : program.Block(0).AllOps()) {
     if (op->Type() == "send") {
@@ -134,22 +141,42 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
       CreateComputationalOps(&result, *op, 1);
     } else if (IsScaleLossOp(*op)) {
       // user can customize loss@grad if not use_default_grad_scale_
-      if (use_default_grad_scale_) {
+      if (strategy_.gradient_scale_ !=
+          BuildStrategy::GradientScaleStrategy::kCustomized) {
         CreateScaleLossGradOp(&result);
       }
       is_forwarding = false;
     } else {
-      CreateComputationalOps(&result, *op, places_.size());
+      int op_dev_id = GetOpDeviceID(var_name_on_devices, *op);
+      if (op_dev_id == -1) {  // var on all device
+        CreateComputationalOps(&result, *op, places_.size());
+      } else {
+        CreateComputationalOp(&result, *op, op_dev_id);
+        for (auto &var_name : op->OutputArgumentNames()) {
+          var_name_on_devices[op_dev_id].emplace(var_name);
+        }
+      }
       if (!is_forwarding && places_.size() > 1) {
         // Currently, we assume that once gradient is generated, it can be
         // broadcast, and each gradient is only broadcast once.
         for (auto &og : op->OutputArgumentNames()) {
           if (IsParameterGradientOnce(og, &og_has_been_broadcast)) {
-            if (IsSparseGradient(var_types, og)) {
-              CreateReduceOp(&result, og, 0);
-              CreateBroadcastOp(&result, og, 0);
-            } else {
-              InsertNCCLAllReduceOp(&result, og);
+            switch (strategy_.reduce_) {
+              case BuildStrategy::ReduceStrategy::kReduce:
+                CreateReduceOp(&result, og, cur_device_id);
+                var_name_on_devices[cur_device_id].emplace(og);
+                bcast_var_name_set[cur_device_id].emplace(
+                    og.substr(0, og.size() - strlen(kGradVarSuffix)));
+                cur_device_id = (cur_device_id + 1) % places_.size();
+                break;
+              case BuildStrategy::ReduceStrategy::kAllReduce:
+                if (IsSparseGradient(var_types, og)) {
+                  CreateReduceOp(&result, og, 0);
+                  CreateBroadcastOp(&result, og, 0);
+                } else {
+                  InsertNCCLAllReduceOp(&result, og);
+                }
+                break;
             }
           }
         }
@@ -157,6 +184,13 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
     }
   }

+  // Insert BCast Ops
+  for (size_t dev_id = 0; dev_id < bcast_var_name_set.size(); ++dev_id) {
+    auto &to_bcast_set = bcast_var_name_set[dev_id];
+    for (auto &bcast_name : to_bcast_set) {
+      CreateBroadcastOp(&result, bcast_name, dev_id);
+    }
+  }
   /*
     Dependency graph has been constructed. However, there are still data
     harzaeds need to be handled.
@@ -265,6 +299,26 @@ bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
   return is_pg_once;
 }

+int MultiDevSSAGraphBuilder::GetOpDeviceID(
+    const std::vector<std::unordered_set<std::string>> &var_name_on_devices,
+    const OpDesc &op) const {
+  if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kReduce) {
+    return -1;
+  }
+  int var_dev_id = -1;
+  for (auto &var_name : op.InputArgumentNames()) {
+    if (var_dev_id != -1) break;
+    for (size_t i = 0; i < var_name_on_devices.size(); ++i) {
+      if (var_name_on_devices[i].count(var_name)) {
+        var_dev_id = static_cast<int>(i);
+        break;
+      }
+    }
+  }
+  return var_dev_id;
+}
+
 void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(SSAGraph *result) const {
   for (size_t i = 0; i < places_.size(); ++i) {
     // Insert ScaleCost OpHandle
......
@@ -17,6 +17,7 @@
 #include <utility>
 #include <vector>

+#include "paddle/fluid/framework/details/build_strategy.h"
 #include "paddle/fluid/framework/details/ssa_graph_builder.h"

 namespace paddle {
@@ -36,13 +37,13 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
                           const std::unordered_set<std::string> &params,
                           const std::vector<Scope *> &local_scopes,
                           platform::NCCLContextMap *nccl_ctxs,
-                          bool use_default_grad_scale);
+                          const BuildStrategy &strategy);
 #else
   MultiDevSSAGraphBuilder(const std::vector<platform::Place> &places,
                           const std::string &loss_var_name,
                           const std::unordered_set<std::string> &params,
                           const std::vector<Scope *> &local_scopes,
-                          bool use_default_grad_scale);
+                          const BuildStrategy &strategy);
 #endif

   std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
@@ -60,7 +61,6 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
 #ifdef PADDLE_WITH_CUDA
   platform::NCCLContextMap *nccl_ctxs_;
 #endif
-  bool use_default_grad_scale_;

   bool IsScaleLossOp(const OpDesc &op) const;
@@ -84,6 +84,10 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
                                 const std::string &og,
                                 std::unordered_set<std::string> *og_has_been_broadcast) const;

+  int GetOpDeviceID(
+      const std::vector<std::unordered_set<std::string>> &var_name_on_devices,
+      const OpDesc &op) const;
+
   void InsertNCCLAllReduceOp(SSAGraph *result, const std::string &og) const;

   void CreateBroadcastOp(SSAGraph *result, const std::string &p_name,
@@ -98,6 +102,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
   bool IsSparseGradient(
       const std::unordered_map<std::string, proto::VarType::Type> &var_types,
       const std::string &og) const;
+
+ private:
+  BuildStrategy strategy_;
 };

 }  // namespace details
 }  // namespace framework
......
@@ -70,6 +70,14 @@ class OpHandleBase {

   const std::vector<VarHandleBase *> &Inputs() const { return inputs_; }

+  size_t NoDupInputSize() const {
+    std::unordered_set<VarHandleBase *> res;
+    for (auto *var : inputs_) {
+      res.emplace(var);
+    }
+    return res.size();
+  }
+
   const std::vector<VarHandleBase *> &Outputs() const { return outputs_; }

  protected:
......
@@ -95,7 +95,10 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
   void operator()(const char* op_type, OpInfo* info) const {
     info->proto_ = new proto::OpProto;
     info->checker_ = new OpAttrChecker();
-    auto maker = T(info->proto_, info->checker_);
+    T maker;
+    maker.SetProto(info->proto_);
+    maker.SetChecker(info->checker_);
+    maker.Make();
     maker.Validate();
     info->proto_->set_type(op_type);
     PADDLE_ENFORCE(
......
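For contrast, a minimal sketch of a maker under the new protocol (the op shown is illustrative, not a real operator): subclasses now implement `Make()`, and `OpInfoFiller` injects the proto and checker via `SetProto()`/`SetChecker()` before calling it.

```cpp
class MulOpMaker : public OpProtoAndCheckerMaker {
 public:
  // No constructor boilerplate any more; the filler above calls
  // SetProto()/SetChecker() and then Make().
  void Make() override {
    AddInput("X", "first input of mul");
    AddInput("Y", "second input of mul");
    AddOutput("Out", "output of mul");
    AddComment("Out = X * Y (illustrative sketch only)");
  }
};
```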
@@ -18,18 +18,17 @@ namespace paddle {
 namespace framework {
 namespace details {
 ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor(
-    size_t num_threads, bool use_event,
-    const std::vector<Scope *> &local_scopes,
+    const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
     const std::vector<platform::Place> &places,
-    std::unique_ptr<SSAGraph> &&graph, bool allow_op_delay)
+    std::unique_ptr<SSAGraph> &&graph)
     : SSAGraphExecutor(std::move(graph)),
-      pool_(num_threads >= 2 ? new ::ThreadPool(num_threads) : nullptr),
+      pool_(strategy.num_threads_ >= 2 ? new ::ThreadPool(strategy.num_threads_)
+                                        : nullptr),
       local_scopes_(local_scopes),
       places_(places),
       fetch_ctxs_(places),
-      use_event_(use_event),
       running_ops_(0),
-      allow_op_delay_(allow_op_delay) {}
+      strategy_(strategy) {}

 FeedFetchList ThreadedSSAGraphExecutor::Run(
     const std::vector<std::string> &fetch_tensors) {
@@ -86,7 +85,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
     //
     // NOTE: DelayedOps have a lower priority. It will be scheduled after all
     // ready_ops have been performed.
-    if (ready_ops.empty() && allow_op_delay_ && running_ops_ == 0) {
+    if (ready_ops.empty() && strategy_.allow_op_delay_ && running_ops_ == 0) {
       run_all_ops(delayed_ops);
     } else {
       run_all_ops(ready_ops);
@@ -113,7 +112,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
         auto &deps = pending_ops[op];
         --deps;
         if (deps == 0) {
-          if (op->IsMultiDeviceTransfer() && allow_op_delay_) {
+          if (op->IsMultiDeviceTransfer() && strategy_.allow_op_delay_) {
             delayed_ops.insert(op);
           } else {
             ready_ops.insert(op);
@@ -175,7 +174,7 @@ void ThreadedSSAGraphExecutor::InsertFetchOps(
 void ThreadedSSAGraphExecutor::InsertPendingOp(
     std::unordered_map<OpHandleBase *, size_t> *pending_ops,
     OpHandleBase *op_instance) const {
-  pending_ops->insert({op_instance, op_instance->Inputs().size()});
+  pending_ops->insert({op_instance, op_instance->NoDupInputSize()});
 }

 void ThreadedSSAGraphExecutor::InsertPendingVar(
@@ -191,7 +190,7 @@ void ThreadedSSAGraphExecutor::RunOp(
   auto op_run = [ready_var_q, op, this] {
     try {
       VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
-      op->Run(use_event_);
+      op->Run(strategy_.use_event_);
       VLOG(10) << op << " " << op->Name() << " Done ";
       running_ops_--;
       ready_var_q->Extend(op->Outputs());
......
@@ -23,6 +23,7 @@
 #include <functional>
 #include "ThreadPool.h"  // ThreadPool in thrird party
 #include "paddle/fluid/framework/blocking_queue.h"
+#include "paddle/fluid/framework/details/execution_strategy.h"
 #include "paddle/fluid/framework/details/fetch_op_handle.h"
 #include "paddle/fluid/framework/details/ssa_graph_executor.h"
@@ -34,11 +35,10 @@ namespace details {

 class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
  public:
-  ThreadedSSAGraphExecutor(size_t num_threads, bool use_event,
+  ThreadedSSAGraphExecutor(const ExecutionStrategy &strategy,
                            const std::vector<Scope *> &local_scopes,
                            const std::vector<platform::Place> &places,
-                           std::unique_ptr<SSAGraph> &&graph,
-                           bool allow_op_delay);
+                           std::unique_ptr<SSAGraph> &&graph);

   // Run a SSAGraph by a thread pool
   // Use topological sort algorithm
@@ -55,10 +55,8 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
   std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
   platform::DeviceContextPool fetch_ctxs_;
-  const bool use_event_;
   std::unique_ptr<platform::EnforceNotMet> exception_;
   std::atomic<int> running_ops_;
-  bool allow_op_delay_;

   void InsertPendingOp(std::unordered_map<OpHandleBase *, size_t> *pending_ops,
                        OpHandleBase *op_instance) const;
@@ -74,6 +72,9 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
       std::unordered_map<OpHandleBase *, size_t> *pending_ops,
       std::unordered_set<VarHandleBase *> *pending_vars,
       BlockingQueue<VarHandleBase *> *ready_vars, FeedFetchList *fetch_data);
+
+ private:
+  ExecutionStrategy strategy_;
 };

 }  // namespace details
......
@@ -228,7 +228,8 @@ static bool has_fetch_operators(
 void Executor::Run(const ProgramDesc& program, Scope* scope,
                    std::map<std::string, const LoDTensor*>* feed_targets,
                    std::map<std::string, LoDTensor*>* fetch_targets,
-                   bool create_vars, const std::string& feed_holder_name,
+                   bool create_local_scope, bool create_vars,
+                   const std::string& feed_holder_name,
                    const std::string& fetch_holder_name) {
   platform::RecordBlock b(kProgramId);
   bool has_feed_ops =
@@ -290,8 +291,9 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
   }

   auto ctx = Prepare(*copy_program, 0);
-  RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets, create_vars,
-                     feed_holder_name, fetch_holder_name);
+  RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets,
                     create_local_scope, create_vars, feed_holder_name,
                     fetch_holder_name);
 }

 std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
@@ -366,8 +368,9 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
 void Executor::RunPreparedContext(
     ExecutorPrepareContext* ctx, Scope* scope,
     std::map<std::string, const LoDTensor*>* feed_targets,
-    std::map<std::string, LoDTensor*>* fetch_targets, bool create_vars,
-    const std::string& feed_holder_name, const std::string& fetch_holder_name) {
+    std::map<std::string, LoDTensor*>* fetch_targets, bool create_local_scope,
+    bool create_vars, const std::string& feed_holder_name,
+    const std::string& fetch_holder_name) {
   auto& global_block = ctx->prog_.Block(ctx->block_id_);

   PADDLE_ENFORCE(
@@ -387,7 +390,7 @@ void Executor::RunPreparedContext(
     }
   }

-  RunPreparedContext(ctx, scope, create_vars, create_vars);
+  RunPreparedContext(ctx, scope, create_local_scope, create_vars);

   // obtain the data of fetch_targets from fetch_holder
   for (auto* op : global_block.AllOps()) {
......
@@ -57,7 +57,7 @@ class Executor {
   void Run(const ProgramDesc& program, Scope* scope,
            std::map<std::string, const LoDTensor*>* feed_targets,
            std::map<std::string, LoDTensor*>* fetch_targets,
-           bool create_vars = true,
+           bool create_local_scope = true, bool create_vars = true,
            const std::string& feed_holder_name = "feed",
            const std::string& fetch_holder_name = "fetch");
@@ -76,6 +76,7 @@ class Executor {
   void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                           std::map<std::string, const LoDTensor*>* feed_targets,
                           std::map<std::string, LoDTensor*>* fetch_targets,
+                          bool create_local_scope = true,
                           bool create_vars = true,
                           const std::string& feed_holder_name = "feed",
                           const std::string& fetch_holder_name = "fetch");
......
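For callers, the visible effect of this change is that create_local_scope now comes before create_vars. A minimal call-site sketch under that assumption; the RunOnce wrapper, place, and target maps are illustrative placeholders, not code from this commit:

#include <map>
#include <string>
#include "paddle/fluid/framework/executor.h"

void RunOnce(const paddle::framework::ProgramDesc& program,
             paddle::framework::Scope* scope,
             std::map<std::string, const paddle::framework::LoDTensor*>* feeds,
             std::map<std::string, paddle::framework::LoDTensor*>* fetches) {
  paddle::framework::Executor exe(paddle::platform::CPUPlace());
  // Note the new parameter order: create_local_scope, then create_vars.
  exe.Run(program, scope, feeds, fetches,
          /*create_local_scope=*/true, /*create_vars=*/true,
          /*feed_holder_name=*/"feed", /*fetch_holder_name=*/"fetch");
}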
...@@ -101,6 +101,9 @@ message VarType { ...@@ -101,6 +101,9 @@ message VarType {
FP16 = 4; FP16 = 4;
FP32 = 5; FP32 = 5;
FP64 = 6; FP64 = 6;
// Tensor<size_t> is used in C++.
SIZE_T = 19;
UINT8 = 20;
// Other types that may need additional descriptions // Other types that may need additional descriptions
LOD_TENSOR = 7; LOD_TENSOR = 7;
......
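The new UINT8 entry means a tensor can carry unsigned bytes end to end; the templated RecordIO test later in this diff exercises exactly that. A minimal allocation sketch, assuming CPU placement (illustrative only):

#include "paddle/fluid/framework/lod_tensor.h"

void MakeUint8Tensor() {
  paddle::framework::LoDTensor t;
  // Allocate a 2x3 tensor of unsigned bytes on the CPU and fill it.
  uint8_t* data = t.mutable_data<uint8_t>(
      paddle::framework::make_ddim({2, 3}), paddle::platform::CPUPlace());
  for (int i = 0; i < 6; ++i) data[i] = static_cast<uint8_t>(i);
}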
...@@ -228,11 +228,12 @@ TEST(LoD, CheckAbsLoD) { ...@@ -228,11 +228,12 @@ TEST(LoD, CheckAbsLoD) {
ASSERT_FALSE(CheckAbsLoD(abs_lod0)); ASSERT_FALSE(CheckAbsLoD(abs_lod0));
} }
TEST(LoDTensor, RecordIO) { template <typename T>
static void TestRecordIO() {
LoDTensor tensor; LoDTensor tensor;
int* tmp = tensor.mutable_data<int>(make_ddim({4, 5}), platform::CPUPlace()); T* tmp = tensor.mutable_data<T>(make_ddim({4, 5}), platform::CPUPlace());
for (int i = 0; i < 20; ++i) { for (int i = 0; i < 20; ++i) {
tmp[i] = i; tmp[i] = static_cast<T>(i);
} }
std::stringstream* stream = new std::stringstream(); std::stringstream* stream = new std::stringstream();
...@@ -247,7 +248,7 @@ TEST(LoDTensor, RecordIO) { ...@@ -247,7 +248,7 @@ TEST(LoDTensor, RecordIO) {
auto assert_tensor_ok = [](const LoDTensor& tensor) { auto assert_tensor_ok = [](const LoDTensor& tensor) {
for (int i = 0; i < 20; ++i) { for (int i = 0; i < 20; ++i) {
ASSERT_EQ(tensor.data<int>()[i], i); ASSERT_EQ(tensor.data<T>()[i], static_cast<T>(i));
} }
}; };
...@@ -265,5 +266,13 @@ TEST(LoDTensor, RecordIO) { ...@@ -265,5 +266,13 @@ TEST(LoDTensor, RecordIO) {
} }
} }
TEST(LoDTensor, RecordIO) {
TestRecordIO<int>();
TestRecordIO<int16_t>();
TestRecordIO<uint8_t>();
TestRecordIO<float>();
TestRecordIO<double>();
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -27,7 +27,7 @@ TEST(OpKernelType, ToString) { ...@@ -27,7 +27,7 @@ TEST(OpKernelType, ToString) {
LibraryType::kCUDNN); LibraryType::kCUDNN);
ASSERT_EQ(paddle::framework::KernelTypeToString(op_kernel_type), ASSERT_EQ(paddle::framework::KernelTypeToString(op_kernel_type),
"data_type[float32]:data_layout[NCHW]:place[CPUPlace]:library_type[" "data_type[float]:data_layout[NCHW]:place[CPUPlace]:library_type["
"CUDNN]"); "CUDNN]");
} }
......
...@@ -14,56 +14,57 @@ limitations under the License. */ ...@@ -14,56 +14,57 @@ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#include "glog/logging.h"
#include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/framework.pb.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
// This class not only makes the proto but also initializes attribute checkers. // This class not only makes the proto but also initializes attribute checkers.
class OpProtoAndCheckerMaker { class OpProtoAndCheckerMaker {
public: public:
using OpProto = proto::OpProto; virtual void Make() = 0;
using OpAttrChecker = framework::OpAttrChecker;
OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: proto_(proto), op_checker_(op_checker) {}
virtual ~OpProtoAndCheckerMaker() { virtual ~OpProtoAndCheckerMaker() {
PADDLE_ENFORCE(validated_, "should call Validate after build"); CHECK(validated_) << "should call Validate after build";
} }
void SetProto(proto::OpProto *proto) { proto_ = proto; }
void SetChecker(OpAttrChecker *attr_checker) { op_checker_ = attr_checker; }
void Validate(); void Validate();
protected: protected:
struct VariableBuilder { struct VariableBuilder {
OpProto::Var* var_; proto::OpProto::Var *var_;
VariableBuilder& AsDuplicable() { VariableBuilder &AsDuplicable() {
var_->set_duplicable(true); var_->set_duplicable(true);
return *this; return *this;
} }
VariableBuilder& AsIntermediate() { VariableBuilder &AsIntermediate() {
var_->set_intermediate(true); var_->set_intermediate(true);
return *this; return *this;
} }
VariableBuilder& AsDispensable() { VariableBuilder &AsDispensable() {
var_->set_dispensable(true); var_->set_dispensable(true);
return *this; return *this;
} }
}; };
VariableBuilder AddInput(const std::string& name, const std::string& comment); VariableBuilder AddInput(const std::string &name, const std::string &comment);
VariableBuilder AddOutput(const std::string& name, VariableBuilder AddOutput(const std::string &name,
const std::string& comment); const std::string &comment);
template <typename T> template <typename T>
TypedAttrChecker<T>& AddAttr(const std::string& name, TypedAttrChecker<T> &AddAttr(const std::string &name,
const std::string& comment, const std::string &comment,
bool generated = false) { bool generated = false) {
auto* attr = proto_->add_attrs(); auto *attr = proto_->add_attrs();
attr->set_name(name); attr->set_name(name);
attr->set_comment(comment); attr->set_comment(comment);
attr->set_generated(generated); attr->set_generated(generated);
...@@ -71,21 +72,14 @@ class OpProtoAndCheckerMaker { ...@@ -71,21 +72,14 @@ class OpProtoAndCheckerMaker {
return op_checker_->AddAttrChecker<T>(name); return op_checker_->AddAttrChecker<T>(name);
} }
void AddComment(const std::string& comment) { proto_->set_comment(comment); } void AddComment(const std::string &comment) { proto_->set_comment(comment); }
private: private:
void CheckNoDuplicatedInOutAttrs(); void CheckNoDuplicatedInOutAttrs();
OpProto* proto_; proto::OpProto *proto_;
OpAttrChecker* op_checker_; OpAttrChecker *op_checker_;
bool validated_{false}; bool validated_{false};
}; };
class NOPMaker : public OpProtoAndCheckerMaker {
public:
NOPMaker(OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {}
};
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -18,9 +18,7 @@ limitations under the License. */ ...@@ -18,9 +18,7 @@ limitations under the License. */
class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
public: public:
TestAttrProtoMaker(paddle::framework::proto::OpProto* proto, void Make() {
paddle::framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<float>("scale", "scale of test op"); AddAttr<float>("scale", "scale of test op");
AddAttr<float>("scale", "scale of test op"); AddAttr<float>("scale", "scale of test op");
} }
...@@ -29,15 +27,16 @@ class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { ...@@ -29,15 +27,16 @@ class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
TEST(ProtoMaker, DuplicatedAttr) { TEST(ProtoMaker, DuplicatedAttr) {
paddle::framework::proto::OpProto op_proto; paddle::framework::proto::OpProto op_proto;
paddle::framework::OpAttrChecker op_checker; paddle::framework::OpAttrChecker op_checker;
auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker); TestAttrProtoMaker proto_maker;
proto_maker.SetProto(&op_proto);
proto_maker.SetChecker(&op_checker);
proto_maker.Make();
ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
} }
class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
public: public:
TestInOutProtoMaker(paddle::framework::proto::OpProto* proto, void Make() {
paddle::framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of test op"); AddInput("input", "input of test op");
AddInput("input", "input of test op"); AddInput("input", "input of test op");
} }
...@@ -46,6 +45,9 @@ class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { ...@@ -46,6 +45,9 @@ class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
TEST(ProtoMaker, DuplicatedInOut) { TEST(ProtoMaker, DuplicatedInOut) {
paddle::framework::proto::OpProto op_proto; paddle::framework::proto::OpProto op_proto;
paddle::framework::OpAttrChecker op_checker; paddle::framework::OpAttrChecker op_checker;
auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker); TestInOutProtoMaker proto_maker;
proto_maker.SetProto(&op_proto);
proto_maker.SetChecker(&op_checker);
proto_maker.Make();
ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
} }
...@@ -33,8 +33,7 @@ class CosineOp : public OperatorBase { ...@@ -33,8 +33,7 @@ class CosineOp : public OperatorBase {
class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public: public:
CosineOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) void Make() {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of cosine op"); AddInput("input", "input of cosine op");
AddOutput("output", "output of cosine op"); AddOutput("output", "output of cosine op");
AddAttr<float>("scale", "scale of cosine op") AddAttr<float>("scale", "scale of cosine op")
...@@ -55,8 +54,7 @@ class MyTestOp : public OperatorBase { ...@@ -55,8 +54,7 @@ class MyTestOp : public OperatorBase {
class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public: public:
MyTestOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) void Make() {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of cosine op").AsDuplicable(); AddInput("input", "input of cosine op").AsDuplicable();
AddOutput("output", "output of cosine op").AsIntermediate(); AddOutput("output", "output of cosine op").AsIntermediate();
auto my_checker = [](int i) { auto my_checker = [](int i) {
...@@ -212,10 +210,7 @@ namespace framework { ...@@ -212,10 +210,7 @@ namespace framework {
class OpKernelTestMaker : public OpProtoAndCheckerMaker { class OpKernelTestMaker : public OpProtoAndCheckerMaker {
public: public:
OpKernelTestMaker(OpProto* proto, OpAttrChecker* op_checker) void Make() { AddComment("NoGradOp, same input output. no Grad"); }
: OpProtoAndCheckerMaker(proto, op_checker) {
AddComment("NoGradOp, same input output. no Grad");
}
}; };
class OpWithKernelTest : public OperatorWithKernel { class OpWithKernelTest : public OperatorWithKernel {
...@@ -275,9 +270,9 @@ TEST(OperatorRegistrar, CUDA) { ...@@ -275,9 +270,9 @@ TEST(OperatorRegistrar, CUDA) {
static int op_test_value = 0; static int op_test_value = 0;
using paddle::platform::DeviceContext;
using paddle::platform::CPUDeviceContext; using paddle::platform::CPUDeviceContext;
using paddle::platform::CUDADeviceContext; using paddle::platform::CUDADeviceContext;
using paddle::platform::DeviceContext;
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -33,7 +33,6 @@ limitations under the License. */ ...@@ -33,7 +33,6 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/variant.h" #include "paddle/fluid/platform/variant.h"
#include "paddle/utils/Error.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -192,6 +191,10 @@ class ExecutionContext { ...@@ -192,6 +191,10 @@ class ExecutionContext {
return op_.Attr<T>(name); return op_.Attr<T>(name);
} }
bool HasInput(const std::string& name) const { return op_.HasInputs(name); }
bool HasOutput(const std::string& name) const { return op_.HasOutputs(name); }
size_t InputSize(const std::string& name) const { size_t InputSize(const std::string& name) const {
return op_.Inputs(name).size(); return op_.Inputs(name).size();
} }
......
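The new HasInput/HasOutput accessors let a kernel probe optional arguments before dereferencing them. A hedged sketch of a kernel body using them; the "Bias" input is an illustrative assumption, not an op from this commit:

// Inside some OpKernel<T>::Compute(const ExecutionContext& ctx):
if (ctx.HasInput("Bias")) {
  auto* bias = ctx.Input<paddle::framework::Tensor>("Bias");
  // ... add bias to the output ...
} else {
  // skip the bias path entirely
}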
...@@ -46,8 +46,7 @@ class OpWithoutKernelTest : public OperatorBase { ...@@ -46,8 +46,7 @@ class OpWithoutKernelTest : public OperatorBase {
class OpWithoutKernelCheckerMaker : public OpProtoAndCheckerMaker { class OpWithoutKernelCheckerMaker : public OpProtoAndCheckerMaker {
public: public:
OpWithoutKernelCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) void Make() {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of test op"); AddInput("input", "input of test op");
AddOutput("output", "output of test op"); AddOutput("output", "output of test op");
AddAttr<float>("scale", "scale of cosine op"); AddAttr<float>("scale", "scale of cosine op");
...@@ -98,8 +97,7 @@ namespace framework { ...@@ -98,8 +97,7 @@ namespace framework {
class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public: public:
OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) void Make() {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("x", "input of test op"); AddInput("x", "input of test op");
AddOutput("y", "output of test op"); AddOutput("y", "output of test op");
AddAttr<float>("scale", "scale of cosine op") AddAttr<float>("scale", "scale of cosine op")
...@@ -137,9 +135,7 @@ class CPUKernelTest : public OpKernel<float> { ...@@ -137,9 +135,7 @@ class CPUKernelTest : public OpKernel<float> {
class OpKernelTestMultiInputsProtoAndCheckerMaker class OpKernelTestMultiInputsProtoAndCheckerMaker
: public OpProtoAndCheckerMaker { : public OpProtoAndCheckerMaker {
public: public:
OpKernelTestMultiInputsProtoAndCheckerMaker(OpProto* proto, void Make() {
OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("xs", "inputs of test op").AsDuplicable(); AddInput("xs", "inputs of test op").AsDuplicable();
AddInput("k", "input of test op"); AddInput("k", "input of test op");
AddOutput("ys", "outputs of test op").AsDuplicable(); AddOutput("ys", "outputs of test op").AsDuplicable();
......
...@@ -52,13 +52,13 @@ std::vector<Scope *> &ParallelExecutor::GetLocalScopes() { ...@@ -52,13 +52,13 @@ std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
} }
ParallelExecutor::ParallelExecutor( ParallelExecutor::ParallelExecutor(
size_t num_threads, bool use_event,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
const std::unordered_set<std::string> &params, const std::unordered_set<std::string> &params,
const std::unordered_set<std::string> &bcast_vars, const std::unordered_set<std::string> &bcast_vars,
const ProgramDesc &main_program, const std::string &loss_var_name, const ProgramDesc &main_program, const std::string &loss_var_name,
Scope *scope, const std::vector<Scope *> &local_scopes, bool allow_op_delay, Scope *scope, const std::vector<Scope *> &local_scopes,
bool use_default_grad_scale) const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy,
size_t num_trainers, size_t trainer_id)
: member_(new ParallelExecutorPrivate(places)) { : member_(new ParallelExecutorPrivate(places)) {
member_->global_scope_ = scope; member_->global_scope_ = scope;
...@@ -80,7 +80,13 @@ ParallelExecutor::ParallelExecutor( ...@@ -80,7 +80,13 @@ ParallelExecutor::ParallelExecutor(
// Bcast Parameters to all GPUs // Bcast Parameters to all GPUs
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
member_->nccl_ctxs_.reset(new platform::NCCLContextMap(member_->places_)); auto *nccl_id_var = scope->FindVar(NCCL_ID_VARNAME);
ncclUniqueId *nccl_id = nullptr;
if (nccl_id_var != nullptr) {
nccl_id = nccl_id_var->GetMutable<ncclUniqueId>();
}
member_->nccl_ctxs_.reset(new platform::NCCLContextMap(
member_->places_, nccl_id, num_trainers, trainer_id));
#endif #endif
if (platform::is_gpu_place(places[0]) && member_->local_scopes_.size() != 1 && if (platform::is_gpu_place(places[0]) && member_->local_scopes_.size() != 1 &&
local_scopes.empty()) { // Is CUDA local_scopes.empty()) { // Is CUDA
...@@ -93,17 +99,16 @@ ParallelExecutor::ParallelExecutor( ...@@ -93,17 +99,16 @@ ParallelExecutor::ParallelExecutor(
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
details::MultiDevSSAGraphBuilder builder( details::MultiDevSSAGraphBuilder builder(
member_->places_, loss_var_name, params, member_->local_scopes_, member_->places_, loss_var_name, params, member_->local_scopes_,
member_->nccl_ctxs_.get(), use_default_grad_scale); member_->nccl_ctxs_.get(), build_strategy);
#else #else
details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name,
params, member_->local_scopes_, params, member_->local_scopes_,
use_default_grad_scale); build_strategy);
#endif #endif
auto graph = builder.Build(main_program); auto graph = builder.Build(main_program);
member_->executor_.reset(new details::ThreadedSSAGraphExecutor( member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
num_threads, use_event, member_->local_scopes_, places, std::move(graph), exec_strategy, member_->local_scopes_, places, std::move(graph)));
allow_op_delay));
// Step 3. Create vars in each scope; // Step 3. Create vars in each scope;
for (auto *var : main_program.Block(0).AllVars()) { for (auto *var : main_program.Block(0).AllVars()) {
......
...@@ -14,55 +14,60 @@ limitations under the License. */ ...@@ -14,55 +14,60 @@ limitations under the License. */
#pragma once #pragma once
#include <paddle/fluid/framework/details/build_strategy.h>
#include <string> #include <string>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class ParallelExecutorPrivate; class ParallelExecutorPrivate;
using details::BuildStrategy;
using details::ExecutionStrategy;
class ParallelExecutor { class ParallelExecutor {
DISABLE_COPY_AND_ASSIGN(ParallelExecutor); DISABLE_COPY_AND_ASSIGN(ParallelExecutor);
public: public:
explicit ParallelExecutor(size_t num_threads, bool use_event, explicit ParallelExecutor(const std::vector<platform::Place> &places,
const std::vector<platform::Place>& places, const std::unordered_set<std::string> &params,
const std::unordered_set<std::string>& params, const std::unordered_set<std::string> &bcast_vars,
const std::unordered_set<std::string>& bcast_vars, const ProgramDesc &main_program,
const ProgramDesc& main_program, const std::string &loss_var_name, Scope *scope,
const std::string& loss_var_name, Scope* scope, const std::vector<Scope *> &local_scopes,
const std::vector<Scope*>& local_scopes, const ExecutionStrategy &exec_strategy,
bool allow_op_delay, bool use_default_grad_scale); const BuildStrategy &build_strategy,
size_t num_trainers = 1, size_t trainer_id = 0);
~ParallelExecutor(); ~ParallelExecutor();
std::vector<Scope*>& GetLocalScopes(); std::vector<Scope *> &GetLocalScopes();
/** /**
* Feed tensors to local scopes. The size of tensors should be equal to the * Feed tensors to local scopes. The size of tensors should be equal to the
* size of local scopes. * size of local scopes.
*/ */
void FeedTensorsIntoLocalScopes( void FeedTensorsIntoLocalScopes(
const std::vector<std::unordered_map<std::string, LoDTensor>>& tensors); const std::vector<std::unordered_map<std::string, LoDTensor>> &tensors);
void FeedAndSplitTensorIntoLocalScopes( void FeedAndSplitTensorIntoLocalScopes(
const std::unordered_map<std::string, LoDTensor>& tensors); const std::unordered_map<std::string, LoDTensor> &tensors);
void Run(const std::vector<std::string>& fetch_tensors, void Run(const std::vector<std::string> &fetch_tensors,
const std::string& fetched_var_name); const std::string &fetched_var_name);
void BCastParamsToGPUs(const std::unordered_set<std::string>& vars) const; void BCastParamsToGPUs(const std::unordered_set<std::string> &vars) const;
private: private:
ParallelExecutorPrivate* member_; ParallelExecutorPrivate *member_;
}; };
} // namespace framework } // namespace framework
......
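Construction now funnels the tuning knobs through ExecutionStrategy and BuildStrategy instead of loose positional flags. A minimal sketch, assuming the strategy fields of this era (num_threads_, allow_op_delay_) and illustrative placeholders for the program, scopes, and places:

using paddle::framework::ParallelExecutor;

ExecutionStrategy exec_strategy;
exec_strategy.num_threads_ = 4;         // replaces the old num_threads argument
exec_strategy.allow_op_delay_ = false;  // replaces the old allow_op_delay flag
BuildStrategy build_strategy;           // gradient-scale policy now lives here

ParallelExecutor pe(places, params, bcast_vars, main_program,
                    "loss", &scope, local_scopes,
                    exec_strategy, build_strategy);
// num_trainers and trainer_id default to single-trainer operation.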
...@@ -13,54 +13,14 @@ See the License for the specific language governing permissions and ...@@ -13,54 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
extern size_t SizeOfType(std::type_index type);
template <typename... T>
struct SizeOfTypeFunctor;
template <typename T>
struct SizeOfTypeFunctor<T> {
size_t operator()(std::type_index type) const {
if (typeid(T).hash_code() == type.hash_code()) {
return sizeof(T);
} else {
return 0UL;
}
}
};
template <>
struct SizeOfTypeFunctor<> {
size_t operator()(std::type_index type) const { return 0UL; }
};
template <typename HEAD, typename... TAIL>
struct SizeOfTypeFunctor<HEAD, TAIL...> {
size_t operator()(std::type_index type) const {
SizeOfTypeFunctor<HEAD> head;
size_t head_size = head(type);
if (head_size != 0) {
return head_size;
}
SizeOfTypeFunctor<TAIL...> tail;
return tail(type);
}
};
static inline size_t SizeOfType(std::type_index type) {
SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t,
platform::float16>
functor;
size_t size = functor(type);
PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
return size;
}
inline void Tensor::check_memory_size() const { inline void Tensor::check_memory_size() const {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor holds no memory. Call Tensor::mutable_data first."); holder_, "Tensor holds no memory. Call Tensor::mutable_data first.");
......
...@@ -24,8 +24,7 @@ namespace framework { ...@@ -24,8 +24,7 @@ namespace framework {
class SumOpMaker : public OpProtoAndCheckerMaker { class SumOpMaker : public OpProtoAndCheckerMaker {
public: public:
SumOpMaker(OpProto *proto, OpAttrChecker *op_checker) void Make() {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "").AsDuplicable(); AddInput("X", "").AsDuplicable();
AddOutput("Out", ""); AddOutput("Out", "");
AddComment(""); AddComment("");
......
cc_library(dot SRCS dot.cc) cc_library(analysis SRCS dot.cc node.cc node.h)
cc_test(test_node SRCS node_tester.cc DEPS analysis)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle {
namespace inference {
namespace analysis {
enum class Device { CPU, GPU };
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <glog/logging.h> #include <glog/logging.h>
#include <sstream> #include <sstream>
#include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/dot.h"
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
namespace paddle {
namespace inference {
namespace analysis {
class DotTester : public ::testing::Test {
protected:
void SetUp() override {
std::vector<Dot::Attr> attrs({{"title", "hello"}});
dot.reset(new Dot(attrs));
dot->AddNode("a", {Dot::Attr{"shape", "box"}, Dot::Attr("color", "blue")});
dot->AddNode("b", {});
dot->AddNode("c", {});
dot->AddEdge("a", "b", {});
dot->AddEdge("b", "c", {});
dot->AddEdge("a", "c", {});
}
std::unique_ptr<Dot> dot;
};
TEST_F(DotTester, Build) {
auto codes = dot->Build();
// Output the DOT language code; the generated code is too long to compare
// as a string.
//
// The output is
//
// digraph G {
// title="hello"
// node_1
// node_2
// node_0[label="a" shape="box" color="blue"]
// node_0->node_1
// node_1->node_2
// node_0->node_2
// } // end G
LOG(INFO) << '\n' << codes;
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace inference {
namespace analysis {
template <typename IteratorT>
class iterator_range {
IteratorT begin_, end_;
public:
template <typename Container>
explicit iterator_range(Container &&c) : begin_(c.begin()), end_(c.end()) {}
iterator_range(const IteratorT &begin, const IteratorT &end)
: begin_(begin), end_(end) {}
const IteratorT &begin() const { return begin_; }
const IteratorT &end() const { return end_; }
};
/*
* A registry helper class whose records keep their registration order.
*/
template <typename T>
class OrderedRegistry {
public:
T *Register(const std::string &name, T *x) {
PADDLE_ENFORCE(!dic_.count(name));
dic_[name] = data_.size();
data_.emplace_back(std::unique_ptr<T>(x));
return data_.back().get();
}
T *Lookup(const std::string &name) {
auto it = dic_.find(name);
if (it == dic_.end()) return nullptr;
return data_[it->second].get();
}
protected:
std::unordered_map<std::string, int> dic_;
std::vector<std::unique_ptr<T>> data_;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
#define PADDLE_DISALLOW_COPY_AND_ASSIGN(type__) \
  type__(const type__ &) = delete;              \
  void operator=(const type__ &) = delete;
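OrderedRegistry is small enough to show end to end. A hedged usage sketch; the demo function and string payloads are illustrative:

#include <cassert>
#include <string>
#include "paddle/fluid/inference/analysis/helper.h"

void OrderedRegistryDemo() {
  using paddle::inference::analysis::OrderedRegistry;
  OrderedRegistry<std::string> registry;
  // The registry takes ownership of the raw pointers it is given.
  registry.Register("first", new std::string("pass-a"));
  registry.Register("second", new std::string("pass-b"));
  assert(*registry.Lookup("first") == "pass-a");
  // Unknown names return nullptr rather than throwing.
  assert(registry.Lookup("missing") == nullptr);
}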
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/node.h"
#include "glog/logging.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace inference {
namespace analysis {
std::vector<Dot::Attr> Value::dot_attrs() const {
return std::vector<Dot::Attr>({Dot::Attr("style", "filled,rounded"),
Dot::Attr("shape", "box"),
Dot::Attr("fillcolor", "red")});
}
std::vector<Dot::Attr> Function::dot_attrs() const {
return std::vector<Dot::Attr>({Dot::Attr("style", "filled,rounded"),
Dot::Attr("shape", "diamond"),
Dot::Attr("fillcolor", "yellow")});
}
Node *NodeMap::Create(Node::Type type) {
switch (type) {
case Node::Type::kFunction:
nodes_.emplace_back(new Function);
break;
case Node::Type::kValue:
nodes_.emplace_back(new Value);
break;
default:
PADDLE_THROW("Unsupported node type.");
}
nodes_.back()->id_ = size() - 1;
return nodes_.back().get();
}
Node *NodeMap::GetMutable(size_t id) {
PADDLE_ENFORCE_GT(size(), id);
return nodes_[id].get();
}
const Node &NodeMap::Get(size_t id) const {
PADDLE_ENFORCE_GT(size(), id);
return *nodes_[id].get();
}
void NodeMap::Delete(size_t id) {
PADDLE_ENFORCE_LT(id, size());
nodes_[id]->SetDeleted();
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the Node class and its subclasses. A Node is the basic
* analysis element in a computation graph.
* There are two main kinds of nodes: function nodes and value nodes.
*/
#pragma once
#include <limits>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/inference/analysis/device.h"
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace paddle {
namespace inference {
namespace analysis {
class NodeMap;
/*
* Node Representation.
*
* This is a central class for analysis. It is the base class of everything
* computed by a program that may be used as an operand of other nodes.
* Node is the superclass of other important classes such as Function and
* Value, and a node can have a name.
*/
class Node {
public:
// Node type. NOTE new node types should be added here.
enum class Type { kNone = -1, kFunction, kValue, kFunctionBlock };
Node() = default;
struct Attr;
// Cast to a subclass type, Function for example.
template <typename Subclass>
Subclass &As() {
return *dynamic_cast<Subclass *>(this);
}
// Formatted representation of this Node.
virtual std::string repr() const {
return name() + "(" + std::to_string(id()) + ")";
}
// DOT node representation. One Node type can customize its own node
// representation.
virtual std::vector<Dot::Attr> dot_attrs() const {
return std::vector<Dot::Attr>({Dot::Attr("style", "filled")});
}
// Get an additional attribute and convert it to the T data type. NOTE this
// silently creates a new attribute if it does not exist.
Attr &attr(const std::string &name) { return attrs_[name]; }
int id() const { return id_; }
bool deleted() const { return deleted_; }
void SetDeleted() { deleted_ = true; }
void SetName(const std::string &name) { name_ = name; }
const std::string &name() const { return name_; }
void SetType(Type type) { type_ = type; }
Type type() const { return type_; }
void *extra_info() const { return extra_info_; }
void SetExtraInfo(void *extra_info) { extra_info_ = extra_info; }
// Input links.
std::vector<Node *> inlinks;
// Output links.
std::vector<Node *> outlinks;
// A helper class to maintain the status from Pass.
// TODO(superjomn) add a checker here to ensure the T is primary.
struct Attr {
// NOTE T should be a primitive type or a struct composed of several
// primitive types.
// NOTE STL containers should not be used here.
// Some usages
// Attr attr;
// T data;
// attr.data.assign((char*)data, sizeof(data));
bool &Bool() { return As<bool>(); }
float &Float() { return As<float>(); }
int32_t &Int32() { return As<int32_t>(); }
int64_t &Int64() { return As<int64_t>(); }
private:
template <typename T>
T &As() {
// init storage in the first usage.
if (data_.empty()) {
VLOG(4) << "resize data to " << sizeof(T);
type_hash_ = typeid(T).hash_code();
data_.resize(sizeof(T));
}
PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(), "type not matched");
PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error");
return *reinterpret_cast<T *>(&data_[0]);
}
private:
std::string data_;
size_t type_hash_{std::numeric_limits<size_t>::max()};
};
virtual ~Node() {}
friend class NodeMap;
PADDLE_DISALLOW_COPY_AND_ASSIGN(Node);
protected:
// The id number, not the name, is a node's unique identifier in the
// computation graph.
int id_{-1};
std::string name_;
Type type_{Type::kNone};
// Mark this node is deleted by some pass.
bool deleted_{false};
void *extra_info_;
mutable std::unordered_map<std::string, Attr> attrs_;
};
class Function;
/*
* Value represents a value node; it carries attributes such as dims and
* data type.
*/
class Value : public Node {
public:
enum class DataType { kInt32, kInt64, kFloat32, kFloat64 };
using Dims = std::vector<int>;
void SetDataType(DataType data_type) { data_type_ = data_type; }
DataType data_type() const { return data_type_; }
void SetDims(const Dims &dims) { dims_ = dims; }
const Dims &dims() const { return dims_; }
Device device() const { return device_; }
void SetDevice(Device device) { device_ = device; }
std::vector<Dot::Attr> dot_attrs() const override;
PADDLE_DISALLOW_COPY_AND_ASSIGN(Value);
protected:
Value() { SetType(Node::Type::kValue); }
friend class NodeMap;
private:
DataType data_type_;
Dims dims_;
Device device_;
};
/*
* Function represents any kind of executable concept that takes several
* Values as input and outputs several Values.
*/
class Function : public Node {
public:
std::vector<Dot::Attr> dot_attrs() const override;
// Get the operator's type from Desc.
const std::string &func_type() const { return func_type_; }
// Set the operator's type.
void SetFuncType(const std::string &func_type) { func_type_ = func_type; }
PADDLE_DISALLOW_COPY_AND_ASSIGN(Function);
protected:
std::string func_type_;
Function() { SetType(Node::Type::kFunction); }
friend class NodeMap;
};
/*
* FunctionBlock is a Node that contains a sub-graph of multiple Nodes.
*/
struct FunctionBlock : public Node {
std::string repr() const override { return "block-" + std::to_string(id()); }
std::vector<Node *> subgraph;
};
class NodeMap {
public:
// Create a new node with type.
Node *Create(Node::Type type);
// Get a node by its id.
Node *GetMutable(size_t id);
const Node &Get(size_t id) const;
void Delete(size_t id);
const std::vector<std::unique_ptr<Node>> &nodes() { return nodes_; }
size_t size() const { return nodes_.size(); }
private:
std::vector<std::unique_ptr<Node>> nodes_;
std::unordered_map<std::string, Node *> map_;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
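A short sketch of the NodeMap lifecycle and the As<> downcast described above, using only declarations from this header (the name "fc" and the dims are illustrative):

#include <cassert>
#include "paddle/fluid/inference/analysis/node.h"

void NodeMapDemo() {
  using namespace paddle::inference::analysis;
  NodeMap nodes;
  Node* fn = nodes.Create(Node::Type::kFunction);
  Node* val = nodes.Create(Node::Type::kValue);
  fn->SetName("fc");
  fn->As<Function>().SetFuncType("mul");  // recover the subclass via As<>
  val->As<Value>().SetDims({1, 128});
  nodes.Delete(val->id());  // marks the node deleted; storage is kept
  assert(nodes.Get(fn->id()).name() == "fc");
}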
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/node.h"
#include <gtest/gtest.h>
namespace paddle {
namespace inference {
namespace analysis {
TEST(Node, Attr) {
// Node is an abstract class; use Value instead, since they share the same
// Attr logic.
NodeMap nodes;
auto* node = nodes.Create(Node::Type::kValue);
node->attr("v0").Int32() = 2008;
ASSERT_EQ(node->attr("v0").Int32(), 2008);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/framework.pb.h"
namespace paddle { namespace paddle {
...@@ -58,8 +59,8 @@ class EngineBase { ...@@ -58,8 +59,8 @@ class EngineBase {
struct Buffer { struct Buffer {
void* buffer{nullptr}; // buffer should be allocated only once. void* buffer{nullptr}; // buffer should be allocated only once.
int max_size; // buffer allocated space. size_t max_size; // buffer allocated space.
int size; // data size. size_t size; // data size.
DeviceType device{DeviceType::UNK}; // tells which device this buffer is on. DeviceType device{DeviceType::UNK}; // tells which device this buffer is on.
}; };
......
nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto) nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto)
nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine) nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine)
add_subdirectory(convert) add_subdirectory(convert)
nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES}) nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine) DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine)
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor) nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
...@@ -21,15 +21,18 @@ namespace tensorrt { ...@@ -21,15 +21,18 @@ namespace tensorrt {
class ReluOpConverter : public OpConverter { class ReluOpConverter : public OpConverter {
public: public:
ReluOpConverter() {} ReluOpConverter() {}
void operator()(const framework::OpDesc& op) override { void operator()(const framework::proto::OpDesc& op) override {
// The two nullptrs here look strange; that is an artifact of the
// framework::OpDesc constructor's signature.
framework::OpDesc op_desc(op, nullptr, nullptr);
LOG(INFO) << "convert a fluid relu op to tensorrt activation layer whose " LOG(INFO) << "convert a fluid relu op to tensorrt activation layer whose "
"type is Relu"; "type is Relu";
const nvinfer1::ITensor* input_tensor = const nvinfer1::ITensor* input_tensor =
engine_->GetITensor(op.Input("X")[0]); engine_->GetITensor(op_desc.Input("X")[0]);
nvinfer1::IActivationLayer* layer = TRT_ENGINE_ADD_LAYER( nvinfer1::IActivationLayer* layer = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor), engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor),
nvinfer1::ActivationType::kRELU); nvinfer1::ActivationType::kRELU);
engine_->SetITensor(op.Output("Out")[0], layer->getOutput(0)); engine_->SetITensor(op_desc.Output("Out")[0], layer->getOutput(0));
} }
}; };
......
...@@ -21,7 +21,7 @@ namespace tensorrt { ...@@ -21,7 +21,7 @@ namespace tensorrt {
class Conv2dOpConverter : public OpConverter { class Conv2dOpConverter : public OpConverter {
public: public:
Conv2dOpConverter() {} Conv2dOpConverter() {}
void operator()(const framework::OpDesc& op) override { void operator()(const framework::proto::OpDesc& op) override {
LOG(INFO) LOG(INFO)
<< "convert a fluid conv2d op to tensorrt conv layer without bias"; << "convert a fluid conv2d op to tensorrt conv layer without bias";
} }
......
...@@ -23,26 +23,42 @@ namespace tensorrt { ...@@ -23,26 +23,42 @@ namespace tensorrt {
using platform::is_gpu_place; using platform::is_gpu_place;
using platform::is_cpu_place; using platform::is_cpu_place;
class DefaultInputConverter : public EngineInputConverter { class DefaultIOConverter : public EngineIOConverter {
public: public:
DefaultInputConverter() {} DefaultIOConverter() {}
// NOTE out is GPU memory. // NOTE out is GPU memory.
virtual void operator()(const LoDTensor& in, void* out, virtual void operator()(const LoDTensor& in, void* out,
size_t max_size) override { size_t max_size) override {
PADDLE_ENFORCE(out != nullptr); PADDLE_ENFORCE(out != nullptr);
PADDLE_ENFORCE_LE(in.memory_size(), max_size); PADDLE_ENFORCE(stream_ != nullptr);
const auto& place = in.place(); const auto& place = in.place();
size_t size = in.memory_size();
PADDLE_ENFORCE_LE(size, max_size);
if (is_cpu_place(place)) { if (is_cpu_place(place)) {
PADDLE_ENFORCE(stream_ != nullptr); PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
PADDLE_ENFORCE_EQ(0, cudaMemcpyHostToDevice, *stream_));
cudaMemcpyAsync(out, in.data<float>(), in.memory_size(),
cudaMemcpyHostToDevice, *stream_));
} else if (is_gpu_place(place)) { } else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyAsync(out, in.data<float>(), in.memory_size(), cudaMemcpyDeviceToDevice, *stream_));
cudaMemcpyHostToHost, *stream_)); } else {
PADDLE_THROW("Unknown device for converter");
}
cudaStreamSynchronize(*stream_);
}
// NOTE in is GPU memory.
virtual void operator()(const void* in, LoDTensor* out,
size_t max_size) override {
PADDLE_ENFORCE(in != nullptr);
PADDLE_ENFORCE(stream_ != nullptr);
const auto& place = out->place();
size_t size = out->memory_size();
PADDLE_ENFORCE_LE(size, max_size);
if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToHost, *stream_));
} else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_));
} else { } else {
PADDLE_THROW("Unknown device for converter"); PADDLE_THROW("Unknown device for converter");
} }
...@@ -50,7 +66,8 @@ class DefaultInputConverter : public EngineInputConverter { ...@@ -50,7 +66,8 @@ class DefaultInputConverter : public EngineInputConverter {
} }
}; };
REGISTER_TENSORRT_INPUT_CONVERTER(default, DefaultInputConverter); // fluid LoDTensor <-> tensorrt ITensor
REGISTER_TENSORRT_IO_CONVERTER(default, DefaultIOConverter);
} // namespace tensorrt } // namespace tensorrt
} // namespace inference } // namespace inference
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <unordered_map> #include <unordered_map>
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/utils/singleton.h" #include "paddle/fluid/inference/utils/singleton.h"
...@@ -25,43 +26,57 @@ namespace tensorrt { ...@@ -25,43 +26,57 @@ namespace tensorrt {
using framework::LoDTensor; using framework::LoDTensor;
/* /*
* Convert Input from Fluid to an Engine. * Convert Input from Fluid to TensorRT Engine.
* TensorRT's ITensor follows row major, NCHW. Fluid is also row major, so in * Convert Output from TensorRT Engine to Fluid.
* most cases just need to copy the data. *
* Note that TensorRT's ITensor follows row-major NCHW; Fluid is also
* row-major, so in the default case we just need to copy the data.
*/ */
class EngineInputConverter { class EngineIOConverter {
public: public:
EngineInputConverter() {} EngineIOConverter() {}
virtual void operator()(const LoDTensor& in, void* out, size_t max_size) {} virtual void operator()(const LoDTensor& in, void* out, size_t max_size) {}
virtual void operator()(const void* in, LoDTensor* out, size_t max_size) {}
void SetStream(cudaStream_t* stream) { stream_ = stream; } void SetStream(cudaStream_t* stream) { stream_ = stream; }
static void Run(const std::string& in_op_type, const LoDTensor& in, void* out, static void ConvertInput(const std::string& op_type, const LoDTensor& in,
size_t max_size, cudaStream_t* stream) { void* out, size_t max_size, cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr); PADDLE_ENFORCE(stream != nullptr);
auto* converter = Registry<EngineInputConverter>::Lookup( auto* converter = Registry<EngineIOConverter>::Lookup(
in_op_type, "default" /* default_type */); op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter); PADDLE_ENFORCE_NOT_NULL(converter);
converter->SetStream(stream); converter->SetStream(stream);
(*converter)(in, out, max_size); (*converter)(in, out, max_size);
} }
virtual ~EngineInputConverter() {} static void ConvertOutput(const std::string& op_type, const void* in,
LoDTensor* out, size_t max_size,
cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr);
auto* converter = Registry<EngineIOConverter>::Lookup(
op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter);
converter->SetStream(stream);
(*converter)(in, out, max_size);
}
virtual ~EngineIOConverter() {}
protected: protected:
cudaStream_t* stream_{nullptr}; cudaStream_t* stream_{nullptr};
}; };
#define REGISTER_TENSORRT_IO_CONVERTER(op_type__, Converter__) \
struct trt_io_##op_type__##_converter { \
trt_io_##op_type__##_converter() { \
Registry<EngineIOConverter>::Register<Converter__>(#op_type__); \
} \
}; \
trt_io_##op_type__##_converter trt_io_##op_type__##_converter__;
} // namespace tensorrt } // namespace tensorrt
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
#define REGISTER_TENSORRT_INPUT_CONVERTER(in_op_type__, Converter__) \
struct trt_input_##in_op_type__##_converter { \
trt_input_##in_op_type__##_converter() { \
::paddle::inference::Registry<EngineInputConverter>::Register< \
Converter__>(#in_op_type__); \
} \
}; \
trt_input_##in_op_type__##_converter trt_input_##in_op_type__##_converter__;
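Registering a custom converter under the renamed macro is symmetric with the default one. A hedged sketch for a hypothetical op type "foo" (assumed to live in the paddle::inference::tensorrt namespace; the copy logic is elided):

class FooIOConverter : public EngineIOConverter {
 public:
  void operator()(const LoDTensor& in, void* out, size_t max_size) override {
    // fluid LoDTensor -> TensorRT buffer (elided in this sketch)
  }
  void operator()(const void* in, LoDTensor* out, size_t max_size) override {
    // TensorRT buffer -> fluid LoDTensor (elided in this sketch)
  }
};
// Ops of type "foo" will now round-trip through FooIOConverter.
REGISTER_TENSORRT_IO_CONVERTER(foo, FooIOConverter);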
...@@ -21,7 +21,7 @@ namespace tensorrt { ...@@ -21,7 +21,7 @@ namespace tensorrt {
class MulOpConverter : public OpConverter { class MulOpConverter : public OpConverter {
public: public:
MulOpConverter() {} MulOpConverter() {}
void operator()(const framework::OpDesc& op) override { void operator()(const framework::proto::OpDesc& op) override {
LOG(INFO) << "convert a fluid mul op to tensorrt fc layer without bias"; LOG(INFO) << "convert a fluid mul op to tensorrt fc layer without bias";
} }
}; };
......
...@@ -31,10 +31,10 @@ namespace tensorrt { ...@@ -31,10 +31,10 @@ namespace tensorrt {
class OpConverter { class OpConverter {
public: public:
OpConverter() {} OpConverter() {}
virtual void operator()(const framework::OpDesc& op) {} virtual void operator()(const framework::proto::OpDesc& op) {}
void Run(const framework::OpDesc& op, TensorRTEngine* engine) { void Run(const framework::proto::OpDesc& op, TensorRTEngine* engine) {
std::string type = op.Type(); std::string type = op.type();
auto* it = Registry<OpConverter>::Lookup(type); auto* it = Registry<OpConverter>::Lookup(type);
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", type); PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", type);
it->SetEngine(engine); it->SetEngine(engine);
...@@ -42,14 +42,16 @@ class OpConverter { ...@@ -42,14 +42,16 @@ class OpConverter {
} }
// convert fluid op to tensorrt layer // convert fluid op to tensorrt layer
void ConvertOp(const framework::OpDesc& op, TensorRTEngine* engine) { void ConvertOp(const framework::proto::OpDesc& op, TensorRTEngine* engine) {
OpConverter::Run(op, engine); OpConverter::Run(op, engine);
} }
// convert fluid block to tensorrt network // convert fluid block to tensorrt network
void ConvertBlock(const framework::BlockDesc& block, TensorRTEngine* engine) { void ConvertBlock(const framework::proto::BlockDesc& block,
for (auto op : block.AllOps()) { TensorRTEngine* engine) {
OpConverter::Run(*op, engine); for (int i = 0; i < block.ops_size(); i++) {
const auto& op = block.ops(i);
OpConverter::Run(op, engine);
} }
} }
......
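Since the converter API now consumes proto::OpDesc, call sites built around framework::OpDesc bridge through Proto(), exactly as the updated tests below do. A minimal sketch with engine construction elided:

paddle::framework::OpDesc op_desc;
op_desc.SetType("mul");
op_desc.SetInput("X", {"x"});
op_desc.SetOutput("Out", {"out"});
OpConverter converter;
converter.ConvertOp(*op_desc.Proto(), engine);  // engine: TensorRTEngine*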
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
...@@ -26,7 +27,7 @@ namespace paddle { ...@@ -26,7 +27,7 @@ namespace paddle {
namespace inference { namespace inference {
namespace tensorrt { namespace tensorrt {
void Compare(float input, float expect) { void Compare(const std::string op_type, float input, float expect) {
framework::Scope scope; framework::Scope scope;
platform::CUDAPlace place; platform::CUDAPlace place;
platform::CUDADeviceContext ctx(place); platform::CUDADeviceContext ctx(place);
...@@ -35,6 +36,7 @@ void Compare(float input, float expect) { ...@@ -35,6 +36,7 @@ void Compare(float input, float expect) {
auto x_var = scope.Var("X"); auto x_var = scope.Var("X");
auto x_tensor = x_var->GetMutable<framework::LoDTensor>(); auto x_tensor = x_var->GetMutable<framework::LoDTensor>();
x_tensor->Resize({1, 1}); x_tensor->Resize({1, 1});
x_tensor->mutable_data<float>(place);
std::vector<float> init; std::vector<float> init;
init.push_back(input); init.push_back(input);
framework::TensorFromVector(init, ctx, x_tensor); framework::TensorFromVector(init, ctx, x_tensor);
...@@ -45,14 +47,15 @@ void Compare(float input, float expect) { ...@@ -45,14 +47,15 @@ void Compare(float input, float expect) {
out_tensor->mutable_data<float>(place); out_tensor->mutable_data<float>(place);
framework::OpDesc op_desc; framework::OpDesc op_desc;
op_desc.SetType("relu"); op_desc.SetType(op_type);
op_desc.SetInput("X", {"X"}); op_desc.SetInput("X", {"X"});
op_desc.SetOutput("Out", {"Out"}); op_desc.SetOutput("Out", {"Out"});
auto relu_op = framework::OpRegistry::CreateOp(op_desc); auto op = framework::OpRegistry::CreateOp(*op_desc.Proto());
// run fluid op // run fluid op
relu_op->Run(scope, place); op->Run(scope, place);
// get fluid output
std::vector<float> out1; std::vector<float> out1;
framework::TensorToVector(*out_tensor, ctx, &out1); framework::TensorToVector(*out_tensor, ctx, &out1);
...@@ -63,21 +66,28 @@ void Compare(float input, float expect) { ...@@ -63,21 +66,28 @@ void Compare(float input, float expect) {
engine->InitNetwork(); engine->InitNetwork();
engine->DeclareInput("X", nvinfer1::DataType::kFLOAT, engine->DeclareInput("X", nvinfer1::DataType::kFLOAT,
nvinfer1::DimsCHW{1, 1, 1}); nvinfer1::DimsCHW{1, 1, 1});
// convert op
OpConverter op_converter; OpConverter op_converter;
op_converter.ConvertOp(op_desc, engine); op_converter.ConvertOp(*op_desc.Proto(), engine);
engine->DeclareOutput("Out"); engine->DeclareOutput("Out");
engine->FreezeNetwork(); engine->FreezeNetwork();
engine->SetInputFromCPU("X", &input, 1 * sizeof(float));
// run tensorrt op // convert LoDTensor to ITensor
size_t size = x_tensor->memory_size();
EngineIOConverter::ConvertInput(op_type, *x_tensor,
engine->buffer("X").buffer, size, &stream);
// run tensorrt op
engine->Execute(1); engine->Execute(1);
// convert ITensor to LoDTensor
float out2; EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out").buffer,
engine->GetOutputInCPU("Out", &out2, 1 * sizeof(float)); out_tensor, size, &stream);
// get tensorrt output
ASSERT_EQ(out1[0], out2); std::vector<float> out2;
framework::TensorToVector(*out_tensor, ctx, &out2);
// compare
ASSERT_EQ(out1[0], out2[0]);
ASSERT_EQ(out1[0], expect); ASSERT_EQ(out1[0], expect);
delete engine; delete engine;
...@@ -85,8 +95,8 @@ void Compare(float input, float expect) { ...@@ -85,8 +95,8 @@ void Compare(float input, float expect) {
} }
TEST(OpConverter, ConvertRelu) { TEST(OpConverter, ConvertRelu) {
Compare(1, 1); // relu(1) = 1 Compare("relu", 1, 1); // relu(1) = 1
Compare(-5, 0); // relu(-5) = 0 Compare("relu", -5, 0); // relu(-5) = 0
} }
} // namespace tensorrt } // namespace tensorrt
......
...@@ -12,40 +12,63 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,40 +12,63 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h" #include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include <gtest/gtest.h>
namespace paddle { namespace paddle {
namespace inference { namespace inference {
namespace tensorrt { namespace tensorrt {
class EngineInputConverterTester : public ::testing::Test { void IOConverterTester(const platform::DeviceContext& ctx) {
public: cudaStream_t stream;
void SetUp() override { tensor.Resize({10, 10}); } ASSERT_EQ(0, cudaStreamCreate(&stream));
framework::LoDTensor tensor; // init fluid in_tensor
}; framework::LoDTensor in_tensor;
in_tensor.Resize({10, 10});
auto place = ctx.GetPlace();
in_tensor.mutable_data<float>(place);
std::vector<float> init;
for (int64_t i = 0; i < 10 * 10; ++i) {
init.push_back(i);
}
framework::TensorFromVector(init, ctx, &in_tensor);
TEST_F(EngineInputConverterTester, DefaultCPU) { // init tensorrt buffer
void* buffer; void* buffer;
tensor.mutable_data<float>(platform::CPUPlace()); size_t size = in_tensor.memory_size();
ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0); ASSERT_EQ(cudaMalloc(&buffer, size), 0);
cudaStream_t stream; // convert fluid in_tensor to tensorrt buffer
EngineInputConverter::Run("test", tensor, buffer, tensor.memory_size(), EngineIOConverter::ConvertInput("test", in_tensor, buffer, size, &stream);
&stream);
// convert tensorrt buffer to fluid out_tensor
framework::LoDTensor out_tensor;
out_tensor.Resize({10, 10});
out_tensor.mutable_data<float>(place);
EngineIOConverter::ConvertOutput("test", buffer, &out_tensor, size, &stream);
// compare in_tensor and out_tensor
std::vector<float> result;
framework::TensorToVector(out_tensor, ctx, &result);
EXPECT_EQ(init.size(), result.size());
for (size_t i = 0; i < init.size(); i++) {
EXPECT_EQ(init[i], result[i]);
}
cudaStreamDestroy(stream);
} }
TEST_F(EngineInputConverterTester, DefaultGPU) { TEST(EngineIOConverterTester, DefaultCPU) {
void* buffer; platform::CPUPlace place;
tensor.mutable_data<float>(platform::CUDAPlace()); platform::CPUDeviceContext ctx(place);
ASSERT_EQ(cudaMalloc(&buffer, tensor.memory_size()), 0); IOConverterTester(ctx);
}
cudaStream_t stream; TEST(EngineIOConverterTester, DefaultGPU) {
EngineInputConverter::Run("test", tensor, buffer, tensor.memory_size(), platform::CUDAPlace place;
&stream); platform::CUDADeviceContext ctx(place);
IOConverterTester(ctx);
} }
} // namespace tensorrt } // namespace tensorrt
......
@@ -29,7 +29,7 @@ TEST(OpConverter, ConvertBlock) {
   conv2d_op->SetType("conv2d");
   OpConverter converter;
-  converter.ConvertBlock(*block, nullptr /*TensorRTEngine*/);
+  converter.ConvertBlock(*block->Proto(), nullptr /*TensorRTEngine*/);
 }
 }  // namespace tensorrt
...
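ConvertBlock now consumes the protobuf framework::proto::BlockDesc rather than the C++ BlockDesc wrapper, hence *block->Proto() at the call site. A minimal sketch of driving the converter, assuming the same setup the test uses:

// Sketch: build a trivial block and hand its proto to the converter.
paddle::framework::ProgramDesc prog;
auto* block = prog.MutableBlock(0);
auto* conv2d_op = block->AppendOp();
conv2d_op->SetType("conv2d");

paddle::inference::tensorrt::OpConverter converter;
converter.ConvertBlock(*block->Proto(), nullptr /*TensorRTEngine*/);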
@@ -16,7 +16,6 @@ limitations under the License. */
 #include "gtest/gtest.h"
 #include "paddle/fluid/inference/tests/test_helper.h"

-DEFINE_string(data_set, "cifar10", "Data set to test");
 DEFINE_string(dirname, "", "Directory of the inference model.");
 DEFINE_string(fp16_dirname, "", "Directory of the float16 inference model.");
 DEFINE_int32(batch_size, 1, "Batch size of input data");
@@ -35,19 +34,19 @@ TEST(inference, image_classification) {
   // 0. Call `paddle::framework::InitDevices()` initialize all the devices
   // In unittests, this is done in paddle/testing/paddle_gtest_main.cc
+  const bool is_combined = false;
+  std::vector<std::vector<int64_t>> feed_target_shapes =
+      GetFeedTargetShapes(dirname, is_combined);

   paddle::framework::LoDTensor input;
   // Use normilized image pixels as input data,
   // which should be in the range [0.0, 1.0].
-  if (FLAGS_data_set == "cifar10") {
-    SetupTensor<float>(&input, {FLAGS_batch_size, 3, 32, 32},
-                       static_cast<float>(0), static_cast<float>(1));
-  } else if (FLAGS_data_set == "imagenet") {
-    SetupTensor<float>(&input, {FLAGS_batch_size, 3, 224, 224},
-                       static_cast<float>(0), static_cast<float>(1));
-  } else {
-    LOG(FATAL) << "Only cifar10 or imagenet is supported.";
-  }
+  feed_target_shapes[0][0] = FLAGS_batch_size;
+  paddle::framework::DDim input_dims =
+      paddle::framework::make_ddim(feed_target_shapes[0]);
+  LOG(INFO) << input_dims;
+  SetupTensor<float>(&input, input_dims, static_cast<float>(0),
+                     static_cast<float>(1));

   std::vector<paddle::framework::LoDTensor*> cpu_feeds;
   cpu_feeds.push_back(&input);
@@ -60,7 +59,7 @@ TEST(inference, image_classification) {
     LOG(INFO) << "--- CPU Runs: ---";
     LOG(INFO) << "Batch size is " << FLAGS_batch_size;
     TestInference<paddle::platform::CPUPlace, false, true>(
-        dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat);
+        dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined);
     LOG(INFO) << output1.dims();
   }
@@ -73,7 +72,7 @@ TEST(inference, image_classification) {
     LOG(INFO) << "--- GPU Runs: ---";
     LOG(INFO) << "Batch size is " << FLAGS_batch_size;
     TestInference<paddle::platform::CUDAPlace, false, true>(
-        dirname, cpu_feeds, cpu_fetchs2, FLAGS_repeat);
+        dirname, cpu_feeds, cpu_fetchs2, FLAGS_repeat, is_combined);
     LOG(INFO) << output2.dims();
     if (!FLAGS_skip_cpu) {
...
@@ -89,6 +89,50 @@ void CheckError(const paddle::framework::LoDTensor& output1,
   EXPECT_EQ(count, 0U) << "There are " << count << " different elements.";
 }

+std::unique_ptr<paddle::framework::ProgramDesc> InitProgram(
+    paddle::framework::Executor* executor, paddle::framework::Scope* scope,
+    const std::string& dirname, const bool is_combined = false) {
+  std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
+  if (is_combined) {
+    // All parameters are saved in a single file.
+    // Hard-coding the file names of program and parameters in unittest.
+    // The file names should be consistent with that used in Python API
+    // `fluid.io.save_inference_model`.
+    std::string prog_filename = "__model_combined__";
+    std::string param_filename = "__params_combined__";
+    inference_program =
+        paddle::inference::Load(executor, scope, dirname + "/" + prog_filename,
+                                dirname + "/" + param_filename);
+  } else {
+    // Parameters are saved in separate files sited in the specified
+    // `dirname`.
+    inference_program = paddle::inference::Load(executor, scope, dirname);
+  }
+  return inference_program;
+}
+
+std::vector<std::vector<int64_t>> GetFeedTargetShapes(
+    const std::string& dirname, const bool is_combined = false) {
+  auto place = paddle::platform::CPUPlace();
+  auto executor = paddle::framework::Executor(place);
+  auto* scope = new paddle::framework::Scope();
+
+  auto inference_program = InitProgram(&executor, scope, dirname, is_combined);
+  auto& global_block = inference_program->Block(0);
+
+  const std::vector<std::string>& feed_target_names =
+      inference_program->GetFeedTargetNames();
+  std::vector<std::vector<int64_t>> feed_target_shapes;
+  for (size_t i = 0; i < feed_target_names.size(); ++i) {
+    auto* var = global_block.FindVar(feed_target_names[i]);
+    std::vector<int64_t> var_shape = var->GetShape();
+    feed_target_shapes.push_back(var_shape);
+  }
+
+  delete scope;
+  return feed_target_shapes;
+}
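With these helpers a test can derive its feed shape from the saved model instead of hard-coding it; the image-classification test above consumes them roughly like this:

// Sketch of the intended call pattern (flags as in the test above):
std::vector<std::vector<int64_t>> shapes =
    GetFeedTargetShapes(FLAGS_dirname, /*is_combined=*/false);
shapes[0][0] = FLAGS_batch_size;  // dimension 0 is the batch size
paddle::framework::DDim dims = paddle::framework::make_ddim(shapes[0]);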
 template <typename Place, bool CreateVars = true, bool PrepareContext = false>
 void TestInference(const std::string& dirname,
                    const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
@@ -124,22 +168,7 @@ void TestInference(const std::string& dirname,
     paddle::platform::RecordEvent record_event(
         "init_program",
         paddle::platform::DeviceContextPool::Instance().Get(place));
+    inference_program = InitProgram(&executor, scope, dirname, is_combined);
-    if (is_combined) {
-      // All parameters are saved in a single file.
-      // Hard-coding the file names of program and parameters in unittest.
-      // The file names should be consistent with that used in Python API
-      // `fluid.io.save_inference_model`.
-      std::string prog_filename = "__model_combined__";
-      std::string param_filename = "__params_combined__";
-      inference_program = paddle::inference::Load(
-          &executor, scope, dirname + "/" + prog_filename,
-          dirname + "/" + param_filename);
-    } else {
-      // Parameters are saved in separate files sited in the specified
-      // `dirname`.
-      inference_program = paddle::inference::Load(&executor, scope, dirname);
-    }
   }
   // Disable the profiler and print the timing information
   paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault,
@@ -179,10 +208,10 @@ void TestInference(const std::string& dirname,
     if (PrepareContext) {
       ctx = executor.Prepare(*inference_program, 0);
       executor.RunPreparedContext(ctx.get(), scope, &feed_targets,
-                                  &fetch_targets, CreateVars);
+                                  &fetch_targets, true, CreateVars);
     } else {
       executor.Run(*inference_program, scope, &feed_targets, &fetch_targets,
-                   CreateVars);
+                   true, CreateVars);
     }
   // Enable the profiler
...
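The extra leading true threaded into both call sites reads as the executor's create-local-scope flag in this version of the API; spelled out with assumed parameter names (an inference from the call sites, not taken from executor.h):

// Assumed parameter names, for illustration only:
executor.Run(*inference_program, scope, &feed_targets, &fetch_targets,
             /*create_local_scope=*/true, /*create_vars=*/CreateVars);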
@@ -166,6 +166,8 @@ function(op_library TARGET)
   # NOTE(*): activation use macro to regist the kernels, set use_op manually.
   if(${TARGET} STREQUAL "activation")
     file(APPEND ${pybind_file} "USE_OP(relu);\n")
+  elseif(${TARGET} STREQUAL "reduce")
+    file(APPEND ${pybind_file} "USE_OP(reduce_sum);\n")
   else()
     file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
   endif()
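These special cases exist because macro-registered targets expose no op named after the target itself, so a representative kernel has to be pinned instead. Illustratively, the generated pybind registration file accumulates lines such as:

USE_OP(relu);        // the "activation" target registers its ops via macros
USE_OP(reduce_sum);  // the "reduce" target likewise needs one concrete op
USE_OP(concat);      // ordinary targets register under their own name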
@@ -184,6 +186,7 @@ endif()
 add_subdirectory(detail)
 if(WITH_DISTRIBUTE)
   set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf)
   set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
   op_library(send_op DEPS ${DISTRIBUTE_DEPS})
@@ -200,8 +203,15 @@ if(WITH_DISTRIBUTE)
   set_source_files_properties(send_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
   set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
   cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op listen_and_serv_op sum_op executor)
+  if(WITH_GPU)
+    cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op listen_and_serv_op executor)
+    op_library(gen_nccl_id_op DEPS nccl_common sendrecvop_grpc)
+    set_source_files_properties(gen_nccl_id_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+  else()
+    set(DEPS_OPS ${DEPS_OPS} gen_nccl_id_op)
+  endif()
 else()
-  set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op)
+  set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op gen_nccl_id_op)
 endif()
 op_library(cross_entropy_op DEPS cross_entropy)
@@ -270,6 +280,11 @@ foreach(src ${READER_LIBRARY})
   set(OP_LIBRARY ${src} ${OP_LIBRARY})
 endforeach()
+add_subdirectory(detection)
+foreach(src ${DETECTION_LIBRARY})
+  set(OP_LIBRARY ${src} ${OP_LIBRARY})
+endforeach()
 set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
 cc_test(gather_test SRCS gather_test.cc DEPS tensor)
...
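Net effect of the new branch: gen_nccl_id_op is built only when both WITH_DISTRIBUTE and WITH_GPU are on, since it depends on nccl_common; in every other configuration it is appended to DEPS_OPS and skipped.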
@@ -63,8 +63,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
 class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AccuracyOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     // TODO(typhoonzero): support both inference value and indices.
     AddInput("Out", "The network output of topk (inferences)");
     AddInput("Indices", "The the network output of topk (indices)");
...
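This hunk is the first of a long series applying one mechanical refactor: every OpProtoAndCheckerMaker subclass drops its (OpProto*, OpAttrChecker*) constructor and overrides a virtual Make() instead, leaving the proto/checker wiring to the base class. The new shape of a maker, sketched on an illustrative scale op:

class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "(Tensor) Input of scale operator");
    AddOutput("Out", "(Tensor) Output of scale operator");
    AddAttr<float>("scale", "(float) The scaling factor").SetDefault(1.0f);
    AddComment("Computes Out = scale * X.");
  }
};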
@@ -19,19 +19,18 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT)                  \
-  class OP_NAME##OpMaker                                                   \
-      : public ::paddle::framework::OpProtoAndCheckerMaker {               \
-   public:                                                                 \
-    OP_NAME##OpMaker(OpProto *proto, OpAttrChecker *op_checker)            \
-        : ::paddle::framework::OpProtoAndCheckerMaker(proto, op_checker) { \
-      AddInput("X", "Input of " #OP_NAME "operator");                      \
-      AddOutput("Out", "Output of" #OP_NAME "operator");                   \
-      AddAttr<bool>("use_mkldnn",                                          \
-                    "(bool, default false) Only used in mkldnn kernel")    \
-          .SetDefault(false);                                              \
-      AddComment(#OP_COMMENT);                                             \
-    }                                                                      \
-  }
+#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT)               \
+  class OP_NAME##OpMaker                                                \
+      : public ::paddle::framework::OpProtoAndCheckerMaker {            \
+   public:                                                              \
+    void Make() override {                                              \
+      AddInput("X", "Input of " #OP_NAME "operator");                   \
+      AddOutput("Out", "Output of" #OP_NAME "operator");                \
+      AddAttr<bool>("use_mkldnn",                                       \
+                    "(bool, default false) Only used in mkldnn kernel") \
+          .SetDefault(false);                                           \
+      AddComment(#OP_COMMENT);                                          \
+    }                                                                   \
+  }

 #define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \
@@ -204,8 +203,7 @@ $$out = \frac{x}{1 + |x|}$$
 class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of LeakyRelu operator");
     AddOutput("Out", "Output of LeakyRelu operator");
     AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
@@ -220,8 +218,7 @@ $out = \max(x, \alpha * x)$
 class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of Softshrink operator");
     AddOutput("Out", "Output of Softshrink operator");
     AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
@@ -242,8 +239,7 @@ $$
 class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of HardShrink operator");
     AddOutput("Out", "Output of HardShrink operator");
     AddAttr<float>("threshold", "The value of threshold for HardShrink")
@@ -265,8 +261,7 @@ $$
 class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of BRelu operator");
     AddOutput("Out", "Output of BRelu operator");
     AddAttr<float>("t_min", "The min marginal value of BRelu")
@@ -284,8 +279,7 @@ $out = \max(\min(x, t_{min}), t_{max})$
 class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SoftReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of SoftRelu operator");
     AddOutput("Out", "Output of SoftRelu operator");
     AddAttr<float>("threshold", "The threshold value of SoftRelu")
@@ -301,8 +295,7 @@ $out = \ln(1 + \exp(\max(\min(x, threshold), threshold))$
 class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ELUOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of ELU operator");
     AddOutput("Out", "Output of ELU operator");
     AddAttr<float>("alpha", "The alpha value of ELU").SetDefault(1.0f);
@@ -320,8 +313,7 @@ $out = \max(0, x) + \min(0, \alpha * (e^x - 1))$
 class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  Relu6OpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of Relu6 operator");
     AddOutput("Out", "Output of Relu6 operator");
     AddAttr<float>("threshold", "The threshold value of Relu6")
@@ -337,8 +329,7 @@ $out = \min(\max(0, x), 6)$
 class PowOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  PowOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of Pow operator");
     AddOutput("Out", "Output of Pow operator");
     AddAttr<float>("factor", "The exponential factor of Pow").SetDefault(1.0f);
@@ -353,8 +344,7 @@ $out = x^{factor}$
 class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  STanhOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of STanh operator");
     AddOutput("Out", "Output of STanh operator");
     AddAttr<float>("scale_a", "The scale parameter of a for the input")
@@ -372,8 +362,7 @@ $$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
 class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ThresholdedReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of ThresholdedRelu operator");
     AddOutput("Out", "Output of ThresholdedRelu operator");
     AddAttr<float>("threshold", "The threshold location of activation")
@@ -394,8 +383,7 @@ $$
 class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  HardSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of HardSigmoid operator");
     AddOutput("Out", "Output of HardSigmoid operator");
     AddAttr<float>("slope", "Slope for linear approximation of sigmoid")
@@ -420,8 +408,7 @@ It is recommended to use the defaults for this activation.
 class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SwishOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input of Swish operator");
     AddOutput("Out", "Output of Swish operator");
     AddAttr<float>("beta", "Constant beta of swish operator").SetDefault(1.0f);
...
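For a concrete OP_NAME the rewritten macro expands, whitespace aside, to roughly the following, assuming an invocation like REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc). Note the adjacent string literals concatenate without a separating space ("Input of Sigmoidoperator"), a pre-existing quirk this diff preserves:

class SigmoidOpMaker : public ::paddle::framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "Input of " "Sigmoid" "operator");
    AddOutput("Out", "Output of" "Sigmoid" "operator");
    AddAttr<bool>("use_mkldnn",
                  "(bool, default false) Only used in mkldnn kernel")
        .SetDefault(false);
    AddComment("SigmoidDoc");  // #OP_COMMENT stringizes the doc argument
  }
};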
@@ -66,8 +66,7 @@ class AdadeltaOp : public framework::OperatorWithKernel {
 class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AdadeltaOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("Param", "(Tensor) Input parameter");
     AddInput("Grad", "(Tensor) Input gradient");
     AddInput("AvgSquaredGrad", "(Tensor) Input average of squared gradient");
...
@@ -67,8 +67,7 @@ class AdagradOp : public framework::OperatorWithKernel {
 class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AdagradOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("Param", "(Tensor) Input parameter");
     AddInput("Grad", "(Tensor) Input gradient");
     AddInput("Moment", "(Tensor) Second moment");
...
@@ -80,8 +80,7 @@ class AdamOp : public framework::OperatorWithKernel {
 class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AdamOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("Param", "(Tensor) Input parameter");
     AddInput("Grad", "(Tensor) Input gradient");
     AddInput("LearningRate", "(Tensor) Learning rate");
...
@@ -74,8 +74,7 @@ class AdamaxOp : public framework::OperatorWithKernel {
 class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AdamaxOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("Param", "(Tensor) Input parameter");
     AddInput("Grad", "(Tensor) Input gradient");
     AddInput("LearningRate", "(Tensor) Learning rate");
...
@@ -123,8 +123,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
 class ArrayToLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ArrayToLoDTensorOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X",
              "(std::vector<LodTensor>) A vector of tensors that is going to "
             "be casted to a big LoDTensor.");
...
@@ -94,8 +94,7 @@ class AssignOp : public framework::OperatorBase {
 class AssignOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AssignOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X",
              "(LoDTensor, SelectedRows or LoDTensorArray) The input variable "
             "could be LoDTensor, SelectedRows or LoDTensorArray.")
...
@@ -45,8 +45,7 @@ class AssignValueOp : public framework::OperatorWithKernel {
 class AssignValueOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AssignValueOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddOutput("Out", "(Tensor) Output tensor of assign_value operator.");
     AddAttr<std::vector<int>>("shape",
                               "(vector<int>) "
...
@@ -50,8 +50,7 @@ class AucOp : public framework::OperatorWithKernel {
 class AucOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AucOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("Out",
              "A floating point 2D tensor, values are in the range [0, 1]."
             "Each row is sorted in descending order. This input should be the"
...
@@ -111,8 +111,7 @@ class AverageAccumulatesOp : public framework::OperatorWithKernel {
 class AverageAccumulatesOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AverageAccumulatesOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("param", "(Tensor), The parameter to be accumulated.");
     AddInput("in_sum_1",
              "(Tensor), A tensor used to store the parameter "
...
@@ -126,8 +126,7 @@ class BatchNormOp : public framework::OperatorWithKernel {
 class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BatchNormOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddAttr<bool>("is_test", "").SetDefault(false);
     AddAttr<float>("momentum", "").SetDefault(0.9);
     AddAttr<float>("epsilon", "")
...
@@ -53,8 +53,7 @@ class BatchSizeLikeOp : public framework::OperatorWithKernel {
 class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BatchSizeLikeOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() final {
     AddInput("Input",
              "(Tensor) Tensor "
             "whose input_dim_idx'th dimension specifies the batch_size");
@@ -68,7 +67,11 @@ class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<int>("output_dim_idx",
                  "(int, default 0) The index of output's batch size dimension")
         .SetDefault(0);
+    Apply();
   }
+
+ protected:
+  virtual void Apply() = 0;
 };
 }  // namespace operators
...
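BatchSizeLikeOpMaker is the one maker that seals Make() with final: it emits the shared Input/Out and dim-index plumbing itself, then calls a protected pure-virtual Apply() hook, so each derived maker contributes only its own fields (a template-method pattern). A derived maker then looks roughly like this (hypothetical sketch):

class FillConstantBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
 protected:
  void Apply() override {
    AddAttr<float>("value", "(float, default 0) The value to fill")
        .SetDefault(0.0f);
    AddComment("Fill an output shaped like Input with a constant value.");
  }
};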
@@ -134,8 +134,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
 class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BeamSearchDecodeOpProtoMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("Ids",
              "(LodTensorArray)"
             "score of the candidate words in each step");
...
@@ -197,8 +197,7 @@ std::string ItemToString(const BeamSearch::Item &item) {
 class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BeamSearchOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     // inputs and outputs stored in proto
     AddInput("pre_ids", "ids in previous step");
     AddInput("ids", "a LoDTensor of shape of [None,k]");
...
@@ -41,8 +41,7 @@ class BilinearInterpOp : public framework::OperatorWithKernel {
 class BilinearInterpOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BilinearInterpOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X",
              "(Tensor) The input tensor of bilinear interpolation, "
             "This is a 4-D tensor with shape of (N x C x h x w)");
...
@@ -65,8 +65,7 @@ class BilinearTensorProductOp : public framework::OperatorWithKernel {
 class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BilinearTensorProductOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "The first input of bilinear_tensor_product operator.");
     AddInput("Y", "The second input of bilinear_tensor_product operator.");
     AddInput("Weight",
...
@@ -21,8 +21,7 @@ namespace operators {
 class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  CastOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "The input tensor of cast op");
     AddOutput("Out", "The output tensor of cast op");
     AddAttr<int>("out_dtype", "output data type");
...
@@ -50,8 +50,7 @@ class ChannelCloseOpOpInferShape : public framework::InferShapeBase {
 class ChannelCloseOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ChannelCloseOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput(kChannel,
              "The Channel Variable that should be closed by"
             " the ChannelClose Op.");
...
@@ -91,8 +91,7 @@ class ChannelCreateOpOpInferShape : public framework::InferShapeBase {
 class ChannelCreateOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ChannelCreateOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddOutput(kOutput,
               "The object of a Channel type created by ChannelCreate Op.");
     AddAttr<int>("capacity", "The size of the buffer of Channel.")
...
@@ -72,8 +72,7 @@ class ChannelRecvOp : public framework::OperatorBase {
 class ChannelRecvOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ChannelRecvOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput(Channel,
              "(Channel) A variable which \"receives\" the a value sent"
             "to it by a channel_send op.")
...
@@ -57,8 +57,7 @@ class ChannelSendOp : public framework::OperatorBase {
 class ChannelSendOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ChannelSendOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput(Channel,
              "(Channel) A variable which \"sends\" the passed in value to "
             "a listening receiver.")
...
@@ -66,8 +66,7 @@ class ChunkEvalOp : public framework::OperatorWithKernel {
 class ChunkEvalOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ChunkEvalOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("Inference",
              "(Tensor, default: Tensor<int64_t>). "
             "Predictions from the network.");
...
@@ -37,8 +37,7 @@ class ClipByNormOp : public framework::OperatorWithKernel {
 class ClipByNormOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ClipByNormOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X",
              "(Tensor) The input of clip_by_norm op."
             "The number of dimensions must be between [1, 9].");
...
@@ -38,8 +38,7 @@ class ClipOp : public framework::OperatorWithKernel {
 template <typename AttrType>
 class ClipOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ClipOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X",
              "(Tensor)The input of clip op."
             "The number of dimensions must be between [1, 9].");
...
@@ -21,8 +21,7 @@ namespace operators {
 template <typename OpComment>
 class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  CompareOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     OpComment comment;
     AddInput("X",
              string::Sprintf("(LoDTensor) the left hand operand of %s operator",
...
@@ -63,8 +63,7 @@ class ConcatOp : public framework::OperatorWithKernel {
 class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ConcatOpMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X", "Input tensors of concat operator.").AsDuplicable();
     AddOutput("Out", "Output tensor of concat operator.");
     AddAttr<int>("axis",
...
@@ -108,8 +108,7 @@ class ConditionalBlockOp : public ConditionalOp {
 class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ConditionalBlockOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  void Make() override {
     AddInput("X",
              "The conditional variable of this operator. If X is empty, the "
             "whole sub-block will not be executed.")
...
@@ -106,8 +106,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
       library);
 }

-Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-    : OpProtoAndCheckerMaker(proto, op_checker) {
+void Conv2DOpMaker::Make() {
   AddInput(
       "Input",
       "(Tensor) The input tensor of convolution operator. "
@@ -200,8 +199,7 @@ $$
 )DOC");
 }

-Conv3DOpMaker::Conv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-    : OpProtoAndCheckerMaker(proto, op_checker) {
+void Conv3DOpMaker::Make() {
   AddInput(
       "Input",
       "(Tensor) The input tensor of convolution operator. "
...
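Unlike the class-local makers above, Conv2DOpMaker::Make() and Conv3DOpMaker::Make() are defined out of line because the maker classes are declared in a shared header and reused by other conv variants; the matching declaration presumably shrinks to:

// Sketch of the matching header declaration (assumed, not shown in this diff):
class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override;
};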
(The remaining 64 file diffs in this commit are collapsed.)