diff --git a/CMakeLists.txt b/CMakeLists.txt index 030bd19b3fd2f561a847bbc4613e5d2030812a92..d4fe4f9a0e4b90e34b95ddfba52e22ee762273a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,6 +100,9 @@ endif() set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") +set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING + "A path setting fluid shared and static libraries") + if (WITH_C_API AND WITH_PYTHON) message(WARNING "It is suggest not embedded a python interpreter in Paddle " "when using C-API. It will give an unpredictable behavior when using a " @@ -117,13 +120,14 @@ else() endif() set(WITH_MKLML ${WITH_MKL}) -if (WITH_MKL AND AVX2_FOUND) - set(WITH_MKLDNN ON) -else() - message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN") - set(WITH_MKLDNN OFF) +if (NOT DEFINED WITH_MKLDNN) + if (WITH_MKL AND AVX2_FOUND) + set(WITH_MKLDNN ON) + else() + message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN") + set(WITH_MKLDNN OFF) + endif() endif() - ######################################################################################## include(external/mklml) # download mklml package diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake index 499682f644d60c16c3025870e7dd2a890630a2bb..5041504033e09a14546be4dfd6dfc52d366cd395 100644 --- a/cmake/external/boost.cmake +++ b/cmake/external/boost.cmake @@ -23,8 +23,12 @@ set(BOOST_PROJECT "extern_boost") # checked that the devtools package of CentOS 6 installs boost 1.41.0. # So we use 1.41.0 here. 
set(BOOST_VER "1.41.0") -set(BOOST_TAR "boost_1_41_0") -set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz") +if((NOT DEFINED BOOST_TAR) OR (NOT DEFINED BOOST_URL)) + message(STATUS "use pre defined download url") + set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE) + set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE) +endif() +MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}") set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost) set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}") set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index e9a37b52e61b2525b047352cc70510df83eccb7f..5f8422ae76f4e2a22f3b3d5e650d8345c2622e7a 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -27,8 +27,12 @@ ENDIF() INCLUDE(ExternalProject) SET(MKLML_PROJECT "extern_mklml") -SET(MKLML_VER "mklml_lnx_2018.0.3.20180406") -SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz") +IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL)) + MESSAGE(STATUS "use pre defined download url") + SET(MKLML_VER "mklml_lnx_2018.0.3.20180406" CACHE STRING "" FORCE) + SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE) +ENDIF() +MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DST_DIR "mklml") diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 1d3e2ade6d393c6e4c37eea0dc1064cdb18808a5..65d61b7a38dde870a9217c8a68e81f7e593f88ec 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -231,7 +231,7 @@ endfunction(cc_binary) function(cc_test TARGET_NAME) if(WITH_TESTING) - set(options "") + set(options SERIAL) set(oneValueArgs "") set(multiValueArgs SRCS DEPS 
ARGS) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -241,6 +241,9 @@ function(cc_test TARGET_NAME) add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} ${cc_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + if (cc_test_SERIAL) + set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) + endif() endif() endfunction(cc_test) @@ -295,7 +298,7 @@ endfunction(nv_binary) function(nv_test TARGET_NAME) if (WITH_GPU AND WITH_TESTING) - set(options "") + set(options SERIAL) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -303,6 +306,9 @@ function(nv_test TARGET_NAME) target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) add_test(${TARGET_NAME} ${TARGET_NAME}) + if (nv_test_SERIAL) + set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) + endif() endif() endfunction(nv_test) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 7117a3a4f31c88b3c4a81e611146123903659ad5..b730ab43c49af005c00218c7430ab3c4d1a89510 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -52,32 +52,32 @@ function(copy TARGET) endfunction() # third party -set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/eigen3") +set(dst_dir "${FLUID_INSTALL_DIR}/third_party/eigen3") copy(eigen3_lib SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported ) -set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/gflags") +set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/gflags") copy(gflags_lib SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES} DSTS ${dst_dir} ${dst_dir}/lib ) -set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/glog") +set(dst_dir 
"${FLUID_INSTALL_DIR}/third_party/install/glog") copy(glog_lib SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES} DSTS ${dst_dir} ${dst_dir}/lib ) -set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/boost/") +set(dst_dir "${FLUID_INSTALL_DIR}/third_party/boost/") copy(boost_lib SRCS ${BOOST_INCLUDE_DIR}/boost DSTS ${dst_dir} ) if(NOT PROTOBUF_FOUND) - set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/protobuf") + set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf") copy(protobuf_lib SRCS ${PROTOBUF_INCLUDE_DIR} ${PROTOBUF_LIBRARY} DSTS ${dst_dir} ${dst_dir}/lib @@ -85,13 +85,13 @@ if(NOT PROTOBUF_FOUND) endif() if(NOT CBLAS_FOUND) - set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/openblas") + set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas") copy(openblas_lib SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include DSTS ${dst_dir} ${dst_dir} ) elseif (WITH_MKLML) - set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/mklml") + set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mklml") copy(mklml_lib SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR} DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir} @@ -99,7 +99,7 @@ elseif (WITH_MKLML) endif() if(WITH_MKLDNN) - set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/mkldnn") + set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mkldnn") copy(mkldnn_lib SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB} DSTS ${dst_dir} ${dst_dir}/lib @@ -107,17 +107,17 @@ if(WITH_MKLDNN) endif() if(NOT MOBILE_INFERENCE AND NOT RPI) - set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy") + set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy") copy(snappy_lib SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES} DSTS ${dst_dir} ${dst_dir}/lib) - set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappystream") + set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappystream") copy(snappystream_lib SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES} DSTS ${dst_dir} 
${dst_dir}/lib) - set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/zlib") + set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib") copy(zlib_lib SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} DSTS ${dst_dir} ${dst_dir}/lib) @@ -125,7 +125,7 @@ endif() # paddle fluid module set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") -set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid") +set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid") set(module "framework") copy(framework_lib DEPS framework_py_proto SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h @@ -165,7 +165,7 @@ copy(pybind_lib # CMakeCache Info copy(cmake_cache SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt - DSTS ${CMAKE_INSTALL_PREFIX}) + DSTS ${FLUID_INSTALL_DIR}) add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep}) @@ -173,7 +173,7 @@ add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep}) execute_process( COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 OUTPUT_VARIABLE PADDLE_GIT_COMMIT) -set(version_file ${CMAKE_INSTALL_PREFIX}/version.txt) +set(version_file ${FLUID_INSTALL_DIR}/version.txt) file(WRITE ${version_file} "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n" "WITH_MKL: ${WITH_MKL}\n" diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst index ff3c9346a2cd777a5294d536911f39de9032fe52..9ae7ffb2604250aebfd9ecd8966384c3ef05f97b 100644 --- a/doc/fluid/api/layers.rst +++ b/doc/fluid/api/layers.rst @@ -485,7 +485,7 @@ roi_pool .. autofunction:: paddle.fluid.layers.roi_pool :noindex: - + ops === @@ -828,4 +828,10 @@ topk .. autofunction:: paddle.fluid.layers.topk :noindex: +dice_loss +--------- + +.. 
autofunction:: paddle.fluid.layers.dice_loss + :noindex: + diff --git a/doc/v2/build_and_install/build_from_source_cn.rst b/doc/v2/build_and_install/build_from_source_cn.rst index f846928954dd3a05e11054ce2ff2ff839fbefd4b..330e84346e28db30d16d4a95490ddcab431228a0 100644 --- a/doc/v2/build_and_install/build_from_source_cn.rst +++ b/doc/v2/build_and_install/build_from_source_cn.rst @@ -35,7 +35,7 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 # 2. 可选步骤:源码中构建用于编译PaddlePaddle的Docker镜像 docker build -t paddle:dev . # 3. 执行下面的命令编译CPU-Only的二进制 - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh + docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build # 4. 或者也可以使用为上述可选步骤构建的镜像(必须先执行第2步) docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev diff --git a/doc/v2/build_and_install/build_from_source_en.rst b/doc/v2/build_and_install/build_from_source_en.rst index d1b5b88dff81d4c5cee3dd13a7dccbc333ab6a17..0a6c33985ed65e24e507744c49cf929c9481195c 100644 --- a/doc/v2/build_and_install/build_from_source_en.rst +++ b/doc/v2/build_and_install/build_from_source_en.rst @@ -34,7 +34,7 @@ Or you can build your own image from source as the optional step below: # 2. Optional: build development docker image from source docker build -t paddle:dev . # 3. Run the following command to build a CPU-Only binaries - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh + docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build # 4. 
Or, use your built Docker image to build PaddlePaddle (must run step 2) docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev diff --git a/contrib/float16/.gitignore b/paddle/contrib/float16/.gitignore similarity index 100% rename from contrib/float16/.gitignore rename to paddle/contrib/float16/.gitignore diff --git a/contrib/float16/README.md b/paddle/contrib/float16/README.md similarity index 100% rename from contrib/float16/README.md rename to paddle/contrib/float16/README.md diff --git a/contrib/float16/float16_benchmark.md b/paddle/contrib/float16/float16_benchmark.md similarity index 100% rename from contrib/float16/float16_benchmark.md rename to paddle/contrib/float16/float16_benchmark.md diff --git a/contrib/float16/float16_inference_demo.py b/paddle/contrib/float16/float16_inference_demo.py similarity index 100% rename from contrib/float16/float16_inference_demo.py rename to paddle/contrib/float16/float16_inference_demo.py diff --git a/contrib/float16/float16_transpiler.py b/paddle/contrib/float16/float16_transpiler.py similarity index 100% rename from contrib/float16/float16_transpiler.py rename to paddle/contrib/float16/float16_transpiler.py diff --git a/contrib/float16/run_float16_demo.sh b/paddle/contrib/float16/run_float16_demo.sh similarity index 100% rename from contrib/float16/run_float16_demo.sh rename to paddle/contrib/float16/run_float16_demo.sh diff --git a/contrib/inference/README.md b/paddle/contrib/inference/README.md similarity index 100% rename from contrib/inference/README.md rename to paddle/contrib/inference/README.md diff --git a/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h similarity index 100% rename from contrib/inference/paddle_inference_api.h rename to paddle/contrib/inference/paddle_inference_api.h diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 
4fb4511d99179e4ea14cde66feb13bc9e114581a..602246d75d708db5108e5320e50a27fd9cd580f8 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,4 +1,5 @@ nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES}) nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc - DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine) + DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine + SERIAL) nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor) diff --git a/paddle/fluid/inference/tests/book/test_inference_image_classification.cc b/paddle/fluid/inference/tests/book/test_inference_image_classification.cc index 60c761c5281e2f535aab0200c93fb738addcdb87..987da18116cc6f4902bd66ae317f2470a8bc5057 100644 --- a/paddle/fluid/inference/tests/book/test_inference_image_classification.cc +++ b/paddle/fluid/inference/tests/book/test_inference_image_classification.cc @@ -21,6 +21,7 @@ DEFINE_string(fp16_dirname, "", "Directory of the float16 inference model."); DEFINE_int32(batch_size, 1, "Batch size of input data"); DEFINE_int32(repeat, 1, "Running the inference program repeat times"); DEFINE_bool(skip_cpu, false, "Skip the cpu test"); +DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference"); TEST(inference, image_classification) { if (FLAGS_dirname.empty() || FLAGS_batch_size < 1 || FLAGS_repeat < 1) { @@ -58,8 +59,10 @@ TEST(inference, image_classification) { // Run inference on CPU LOG(INFO) << "--- CPU Runs: ---"; LOG(INFO) << "Batch size is " << FLAGS_batch_size; + LOG(INFO) << "FLAGS_use_mkldnn: " << FLAGS_use_mkldnn; TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined); + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined, + FLAGS_use_mkldnn); LOG(INFO) << output1.dims(); } diff --git 
a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index cc1589514aab3b973b4909159748bc4223cdce46..01b8dc0be662da22fe15a79cd9abfe5fa92c9577 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -133,11 +133,24 @@ std::vector> GetFeedTargetShapes( return feed_target_shapes; } +void EnableMKLDNN( + const std::unique_ptr& program) { + for (size_t bid = 0; bid < program->Size(); ++bid) { + auto* block = program->MutableBlock(bid); + for (auto* op : block->AllOps()) { + if (op->HasAttr("use_mkldnn")) { + op->SetAttr("use_mkldnn", true); + } + } + } +} + template void TestInference(const std::string& dirname, const std::vector& cpu_feeds, const std::vector& cpu_fetchs, - const int repeat = 1, const bool is_combined = false) { + const int repeat = 1, const bool is_combined = false, + const bool use_mkldnn = false) { // 1. Define place, executor, scope auto place = Place(); auto executor = paddle::framework::Executor(place); @@ -169,6 +182,9 @@ void TestInference(const std::string& dirname, "init_program", paddle::platform::DeviceContextPool::Instance().Get(place)); inference_program = InitProgram(&executor, scope, dirname, is_combined); + if (use_mkldnn) { + EnableMKLDNN(inference_program); + } } // Disable the profiler and print the timing information paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault, diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 7fce138e3f47e0eb485afb4d5a665eb41f68e286..bc7faef8cd499e63af4d0ab2282897c39f2b7faa 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -202,10 +202,12 @@ if(WITH_DISTRIBUTE) op_library(send_barrier_op DEPS ${DISTRIBUTE_DEPS}) set_source_files_properties(send_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS 
${DISTRIBUTE_COMPILE_FLAGS}) - cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op listen_and_serv_op sum_op executor) + cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op + listen_and_serv_op sum_op executor SERIAL) if(WITH_GPU) set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op listen_and_serv_op executor) + cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op + listen_and_serv_op executor SERIAL) op_library(gen_nccl_id_op DEPS nccl_common sendrecvop_grpc) set_source_files_properties(gen_nccl_id_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) else() diff --git a/paddle/fluid/operators/detail/CMakeLists.txt b/paddle/fluid/operators/detail/CMakeLists.txt index 719a7465b8d58ef8588ff1e83c2b971eb6fbb00f..b9a66474c9afc27462f9c47af1a0465e2cec70bc 100644 --- a/paddle/fluid/operators/detail/CMakeLists.txt +++ b/paddle/fluid/operators/detail/CMakeLists.txt @@ -4,6 +4,8 @@ if(WITH_DISTRIBUTE) set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") set_source_files_properties(serde_test.cc grpc_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_test(serde_test SRCS serde_test.cc variable_response.cc DEPS grpc++_unsecure grpc_unsecure gpr - cares zlib protobuf sendrecvop_grpc) - cc_test(grpc_server_test SRCS grpc_server_test.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf executor proto_desc lookup_table_op) + cares zlib protobuf sendrecvop_grpc SERIAL) + cc_test(grpc_server_test SRCS grpc_server_test.cc DEPS sendrecvop_grpc + grpc++_unsecure grpc_unsecure gpr cares zlib protobuf executor + proto_desc lookup_table_op SERIAL) endif() diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h index 
705cc894c06b207f4e4e45fc771c04fa3cbdf6d5..ab70c1f0592d122ba248a101db487e64c0bdae6f 100644 --- a/paddle/fluid/operators/warpctc_op.h +++ b/paddle/fluid/operators/warpctc_op.h @@ -186,8 +186,7 @@ class WarpCTCKernel : public framework::OpKernel { // warpctc accesses labels in CPU memory Tensor warpctc_label; - TensorCopy(*label, platform::CPUPlace(), ctx.device_context(), - &warpctc_label); + TensorCopySync(*label, platform::CPUPlace(), &warpctc_label); const int* warpctc_label_data = warpctc_label.data(); // warpctc stores loss in CPU memory Tensor warpctc_loss; diff --git a/paddle/fluid/train/demo/README.md b/paddle/fluid/train/demo/README.md index fd80a77b02e60c15ae6c58486ed7cbbb6ffefabc..41b01d33828f750f67bba5f82cb7ed6fe4d4ea0a 100644 --- a/paddle/fluid/train/demo/README.md +++ b/paddle/fluid/train/demo/README.md @@ -7,7 +7,7 @@ # WITH_MKLDNN=ON|OFF PADDLE_LIB=/paddle/lib/dir -cmake .. -DCMAKE_INSTALL_PREFIX=$PADDLE_LIB \ +cmake .. -DFLUID_INSTALL_DIR=$PADDLE_LIB \ -DCMAKE_BUILD_TYPE=Release \ -DWITH_FLUID_ONLY=ON \ -DWITH_GPU=OFF \ @@ -42,7 +42,7 @@ cd build # WITH_MKLDNN=ON|OFF PADDLE_LIB=/paddle/lib/dir -# PADDLE_LIB is the same with CMAKE_INSTALL_PREFIX when building the lib +# PADDLE_LIB is the same with FLUID_INSTALL_DIR when building the lib cmake .. 
-DPADDLE_LIB=$PADDLE_LIB \ -DWITH_MKLDNN=OFF \ -DWITH_MKL=OFF diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index fbe219a1c9cf85f19ae2ab991ae7e4207858f204..8d8cfec4ca55571bd64f1788e6983d7381e85fc5 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -433,7 +433,7 @@ EOF EOF if [[ ${WITH_GPU} == "ON" ]]; then - NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&" + NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.1.2-1+cuda${CUDA_MAJOR} libnccl-dev=2.1.2-1+cuda${CUDA_MAJOR} &&" else NCCL_DEPS="" fi @@ -493,7 +493,8 @@ function gen_fluid_inference_lib() { ======================================== EOF make -j `nproc` inference_lib_dist - fi + tar -czf ${PADDLE_ROOT}/build/fluid.tgz ${PADDLE_ROOT}/build/fluid_install_dir + fi } function main() { diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index dee41448081cbfcd8224ce2abbf3ba7b7b97eb7c..d1ea9f148566d20988a43f4c9d421c4452697ef1 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -1098,7 +1098,7 @@ class ConditionalBlock(object): input_set = set([ipt.name for ipt in self.inputs]) param_list = [ - parent_block.var(each_name) for each_name in params + parent_block.var_recursive(each_name) for each_name in params if each_name not in input_set ] diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 561c8bd42f90911bf5a0c898fe01412d42d2c9b1..75f7ec2f853fb6389d0f78e81aa63e40b1c25dc5 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -80,6 +80,7 @@ __all__ = [ 'pad', 'label_smooth', 'roi_pool', + 'dice_loss', ] @@ -699,8 +700,8 @@ def dynamic_gru(input, def gru_unit(input, hidden, size, - weight=None, - bias=None, + param_attr=None, + bias_attr=None, activation='tanh', gate_activation='sigmoid'): """ @@ -731,8 +732,8 @@ def gru_unit(input, input 
(Variable): The fc transformed input value of current step. hidden (Variable): The hidden value of lstm unit from previous step. size (integer): The input dimension value. - weight (ParamAttr): The weight parameters for gru unit. Default: None - bias (ParamAttr): The bias parameters for gru unit. Default: None + param_attr (ParamAttr): The weight parameters for gru unit. Default: None + bias_attr (ParamAttr): The bias parameters for gru unit. Default: None activation (string): The activation type for cell (actNode). Default: 'tanh' gate_activation (string): The activation type for gates (actGate). @@ -764,34 +765,31 @@ def gru_unit(input, size = size / 3 # create weight - if weight is None: - weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype) + weight = helper.create_parameter( + attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype) + gate = helper.create_tmp_variable(dtype) + reset_hidden_pre = helper.create_tmp_variable(dtype) + updated_hidden = helper.create_tmp_variable(dtype) + inputs = {'Input': input, 'HiddenPrev': hidden, 'Weight': weight} # create bias - - if bias is None: + if helper.bias_attr: bias_size = [1, 3 * size] bias = helper.create_parameter( attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True) - - gate = helper.create_tmp_variable(dtype) - reset_hidden_pre = helper.create_tmp_variable(dtype) - updated_hidden = helper.create_tmp_variable(dtype) + inputs['Bias'] = bias helper.append_op( type='gru_unit', - inputs={'Input': input, - 'HiddenPrev': hidden, - 'Weight': weight}, + inputs=inputs, outputs={ 'Gate': gate, 'ResetHiddenPrev': reset_hidden_pre, 'Hidden': updated_hidden, }, attrs={ - 'activation': 0, - 'gate_activation': 1, + 'activation': 2, # tanh + 'gate_activation': 1, # sigmoid }) return updated_hidden, reset_hidden_pre, gate @@ -3819,3 +3817,43 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0): "spatial_scale": spatial_scale }) return 
pool_out + + +def dice_loss(input, label, epsilon=0.00001): + """ + **Dice loss Layer** + Dice loss for comparing the similarity of two batches of data, + usually used for binary image segmentation, i.e. labels are binary. + The dice loss can be defined as below equation: + + .. math:: + + dice\_loss &= 1 - \\frac{2 * intersection\_area}{total\_area} \\\\ + &= \\frac{(total\_area - intersection\_area) - intersection\_area}{total\_area} \\\\ + &= \\frac{(union\_area - intersection\_area)}{total\_area} + + + Args: + input (Variable): The predictions with rank>=2. The first dimension is batch size, + and the last dimension is class number. + label (Variable): The ground truth with the same rank as input. The first dimension + is batch size, and the last dimension is 1. + epsilon (float): The epsilon is added to the denominator to avoid division by zero. + If both input and label are empty, the loss is 1. + Default: 0.00001 + + Returns: + dice_loss (Variable): The dice loss with shape [1]. + + Examples: + predictions = fluid.layers.softmax(x) + loss = fluid.layers.dice_loss(input=predictions, label=label) + """ + label = one_hot(label, depth=input.shape[-1]) + reduce_dim = range(1, len(input.shape)) + inse = reduce_sum(input * label, dim=reduce_dim) + dice_denominator = reduce_sum( + input, dim=reduce_dim) + reduce_sum( + label, dim=reduce_dim) + dice_score = 1 - inse * 2 / (dice_denominator + epsilon) + return reduce_mean(dice_score) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..89179fc586cde99318a17bab287441c0f2d6c369 --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py @@ -0,0 +1,149 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +BATCH_SIZE = 128 + + +def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc(input=[conv_3, conv_4], + size=class_dim, + act="softmax") + return prediction + + +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + dict_dim = len(word_dict) + net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM) + return net + + +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def train(use_cuda, train_program, save_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + optimizer = 
fluid.optimizer.Adagrad(learning_rate=0.002) + + word_dict = paddle.dataset.imdb.word_dict() + trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer=optimizer) + + def event_handler(event): + if isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=['words', 'label']) + + print("avg_cost: %s" % avg_cost) + print("acc : %s" % acc) + + if acc > 0.2: # Smaller value to increase CI speed + trainer.save_params(save_dirname) + trainer.stop() + + else: + print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.epoch + 1, avg_cost, acc)) + if np.isnan(avg_cost): + raise SystemExit("got NaN loss, training failed.") + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, list(map(np.array, event.metrics)))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(save_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=['words', 'label']) + + +def infer(use_cuda, inference_program, save_dirname=None): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + word_dict = paddle.dataset.imdb.word_dict() + + inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=save_dirname, + place=place) + + def create_random_lodtensor(lod, place, low, high): + data = np.random.random_integers(low, high, + [lod[-1], 1]).astype("int64") + res = fluid.LoDTensor() + res.set(data, place) + res.set_lod([lod]) + return res + + lod = [0, 4, 10] + tensor_words = create_random_lodtensor( + lod, place, low=0, high=len(word_dict) - 1) + results = inferencer.infer({'words': 
tensor_words})
+    print("infer results: ", results)
+
+
+def main(use_cuda):
+    if use_cuda and not fluid.core.is_compiled_with_cuda():
+        return
+    save_path = "understand_sentiment_conv.inference.model"
+    train(use_cuda, train_program, save_path)
+    infer(use_cuda, inference_program, save_path)
+
+
+if __name__ == '__main__':
+    for use_cuda in (False, True):
+        main(use_cuda=use_cuda)
diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..7db097b3b377c763ceed9fa909672088effe50cf
--- /dev/null
+++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py
@@ -0,0 +1,192 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import math
+import sys
+
+import paddle
+import paddle.fluid as fluid
+from functools import partial
+import numpy as np
+
+CLASS_DIM = 2
+EMB_DIM = 128
+BATCH_SIZE = 128
+LSTM_SIZE = 128
+
+
+def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
+    """Build an LSTM-style sentence classifier on fluid's DynamicRNN.
+
+    `data` is embedded, projected by a tanh fc layer and stepped through a
+    hand-written gated cell; the last step's hidden state feeds a softmax
+    fc layer of width `class_dim`, whose output is returned.
+    """
+    emb = fluid.layers.embedding(
+        input=data, size=[input_dim, emb_dim], is_sparse=True)
+    sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh')
+
+    rnn = fluid.layers.DynamicRNN()
+    with rnn.block():
+        word = rnn.step_input(sentence)
+        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
+        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
+
+        def gate_common(ipt, hidden, size):
+            # Sum of two fc projections; only the input one has a bias.
+            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
+            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
+            return gate0 + gate1
+
+        forget_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+        input_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+        output_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+        # NOTE(review): textbook LSTMs use tanh for the candidate cell;
+        # sigmoid is kept exactly as written -- confirm this is intended.
+        cell_gate = fluid.layers.sigmoid(
+            x=gate_common(word, prev_hidden, lstm_size))
+
+        cell = forget_gate * prev_cell + input_gate * cell_gate
+        hidden = output_gate * fluid.layers.tanh(x=cell)
+        rnn.update_memory(prev_cell, cell)
+        rnn.update_memory(prev_hidden, hidden)
+        rnn.output(hidden)
+
+    last = fluid.layers.sequence_last_step(rnn())
+    prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax")
+    return prediction
+
+
+def inference_program(word_dict):
+    """Declare the "words" input and return the classifier's prediction."""
+    data = fluid.layers.data(
+        name="words", shape=[1], dtype="int64", lod_level=1)
+
+    dict_dim = len(word_dict)
+    pred = dynamic_rnn_lstm(data, dict_dim, CLASS_DIM, EMB_DIM, LSTM_SIZE)
+    return pred
+
+
+def train_program(word_dict):
+    """Return [avg_cost, accuracy] of the prediction against "label"."""
+    prediction = inference_program(word_dict)
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(cost)
+    accuracy = fluid.layers.accuracy(input=prediction, label=label)
+    return [avg_cost, accuracy]
+
+
+def train(use_cuda, train_program, save_dirname):
+    """Train on the IMDB dataset and save parameters to `save_dirname`.
+
+    Training is cut short on purpose: parameters are saved and the trainer
+    is stopped as soon as step 1 finishes, to keep CI fast.
+    """
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
+
+    word_dict = paddle.dataset.imdb.word_dict()
+    trainer = fluid.Trainer(
+        train_func=partial(train_program, word_dict),
+        place=place,
+        optimizer=optimizer)
+
+    def event_handler(event):
+        if isinstance(event, fluid.EndEpochEvent):
+            test_reader = paddle.batch(
+                paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
+            avg_cost, acc = trainer.test(
+                reader=test_reader, feed_order=['words', 'label'])
+
+            print("avg_cost: %s" % avg_cost)
+            print("acc : %s" % acc)
+
+            if acc > 0.2:  # Smaller value to increase CI speed
+                trainer.save_params(save_dirname)
+                trainer.stop()
+
+            else:
+                print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
+                    event.epoch + 1, avg_cost, acc))
+                # math and sys are imported at the top of the file; without
+                # them this branch raised NameError instead of reporting NaN.
+                if math.isnan(avg_cost):
+                    sys.exit("got NaN loss, training failed.")
+        elif isinstance(event, fluid.EndStepEvent):
+            # list() so Python 3 prints the metric values, not a map object.
+            print("Step {0}, Epoch {1} Metrics {2}".format(
+                event.step, event.epoch, list(map(np.array, event.metrics))))
+            if event.step == 1:  # Run 2 iterations to speed CI
+                trainer.save_params(save_dirname)
+                trainer.stop()
+
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.imdb.train(word_dict), buf_size=25000),
+        batch_size=BATCH_SIZE)
+
+    trainer.train(
+        num_epochs=1,
+        event_handler=event_handler,
+        reader=train_reader,
+        feed_order=['words', 'label'])
+
+
+def infer(use_cuda, inference_program, save_dirname=None):
+    """Load parameters from `save_dirname` and infer on random word ids."""
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    word_dict = paddle.dataset.imdb.word_dict()
+
+    inferencer = fluid.Inferencer(
+        infer_func=partial(inference_program, word_dict),
+        param_path=save_dirname,
+        place=place)
+
+    def create_random_lodtensor(lod, place, low, high):
+        # randint's upper bound is exclusive, so high + 1 keeps `high`
+        # reachable, as with the deprecated random_integers.
+        data = np.random.randint(
+            low, high + 1, [lod[-1], 1]).astype("int64")
+        res = fluid.LoDTensor()
+        res.set(data, place)
+        res.set_lod([lod])
+        return res
+
+    lod = [0, 4, 10]
+    tensor_words = create_random_lodtensor(
+        lod, place, low=0, high=len(word_dict) - 1)
+    results = inferencer.infer({'words': tensor_words})
+    print("infer results: ", results)
+
+
+def main(use_cuda):
+    """Run train then infer; skip CUDA when Paddle is built without it."""
+    if use_cuda and not fluid.core.is_compiled_with_cuda():
+        return
+    # Own directory name so this test does not share a path with the conv
+    # variant of the same test.
+    save_path = "understand_sentiment_dynamic_rnn.inference.model"
+    train(use_cuda, train_program, save_path)
+    infer(use_cuda, inference_program, save_path)
+
+
+if __name__ == '__main__':
+    for use_cuda in (False, True):
+        main(use_cuda=use_cuda)
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 2ae9653953c2f5f6a399243bef2c7fb756f9692f..be704a7be7d2c9f3c95ad81ca906eeaf73b35beb 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -26,7 +26,7 @@ list(REMOVE_ITEM TEST_OPS decorators) # decorators is a helper python file, not
 
 function(py_test_modules TARGET_NAME)
   if(WITH_TESTING)
-    set(options "")
+    set(options SERIAL)
     set(oneValueArgs "")
     set(multiValueArgs MODULES DEPS ENVS)
     cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -34,6 +34,11 @@ function(py_test_modules TARGET_NAME)
              COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS}
              ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES}
              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+    if (py_test_modules_SERIAL)
+        # RUN_SERIAL is the CTest property that keeps a test from running
+        # in parallel with others; a property named SERIAL has no effect.
+        set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
+    endif()
   endif()
 endfunction()
 
@@ -81,7 +86,7 @@ endif(WITH_FAST_BUNDLE_TEST)
 py_test_modules(test_sequence_expand MODULES test_sequence_expand)
 # tests with high overhead
 py_test_modules(test_parallel_executor MODULES test_parallel_executor)
-py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR})
+py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL)
 py_test_modules(test_train_dyn_rnn MODULES test_dyn_rnn)
 py_test_modules(test_mul_op MODULES test_mul_op)
 py_test_modules(test_network_with_dtype MODULES test_network_with_dtype)
@@ -106,4 +111,4 @@ py_test_modules(test_registry MODULES test_registry)
 py_test_modules(test_fetch_var MODULES test_fetch_var)
 py_test_modules(test_dynrnn_static_input MODULES test_dynrnn_static_input)
 py_test_modules(test_parallel_op MODULES test_parallel_op)
-py_test_modules(test_dist_train MODULES test_dist_train)
+py_test_modules(test_dist_train MODULES test_dist_train SERIAL)