diff --git a/CMakeLists.txt b/CMakeLists.txt index 265ddc9504167f21f54a1b1e7777147b3b6d37d9..fb796103350ac4403d4151cf08eb4315bcde68fd 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,10 @@ include(generic) # simplify cmake module # TODO(Shibo Tao): remove find_package(CUDA) completely. find_package(CUDA QUIET) option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) - +option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN" OFF) +if (WITH_GPU AND WITH_XPU) + message(FATAL_ERROR "Error when compile GPU and XPU at the same time") +endif() # cmake 3.12, 3.13, 3.14 will append gcc link options to nvcc, and nvcc doesn't recognize them. if(WITH_GPU AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.12) AND (${CMAKE_VERSION} VERSION_LESS 3.15)) message(FATAL_ERROR "cmake ${CMAKE_VERSION} is not supported when WITH_GPU=ON because of bug https://cmake.org/pipermail/cmake/2018-September/068195.html. " diff --git a/README.md b/README.md index 4196811e37f73f84b0327f5cbf1996aaaf7e6dcc..d14d0ef00148140bd931bbc692fbe15bb21a7bf3 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ pip install paddlepaddle # Linux GPU cuda10cudnn7 pip install paddlepaddle-gpu # Linux GPU cuda9cudnn7 -pip install paddlepaddle-gpu==1.8.3.post97 +pip install paddlepaddle-gpu==1.8.4.post97 ``` It is recommended to read [this doc](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/install/index_en.html) on our website. diff --git a/README_cn.md b/README_cn.md index 93ad06d20010fcba1ff3382b169cb78328f2a375..e4544a3eff6e55a29fcfa806786e55e0ac41a672 100644 --- a/README_cn.md +++ b/README_cn.md @@ -30,7 +30,7 @@ pip install paddlepaddle # Linux GPU cuda10cudnn7 pip install paddlepaddle-gpu # Linux GPU cuda9cudnn7 -pip install paddlepaddle-gpu==1.8.3.post97 +pip install paddlepaddle-gpu==1.8.4.post97 ``` 更多安装信息详见官网 [安装说明](http://www.paddlepaddle.org.cn/documentation/docs/zh/1.8/beginners_guide/install/index_cn.html) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index bb57b42dcc74114312a400a0f6cc95df307de6bb..cf458d97706755e794c5fbb1ba9d3fcb51e9d1ce 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -63,6 +63,11 @@ if(WITH_BOX_PS) add_definitions(-DPADDLE_WITH_BOX_PS) endif() +if(WITH_XPU) + message(STATUS "Compile with XPU!") + add_definitions(-DPADDLE_WITH_XPU) +endif() + if(WITH_GPU) add_definitions(-DPADDLE_WITH_CUDA) add_definitions(-DEIGEN_USE_GPU) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index bb92eae732e1ebe98dc05890d90bf8b53d8b41a9..b7a93cd9ee2160090c0142d62d96da72e4c58717 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -61,6 +61,10 @@ function(detect_installed_gpus out_variable) if(NOT CUDA_gpu_detect_output) message(STATUS "Automatic GPU detection failed. 
Building for all known architectures.") set(${out_variable} ${paddle_known_gpu_archs} PARENT_SCOPE) + #Todo: fix Automatic GPU detection failed on windows + if(WIN32) + set(${out_variable} "61 75" PARENT_SCOPE) + endif() else() set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) endif() diff --git a/cmake/external/cub.cmake b/cmake/external/cub.cmake index 4a343b2c6af2ce64d65203ae5955b2d552055198..6f790f1af8e1a03d1101244a4d82045331b44c13 100644 --- a/cmake/external/cub.cmake +++ b/cmake/external/cub.cmake @@ -17,7 +17,7 @@ include(ExternalProject) set(CUB_PREFIX_DIR ${THIRD_PARTY_PATH}/cub) set(CUB_SOURCE_DIR ${THIRD_PARTY_PATH}/cub/src/extern_cub) set(CUB_REPOSITORY https://github.com/NVlabs/cub.git) -set(CUB_TAG 1.9.8) +set(CUB_TAG 1.8.0) cache_third_party(extern_cub REPOSITORY ${CUB_REPOSITORY} diff --git a/cmake/external/gloo.cmake b/cmake/external/gloo.cmake index 337e326dc166fd844a938ecd936d8c4162a45573..895bc0849a2a3b57e9e7ba2576567032f07fb35b 100644 --- a/cmake/external/gloo.cmake +++ b/cmake/external/gloo.cmake @@ -14,13 +14,21 @@ INCLUDE(ExternalProject) +execute_process(COMMAND bash -c "gcc -dumpversion" OUTPUT_VARIABLE GCC_VERSION) + SET(GLOO_PROJECT "extern_gloo") IF((NOT DEFINED GLOO_VER) OR (NOT DEFINED GLOO_URL)) MESSAGE(STATUS "use pre defined download url") SET(GLOO_VER "master" CACHE STRING "" FORCE) SET(GLOO_NAME "gloo" CACHE STRING "" FORCE) - SET(GLOO_URL "https://pslib.bj.bcebos.com/gloo.tar.gz" CACHE STRING "" FORCE) + + if(${GCC_VERSION} VERSION_EQUAL "8.2.0") + SET(GLOO_URL "https://fleet.bj.bcebos.com/gloo/gloo.tar.gz.gcc8" CACHE STRING "" FORCE) + else() + SET(GLOO_URL "https://fleet.bj.bcebos.com/gloo/gloo.tar.gz.gcc482" CACHE STRING "" FORCE) + endif() ENDIF() + MESSAGE(STATUS "GLOO_NAME: ${GLOO_NAME}, GLOO_URL: ${GLOO_URL}") SET(GLOO_SOURCE_DIR "${THIRD_PARTY_PATH}/gloo") SET(GLOO_DOWNLOAD_DIR "${GLOO_SOURCE_DIR}/src/${GLOO_PROJECT}") diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8a927d8e282a03e8a74c0814ee8d9b247451a091 --- /dev/null +++ b/cmake/external/xpu.cmake @@ -0,0 +1,54 @@ +if (NOT WITH_XPU) + return() +endif() + +INCLUDE(ExternalProject) +SET(XPU_PROJECT "extern_xpu") +SET(XPU_URL "https://kunlun1.su.bcebos.com/xpu.tar.gz" CACHE STRING "" FORCE) +SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu") +SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}") +SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu") +SET(XPU_API_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/api/include") +SET(XPU_RUNTIME_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/runtime/include") +SET(XPU_LIB_DIR "${THIRD_PARTY_PATH}/install/xpu/lib") + +SET(XPU_API_LIB_NAME "libxpuapi.so") +SET(XPU_RT_LIB_NAME "libxpurt.so") +SET(XPU_SIM_LIB_NAME "libxpusim.so") +SET(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}") +SET(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}") +SET(XPU_SIM_LIB "${XPU_LIB_DIR}/${XPU_SIM_LIB_NAME}") + +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib") + +INCLUDE_DIRECTORIES(${XPU_API_INC_DIR}) +INCLUDE_DIRECTORIES(${XPU_RUNTIME_INC_DIR}) + +FILE(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(XPU)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY xpu/api xpu/runtime xpu/lib \n" + " DESTINATION ${XPU_INSTALL_DIR})\n") + +ExternalProject_Add( + ${XPU_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${XPU_SOURCE_DIR} + DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${XPU_URL} -c -q -O xpu.tar.gz + && tar 
xvf xpu.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT} +) + +ADD_LIBRARY(shared_xpuapi SHARED IMPORTED GLOBAL) +set_property(TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION "${XPU_API_LIB}") + +# generate a static dummy target to track xpulib dependencies +# for cc_library(xxx SRCS xxx.c DEPS xpulib) +generate_dummy_static_lib(LIB_NAME "xpulib" GENERATOR "xpu.cmake") + +TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_SIM_LIB}) +ADD_DEPENDENCIES(xpulib ${XPU_PROJECT}) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 8842e8e21c6df224bb6341a4f7f526e3d61e92e1..26538c78b172bdf8efa9f7c08e80593de25f676c 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -384,8 +384,8 @@ function(cc_test_run TARGET_NAME) set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true) set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true) - # No unit test should exceed 10 minutes. - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600) + # No unit test should exceed 2 minutes. + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120) endif() endfunction() @@ -743,8 +743,8 @@ function(py_test TARGET_NAME) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() - # No unit test should exceed 10 minutes. - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600) + # No unit test should exceed 2 minutes. + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 120) endif() endfunction() diff --git a/cmake/operators.cmake b/cmake/operators.cmake index e927fae63f0fc28902431c7b09350c7f7d10c52a..f60a6dc3f0c89dd345b04ea3a1e213de770e5760 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -8,6 +8,7 @@ function(op_library TARGET) set(hip_cu_srcs) set(miopen_hip_cc_srcs) set(cu_cc_srcs) + set(xpu_cc_srcs) set(cudnn_cu_cc_srcs) set(cudnn_cu_srcs) set(CUDNN_FILE) @@ -60,6 +61,12 @@ function(op_library TARGET) list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc) endif() endif() + if(WITH_XPU) + string(REPLACE "_op" "_xpu_op" XPU_FILE "${TARGET}") + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${XPU_FILE}.cc) + list(APPEND xpu_cc_srcs xpu/${XPU_FILE}.cc) + endif() + endif() else() foreach(src ${op_library_SRCS}) if (${src} MATCHES ".*\\.hip.cu$") @@ -76,6 +83,8 @@ function(op_library TARGET) list(APPEND mkldnn_cc_srcs ${src}) elseif(${src} MATCHES ".*\\.cu.cc$") list(APPEND cu_cc_srcs ${src}) + elseif(WITH_XPU AND ${src} MATCHES ".*_xpu_op.cc$") + list(APPEND xpu_cc_srcs ${src}) elseif(${src} MATCHES ".*\\.cc$") list(APPEND cc_srcs ${src}) else() @@ -109,7 +118,7 @@ function(op_library TARGET) hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cu_srcs} ${miopen_hip_cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) else() - cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS} + cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) endif() @@ -150,10 +159,11 @@ function(op_library TARGET) list(LENGTH cu_srcs cu_srcs_len) list(LENGTH cu_cc_srcs cu_cc_srcs_len) list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len) + list(LENGTH xpu_cc_srcs xpu_cc_srcs_len) list(LENGTH hip_cu_srcs hip_cu_srcs_len) list(LENGTH miopen_hip_cc_srcs miopen_hip_cc_srcs_len) if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND 
${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND - ${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0) + ${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0) file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n") set(pybind_flag 1) endif() @@ -179,6 +189,9 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MIOPEN);\n") endif() + if (WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0) + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, XPU);\n") + endif() # pybind USE_OP_DEVICE_KERNEL for MKLDNN if (WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0) # Append first implemented MKLDNN activation operator @@ -228,6 +241,7 @@ function(register_operators) file(GLOB OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc") string(REPLACE "_mkldnn" "" OPS "${OPS}") + string(REPLACE "_xpu" "" OPS "${OPS}") string(REPLACE ".cc" "" OPS "${OPS}") list(REMOVE_DUPLICATES OPS) list(LENGTH register_operators_DEPS register_operators_DEPS_len) diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index be536b2eefbb123d73ec6f8d17c3d22e5aca2cfc..c9442e8f843ac152cac02908799a8d24f5951e58 100644 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -250,6 +250,11 @@ if(WITH_GPU) file_download_and_uncompress(${CUDAERROR_URL} "cudaerror") # download file cudaErrorMessage endif(WITH_GPU) +if(WITH_XPU) + include(external/xpu) # download, build, install xpu + list(APPEND third_party_deps extern_xpu) +endif(WITH_XPU) + if(WITH_PSLIB) include(external/pslib) # download, build, install pslib list(APPEND third_party_deps extern_pslib) @@ -263,10 +268,6 @@ if(WITH_PSLIB) endif() endif(WITH_PSLIB) -if(NOT WIN32 AND NOT APPLE) - include(external/gloo) - list(APPEND third_party_deps extern_gloo) -endif() if(WITH_BOX_PS) include(external/box_ps) @@ -274,6 +275,11 @@ if(WITH_BOX_PS) endif(WITH_BOX_PS) if(WITH_DISTRIBUTE) + if(WITH_GLOO) + include(external/gloo) + list(APPEND third_party_deps extern_gloo) + endif() + if(WITH_GRPC) list(APPEND third_party_deps extern_grpc) else() diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index d725bdffa010b9427f934dbd4f3e741fb63b4ae4..10d2c2c6c9172ef2025d72e1723d74c8423aed1d 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -122,6 +122,10 @@ cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor cc_library(attribute SRCS attribute.cc DEPS framework_proto boost) cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc device_context) + +cc_library(op_version_registry SRCS op_version_registry.cc DEPS framework_proto boost) +cc_test(op_version_registry_test SRCS op_version_registry_test.cc DEPS op_version_registry) + cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute glog) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_library(no_need_buffer_vars_inference SRCS no_need_buffer_vars_inference.cc DEPS attribute device_context) @@ -268,6 +272,7 @@ cc_test(op_compatible_info_test SRCS op_compatible_info_test.cc DEPS op_compatib cc_library(save_load_util SRCS save_load_util DEPS tensor scope layer) cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer) +cc_library(generator SRCS generator.cc) # Get the current working branch execute_process( diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc index 
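The op_library() changes above set a naming convention: for an operator target foo_op, a WITH_XPU build additionally compiles xpu/foo_xpu_op.cc (or any *_xpu_op.cc listed in SRCS) and appends USE_OP_DEVICE_KERNEL(foo, XPU) to the generated pybind file. The following is only an illustrative sketch of such a kernel source for a hypothetical "foo" operator; the REGISTER_OP_XPU_KERNEL macro is assumed to be the registration entry point added alongside these XPU changes and is not shown verbatim in this hunk.

// xpu/foo_xpu_op.cc -- hypothetical example, not part of this patch
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

template <typename T>
class FooXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Work would be issued through the Baidu Kunlun runtime here; the build
    // links libxpuapi.so / libxpurt.so via the xpulib target from xpu.cmake.
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
// Assumes REGISTER_OP_XPU_KERNEL is provided by the op_registry.h change.
REGISTER_OP_XPU_KERNEL(foo, ops::FooXPUKernel<float>);
#endif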
f2421248e33f236b9fa861f22ce4848531cf1791..180b33d0cb72e2c4c9e6e8caff9f0ef5f1b04689 100644 --- a/paddle/fluid/framework/dlpack_tensor.cc +++ b/paddle/fluid/framework/dlpack_tensor.cc @@ -70,6 +70,11 @@ struct DLContextVisitor : public boost::static_visitor<::DLContext> { return ctx; } + inline ::DLContext operator()(const platform::XPUPlace &place) const { + PADDLE_THROW( + platform::errors::Unimplemented("platform::XPUPlace is not supported")); + } + inline ::DLContext operator()(const platform::CUDAPlace &place) const { #ifdef PADDLE_WITH_CUDA ::DLContext ctx; diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 8e2e1d38a66d1039519bab312f77bef6604d8ec1..f11edb9a41bdcbcb33efc600f1d7d8f70fb76f45 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -444,8 +444,8 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx, int64_t max_memory_size = GetEagerDeletionThreshold(); std::unique_ptr gc; if (!ctx->force_disable_gc_ && max_memory_size >= 0) { -#ifdef PADDLE_WITH_CUDA if (platform::is_gpu_place(place_)) { +#ifdef PADDLE_WITH_CUDA if (IsFastEagerDeletionModeEnabled()) { gc.reset(new UnsafeFastGPUGarbageCollector( BOOST_GET_CONST(platform::CUDAPlace, place_), max_memory_size)); @@ -453,13 +453,22 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx, gc.reset(new DefaultStreamGarbageCollector( BOOST_GET_CONST(platform::CUDAPlace, place_), max_memory_size)); } - } else if (platform::is_cpu_place(place_)) { +#else + PADDLE_THROW( + platform::errors::Unimplemented("No GPU gc found in CPU/XPU paddle")); #endif + } else if (platform::is_cpu_place(place_)) { gc.reset(new CPUGarbageCollector( BOOST_GET_CONST(platform::CPUPlace, place_), max_memory_size)); -#ifdef PADDLE_WITH_CUDA - } + } else if (platform::is_xpu_place(place_)) { +#ifdef PADDLE_WITH_XPU + gc.reset(new XPUGarbageCollector( + BOOST_GET_CONST(platform::XPUPlace, place_), max_memory_size)); +#else + PADDLE_THROW( + platform::errors::Unimplemented("No XPU gc found in CPU/GPU paddle")); #endif + } } for (int64_t i = start_op_index; i < end_op_index; ++i) { diff --git a/paddle/fluid/framework/fleet/CMakeLists.txt b/paddle/fluid/framework/fleet/CMakeLists.txt index 0d62488bfe67a316f4840107508129c49b36f23c..3eee0a1abbaf04aef2faa9e52c552e89ce84c7de 100644 --- a/paddle/fluid/framework/fleet/CMakeLists.txt +++ b/paddle/fluid/framework/fleet/CMakeLists.txt @@ -19,6 +19,6 @@ else() cc_library(gloo_wrapper SRCS gloo_wrapper.cc DEPS framework_proto variable_helper scope) endif(WITH_GLOO) -cc_library(heter_wrapper SRCS heter_wrapper.cc DEPS framework_proto device_context) +cc_library(heter_wrapper SRCS heter_wrapper.cc DEPS framework_proto device_context heter_service_proto) cc_test(test_fleet SRCS test_fleet.cc DEPS fleet_wrapper gloo_wrapper fs shell) diff --git a/paddle/fluid/framework/garbage_collector.cc b/paddle/fluid/framework/garbage_collector.cc index ac892443de36cf6d37d56da761fb3d60628a5e4a..f69ada080676cddfa4f31c6cbc450b8eca28b3ac 100644 --- a/paddle/fluid/framework/garbage_collector.cc +++ b/paddle/fluid/framework/garbage_collector.cc @@ -50,6 +50,15 @@ void CPUGarbageCollector::ClearCallback(const std::function &callback) { callback(); } +#ifdef PADDLE_WITH_XPU +XPUGarbageCollector::XPUGarbageCollector(const platform::XPUPlace &place, + size_t max_memory_size) + : GarbageCollector(place, max_memory_size) {} +void XPUGarbageCollector::ClearCallback(const std::function &callback) { + callback(); +} +#endif + #ifdef 
PADDLE_WITH_CUDA UnsafeFastGPUGarbageCollector::UnsafeFastGPUGarbageCollector( const platform::CUDAPlace &place, size_t max_memory_size) diff --git a/paddle/fluid/framework/garbage_collector.h b/paddle/fluid/framework/garbage_collector.h index 2212122c03de3416c91fcc46bf510bbc02d4302e..4f7739652822b9047b1798b6bd66261effbe2f49 100644 --- a/paddle/fluid/framework/garbage_collector.h +++ b/paddle/fluid/framework/garbage_collector.h @@ -59,6 +59,16 @@ class CPUGarbageCollector : public GarbageCollector { void ClearCallback(const std::function &callback) override; }; +#ifdef PADDLE_WITH_XPU +class XPUGarbageCollector : public GarbageCollector { + public: + XPUGarbageCollector(const platform::XPUPlace &place, size_t max_memory_size); + + protected: + void ClearCallback(const std::function &callback) override; +}; +#endif + #ifdef PADDLE_WITH_CUDA class UnsafeFastGPUGarbageCollector : public GarbageCollector { public: diff --git a/paddle/fluid/framework/generator.cc b/paddle/fluid/framework/generator.cc new file mode 100644 index 0000000000000000000000000000000000000000..d00e38784c2c0415a59a33fc24d708c253481c21 --- /dev/null +++ b/paddle/fluid/framework/generator.cc @@ -0,0 +1,78 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include +#include + +#include "paddle/fluid/framework/generator.h" + +namespace paddle { +namespace framework { + +std::shared_ptr Generator::gen_instance_ = NULL; + +GeneratorState* Generator::GetState() { + std::lock_guard lock(this->mutex); + return this->state_.get(); +} + +void Generator::SetState(GeneratorState* state_in) { + std::lock_guard lock(this->mutex); + *this->state_ = *state_in; +} + +uint64_t Generator::GetCurrentSeed() { + std::lock_guard lock(this->mutex); + return this->state_->current_seed; +} + +uint64_t Generator::Seed() { + std::lock_guard lock(this->mutex); + uint64_t seed; + std::random_device de; + seed = ((((uint64_t)de()) << 32) + de()) & 0x1FFFFFFFFFFFFF; + this->state_->current_seed = seed; + std::seed_seq seq({seed}); + this->state_->cpu_engine.seed(seq); + + return this->state_->current_seed; +} + +void Generator::SetCurrentSeed(uint64_t seed) { + std::lock_guard lock(this->mutex); + this->state_->current_seed = uint64_t(seed); + std::seed_seq seq({seed}); + this->state_->cpu_engine.seed(seq); +} + +std::mt19937_64& Generator::GetCPUEngine() { + std::lock_guard lock(this->mutex); + return this->state_->cpu_engine; +} + +void Generator::SetCPUEngine(std::mt19937_64 engine) { + std::lock_guard lock(this->mutex); + this->state_->cpu_engine = std::mt19937_64(engine); +} + +uint64_t Generator::Random64() { + std::lock_guard lock(this->mutex); + return this->state_->cpu_engine(); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/generator.h b/paddle/fluid/framework/generator.h new file mode 100644 index 0000000000000000000000000000000000000000..17870782ba72a3247de734642962ffec48c0c91e --- /dev/null +++ b/paddle/fluid/framework/generator.h @@ -0,0 +1,96 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
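The new paddle::framework::Generator above is a mutex-guarded, process-wide CPU random source (a std::mt19937_64 plus the current seed) reached through a shared-pointer singleton. A minimal usage sketch built only from the methods defined in generator.cc; the call site itself is illustrative and not part of the patch.

#include <cstdint>
#include <random>

#include "paddle/fluid/framework/generator.h"

void SampleWithFixedSeed() {
  auto gen = paddle::framework::Generator::GetInstance();
  gen->SetCurrentSeed(42);                       // reseed the shared engine
  uint64_t raw = gen->Random64();                // one draw from the mt19937_64
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);
  float u = dist(gen->GetCPUEngine());           // or use the engine directly
  (void)raw;
  (void)u;
}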
*/ + +#pragma once + +#include +#include +#include +#include // temp for debug +#include +#include // NOLINT +#include +#include +#include + +namespace paddle { +namespace framework { + +struct GeneratorState { + int64_t device = -1; + uint64_t current_seed = 34342423252; + std::mt19937_64 cpu_engine; +}; + +struct Generator { + Generator() { + GeneratorState default_gen_state_cpu; + default_gen_state_cpu.device = -1; + default_gen_state_cpu.current_seed = 34342423252; + std::seed_seq seq({34342423252}); + default_gen_state_cpu.cpu_engine = std::mt19937_64(seq); + this->state_ = std::make_shared(default_gen_state_cpu); + } + explicit Generator(GeneratorState state_in) + : state_{std::make_shared(state_in)} {} + Generator(const Generator& other) + : Generator(other, std::lock_guard(other.mutex)) {} + + // get random state + GeneratorState* GetState(); + // set random state + void SetState(GeneratorState* state_in); + // get current seed + uint64_t GetCurrentSeed(); + // random a seed and get + uint64_t Seed(); + + // set seed + void SetCurrentSeed(uint64_t seed); + // get cpu engine + std::mt19937_64& GetCPUEngine(); + // set cpu engine + void SetCPUEngine(std::mt19937_64 engine); + + uint64_t Random64(); + + bool is_init_py = false; + + // CPU Generator singleton + static std::shared_ptr GetInstance() { + if (NULL == gen_instance_) { + gen_instance_.reset(new paddle::framework::Generator()); + } + return gen_instance_; + } + + static std::shared_ptr GetInstanceX() { + if (NULL == gen_instance_) { + gen_instance_.reset(new paddle::framework::Generator()); + } + gen_instance_->is_init_py = true; + return gen_instance_; + } + + private: + static std::shared_ptr gen_instance_; + std::shared_ptr state_; + mutable std::mutex mutex; + + Generator(const Generator& other, const std::lock_guard&) + : state_(std::make_shared(*(other.state_))) {} +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index 60e4ac8cbcfd8cc8f1d14363538fe1e118b953cd..9d3e0806ac79d838765ca5a4bbf61d0f67ab6ed5 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -368,3 +368,7 @@ REGISTER_PASS(conv_transpose_bn_fuse_pass, paddle::framework::ir::ConvTransposeBNFusePass); REGISTER_PASS(conv_transpose_eltwiseadd_bn_fuse_pass, paddle::framework::ir::ConvTransposeEltwiseAddBNFusePass); +REGISTER_PASS(depthwise_conv_bn_fuse_pass, + paddle::framework::ir::DepthwiseConvBNFusePass); +REGISTER_PASS(depthwise_conv_eltwiseadd_bn_fuse_pass, + paddle::framework::ir::DepthwiseConvEltwiseAddBNFusePass); diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.h b/paddle/fluid/framework/ir/conv_bn_fuse_pass.h index fcdbcf299c504c00b3027207bc2f4ac019d48ffc..57a9f69ca15af2759874a1e2a0b58399de652693 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.h @@ -56,6 +56,16 @@ class ConvTransposeEltwiseAddBNFusePass : public ConvEltwiseAddBNFusePass { std::string conv_type() const { return "conv2d_transpose"; } }; +class DepthwiseConvBNFusePass : public ConvBNFusePass { + public: + std::string conv_type() const { return "depthwise_conv2d"; } +}; + +class DepthwiseConvEltwiseAddBNFusePass : public ConvEltwiseAddBNFusePass { + public: + std::string conv_type() const { return "depthwise_conv2d"; } +}; + } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/subgraph_detector.cc 
b/paddle/fluid/framework/ir/subgraph_detector.cc index 62c91af15da60b9b0a74028afb0aeb689073b524..7979953d7be827ffc944ae939782923504802bbc 100644 --- a/paddle/fluid/framework/ir/subgraph_detector.cc +++ b/paddle/fluid/framework/ir/subgraph_detector.cc @@ -309,7 +309,8 @@ std::vector> SubgraphDetector::ExtractSubGraphs() { BriefNode *brief_node = itr.second; if (!Agent(brief_node->node).marked()) { - VLOG(4) << brief_node->node->id() << " node not a trt candidate."; + VLOG(4) << brief_node->node->id() << " node named " + << brief_node->node->Name() << " is not a trt candidate."; continue; } diff --git a/paddle/fluid/framework/library_type.h b/paddle/fluid/framework/library_type.h index d46f8a574c0d956dc0a90bc2741d2cb80313ab7f..4307e51862df572e013431fceaaf89cc1cf6679c 100644 --- a/paddle/fluid/framework/library_type.h +++ b/paddle/fluid/framework/library_type.h @@ -59,6 +59,8 @@ inline LibraryType StringToLibraryType(const char* ctype) { // CPU, CUDA, PLAIN are same library type. } else if (s == std::string("CPU")) { return LibraryType::kPlain; + } else if (s == std::string("XPU")) { + return LibraryType::kPlain; } else if (s == std::string("CUDA")) { return LibraryType::kPlain; } else { diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 78595e50b2da627065309041079839faa197cc8f..bccc92e5c4352927f309f3605bb3c8d8dd823bb5 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -78,21 +78,37 @@ class CompileTimeInferShapeContext : public InferShapeContext { void ShareDim(const std::string &in, const std::string &out, size_t i = 0, size_t j = 0) override { - PADDLE_ENFORCE_LT(i, Inputs(in).size()); - PADDLE_ENFORCE_LT(j, Outputs(out).size()); + PADDLE_ENFORCE_LT(i, Inputs(in).size(), + platform::errors::InvalidArgument( + "The input variable index is out of range, expected " + "index less than %d, but received index is %d.", + Inputs(in).size(), i)); + PADDLE_ENFORCE_LT(j, Outputs(out).size(), + platform::errors::InvalidArgument( + "The output variable index is out of range, expected " + "index less than %d, but received index is %d.", + Outputs(out).size(), j)); + std::string input_n = Inputs(in)[i]; std::string output_n = Outputs(out)[j]; - PADDLE_ENFORCE(input_n != framework::kEmptyVarName, "The %s[%d] is @EMPTY@", - in, i); - PADDLE_ENFORCE(output_n != framework::kEmptyVarName, - "The %s[%d] is @EMPTY@", out, j); + PADDLE_ENFORCE_NE(input_n, framework::kEmptyVarName, + platform::errors::InvalidArgument( + "The input variable %s[%d] is empty.", in, i)); + PADDLE_ENFORCE_NE(output_n, framework::kEmptyVarName, + platform::errors::InvalidArgument( + "The output variable %s[%d] is empty.", out, j)); auto *in_var = block_.FindVarRecursive(input_n); auto *out_var = block_.FindVarRecursive(output_n); - PADDLE_ENFORCE(in_var->GetType() == out_var->GetType(), - "The type of %s and %s is not the same.", input_n, output_n); + PADDLE_ENFORCE_EQ( + in_var->GetType(), out_var->GetType(), + platform::errors::InvalidArgument( + "The type of input %s and output %s do not match. 
The input type " + "is %s, output type is %s.", + input_n, output_n, DataTypeToString(in_var->GetType()), + DataTypeToString(out_var->GetType()))); SetDim(output_n, GetDim(input_n)); } @@ -126,12 +142,22 @@ class CompileTimeInferShapeContext : public InferShapeContext { void ShareLoD(const std::string &in, const std::string &out, size_t i = 0, size_t j = 0) const override { - PADDLE_ENFORCE_LT(i, Inputs(in).size()); - PADDLE_ENFORCE_LT(j, Outputs(out).size()); - PADDLE_ENFORCE(Inputs(in)[i] != framework::kEmptyVarName, - "The %s[%d] is @EMPTY@", in, i); - PADDLE_ENFORCE(Outputs(out)[j] != framework::kEmptyVarName, - "The %s[%d] is @EMPTY@", out, j); + PADDLE_ENFORCE_LT(i, Inputs(in).size(), + platform::errors::InvalidArgument( + "The input variable index is out of range, expected " + "index less than %d, but received index is %d.", + Inputs(in).size(), i)); + PADDLE_ENFORCE_LT(j, Outputs(out).size(), + platform::errors::InvalidArgument( + "The output variable index is out of range, expected " + "index less than %d, but received index is %d.", + Outputs(out).size(), j)); + PADDLE_ENFORCE_NE(Inputs(in)[i], framework::kEmptyVarName, + platform::errors::InvalidArgument( + "The input variable %s[%d] is empty.", in, i)); + PADDLE_ENFORCE_NE(Outputs(out)[j], framework::kEmptyVarName, + platform::errors::InvalidArgument( + "The output variable %s[%d] is empty.", out, j)); auto *in_var = block_.FindVarRecursive(Inputs(in)[i]); auto *out_var = block_.FindVarRecursive(Outputs(out)[j]); if (in_var->GetType() != proto::VarType::LOD_TENSOR && @@ -144,30 +170,38 @@ class CompileTimeInferShapeContext : public InferShapeContext { int32_t GetLoDLevel(const std::string &in, size_t i = 0) const override { PADDLE_ENFORCE_LT(i, Inputs(in).size(), - "Input %s of operator %s only has %d elements.", in, - op_.Type(), Inputs(in).size()); + platform::errors::InvalidArgument( + "The input variable index is out of range, input " + "variable %s of operator %s only has %d elements.", + in, op_.Type(), Inputs(in).size())); PADDLE_ENFORCE_NE(Inputs(in)[i], framework::kEmptyVarName, - "Input %s[%d] of operator %s is @EMPTY@", in, op_.Type(), - i); + platform::errors::InvalidArgument( + "The input variable %s[%d] of operator %s is empty.", + in, i, op_.Type())); auto *in_var = block_.FindVarRecursive(Inputs(in)[i]); PADDLE_ENFORCE_NOT_NULL( - in_var, "Input %s[%d] of operator %s should not be nullptr.", in, - op_.Type(), i); + in_var, platform::errors::NotFound( + "The input variable %s[%d] of operator %s is not found.", + in, i, op_.Type())); return in_var->GetLoDLevel(); } void SetLoDLevel(const std::string &out, int32_t lod_level, size_t j = 0) const override { PADDLE_ENFORCE_LT(j, Outputs(out).size(), - "Output %s of operator %s only has %d elements.", out, - op_.Type(), Outputs(out).size()); + platform::errors::InvalidArgument( + "The output variable index is out of range, output " + "variable %s of operator %s only has %d elements.", + out, op_.Type(), Outputs(out).size())); PADDLE_ENFORCE_NE(Outputs(out)[j], framework::kEmptyVarName, - "Output %s[%d] of operator %s is @EMPTY@", out, - op_.Type(), j); + platform::errors::InvalidArgument( + "The output variable %s[%d] of operator %s is empty.", + out, j, op_.Type())); auto *out_var = block_.FindVarRecursive(Outputs(out)[j]); PADDLE_ENFORCE_NOT_NULL( - out_var, "Output %s[%d] of operator %s should not be nullptr.", out, - op_.Type(), j); + out_var, platform::errors::NotFound( + "The output variable %s[%d] of operator %s is not found.", + out, j, op_.Type())); if 
(lod_level >= 0) { out_var->SetLoDLevel(lod_level); } @@ -200,8 +234,10 @@ class CompileTimeInferShapeContext : public InferShapeContext { DDim GetInputDim(const std::string &name) const override { const std::vector &arg_names = Inputs(name); PADDLE_ENFORCE_EQ(arg_names.size(), 1UL, - "Input(%s) should hold one element, but now it holds %d", - name, arg_names.size()); + platform::errors::InvalidArgument( + "The input(%s) should hold only one element, but now " + "it holds %d elements.", + name, arg_names.size())); return this->GetDim(arg_names[0]); } @@ -225,8 +261,10 @@ class CompileTimeInferShapeContext : public InferShapeContext { void SetOutputDim(const std::string &name, const DDim &dim) override { auto arg_names = Outputs(name); PADDLE_ENFORCE_EQ(arg_names.size(), 1UL, - "Output(%s) should hold one element, but now it holds %d", - name, arg_names.size()); + platform::errors::InvalidArgument( + "The iutput(%s) should hold only one element, but " + "now it holds %d elements.", + name, arg_names.size())); SetDim(arg_names[0], dim); } @@ -252,7 +290,8 @@ class CompileTimeInferShapeContext : public InferShapeContext { DDim GetDim(const std::string &name) const { auto var = block_.FindVarRecursive(name); - PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found.", name)); DDim res; try { auto shape = var->GetShape(); @@ -278,7 +317,11 @@ class CompileTimeInferShapeContext : public InferShapeContext { void SetDims(const std::vector &names, const std::vector &dims) { size_t length = names.size(); - PADDLE_ENFORCE_EQ(length, dims.size()); + PADDLE_ENFORCE_EQ(length, dims.size(), + platform::errors::InvalidArgument( + "The input variables number(%d) and input dimensions " + "number(%d) do not match.", + length, dims.size())); for (size_t i = 0; i < length; ++i) { if (names[i] == framework::kEmptyVarName) { continue; @@ -364,8 +407,10 @@ proto::OpDesc *OpDesc::Proto() { const std::vector &OpDesc::Input(const std::string &name) const { auto it = inputs_.find(name); - PADDLE_ENFORCE(it != inputs_.end(), "Input %s cannot be found in Op %s", name, - Type()); + PADDLE_ENFORCE_NE( + it, inputs_.end(), + platform::errors::NotFound("Input %s cannot be found in operator %s.", + name, Type())); return it->second; } @@ -385,8 +430,10 @@ void OpDesc::SetInput(const std::string ¶m_name, const std::vector &OpDesc::Output(const std::string &name) const { auto it = outputs_.find(name); - PADDLE_ENFORCE(it != outputs_.end(), "Output %s cannot be found in Op %s", - name, Type()); + PADDLE_ENFORCE_NE( + it, outputs_.end(), + platform::errors::NotFound("Output %s cannot be found in operator %s.", + name, Type())); return it->second; } @@ -427,7 +474,8 @@ bool OpDesc::HasProtoAttr(const std::string &name) const { proto::AttrType OpDesc::GetAttrType(const std::string &name) const { auto it = attrs_.find(name); - PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); + PADDLE_ENFORCE_NE(it, attrs_.end(), platform::errors::NotFound( + "Attribute %s is not found.", name)); return static_cast(it->second.which() - 1); } @@ -492,7 +540,8 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) { return; } default: - PADDLE_THROW("Wrong attr type %d", attr.type()); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported attribute type (code %d).", attr.type())); } need_update_ = true; return; @@ -529,7 +578,8 @@ void OpDesc::SetAttrMap( Attribute OpDesc::GetAttr(const std::string &name) const { 
auto it = attrs_.find(name); - PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); + PADDLE_ENFORCE_NE(it, attrs_.end(), platform::errors::NotFound( + "Attribute %s is not found.", name)); return it->second; } @@ -543,7 +593,8 @@ const proto::OpProto::Attr &OpDesc::GetProtoAttr( } } - PADDLE_THROW("Attribute %s is not found in proto %s", name, proto.type()); + PADDLE_THROW(platform::errors::NotFound( + "Attribute %s is not found in proto %s.", name, proto.type())); } Attribute OpDesc::GetNullableAttr(const std::string &name) const { @@ -557,7 +608,10 @@ Attribute OpDesc::GetNullableAttr(const std::string &name) const { std::vector OpDesc::GetBlocksAttrIds(const std::string &name) const { auto it = attrs_.find(name); - PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); + PADDLE_ENFORCE_NE( + it, attrs_.end(), + platform::errors::NotFound( + "Attribute `%s` is not found in operator `%s`.", name, desc_.type())); auto blocks = BOOST_GET_CONST(std::vector, it->second); std::vector ids; @@ -570,7 +624,10 @@ std::vector OpDesc::GetBlocksAttrIds(const std::string &name) const { int OpDesc::GetBlockAttrId(const std::string &name) const { auto it = attrs_.find(name); - PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); + PADDLE_ENFORCE_NE( + it, attrs_.end(), + platform::errors::NotFound( + "Attribute `%s` is not found in operator `%s`.", name, desc_.type())); return BOOST_GET_CONST(BlockDesc *, it->second)->ID(); } @@ -657,7 +714,11 @@ struct SetAttrDescVisitor : public boost::static_visitor { VectorToRepeated(v, attr_->mutable_longs()); } - void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } + void operator()(boost::blank) const { + PADDLE_THROW(platform::errors::Unavailable( + "Unsupported calling method of SetAttrDescVisitor object for " + "`boosst::blank` type.")); + } }; void OpDesc::Flush() { @@ -691,8 +752,9 @@ void OpDesc::Flush() { } void OpDesc::CheckAttrs() { - PADDLE_ENFORCE(!Type().empty(), - "CheckAttr() can not be called before type is set."); + PADDLE_ENFORCE_EQ(Type().empty(), false, + platform::errors::PreconditionNotMet( + "CheckAttrs() can not be called before type is set.")); auto *checker = OpInfoMap::Instance().Get(Type()).Checker(); if (checker == nullptr) { // checker is not configured. 
That operator could be generated by Paddle, @@ -707,8 +769,10 @@ void OpDesc::InferShape(const BlockDesc &block) const { try { VLOG(3) << "CompileTime infer shape on " << Type(); auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_; - PADDLE_ENFORCE(static_cast(infer_shape), - "%s's infer_shape has not been registered", this->Type()); + PADDLE_ENFORCE_EQ( + static_cast(infer_shape), true, + platform::errors::NotFound( + "Operator %s's infer_shape is not registered.", this->Type())); CompileTimeInferShapeContext ctx(*this, block); if (VLOG_IS_ON(10)) { std::ostringstream sout; @@ -758,10 +822,10 @@ bool CompileTimeInferShapeContext::HasInput(const std::string &name) const { if (length == 0) { return false; } - PADDLE_ENFORCE_EQ(length, 1UL, - "Input(%s) should have only one value, " - "but it have %d now", - name, length); + PADDLE_ENFORCE_EQ(length, 1UL, platform::errors::InvalidArgument( + "Input(%s) should have only one value, " + "but it has %d values now.", + name, length)); return block_.HasVarRecursive(input_names[0]); } @@ -774,10 +838,10 @@ bool CompileTimeInferShapeContext::HasOutput(const std::string &name) const { if (length == 0) { return false; } - PADDLE_ENFORCE_EQ(length, 1UL, - "Output(%s) should have only one value, " - "but it have %d now", - name, length); + PADDLE_ENFORCE_EQ(length, 1UL, platform::errors::InvalidArgument( + "Output(%s) should have only one value, " + "but it has %d values now.", + name, length)); return block_.HasVarRecursive(output_names[0]); } @@ -826,7 +890,8 @@ std::vector CompileTimeInferShapeContext::Outputs( std::vector CompileTimeInferShapeContext::GetRepeatedDims( const std::string &name) const { auto var = block_.FindVarRecursive(name); - PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found.", name)); std::vector res; try { auto shapes = var->GetShapes(); @@ -848,7 +913,8 @@ void CompileTimeInferShapeContext::SetDim(const std::string &name, void CompileTimeInferShapeContext::SetRepeatedDims( const std::string &name, const std::vector &dims) { auto var = block_.FindVarRecursive(name); - PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found.", name)); std::vector> dim_vec(dims.size()); std::transform(dims.begin(), dims.end(), dim_vec.begin(), vectorize<>); var->SetShapes(dim_vec); diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 0f842637a58e0897e8b68fe06d1e712ffd20ad97..d8159d6a5c294b85d8d5ab9bbee3b95a5eba793f 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -268,6 +268,9 @@ struct OpKernelRegistrarFunctorEx +#include +#include +#include +#include + +#include +#include "paddle/fluid/framework/framework.pb.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace framework { +namespace compatible { + +struct OpUpdateRecord { + enum class Type { kInvalid = 0, kModifyAttr, kNewAttr }; + Type type_; + std::string remark_; +}; + +struct ModifyAttr : OpUpdateRecord { + ModifyAttr(const std::string& name, const std::string& remark, + boost::any default_value) + : OpUpdateRecord({Type::kModifyAttr, remark}), + name_(name), + default_value_(default_value) { + // TODO(Shixiaowei02): Check the data type with proto::OpDesc. 
+ } + + private: + std::string name_; + boost::any default_value_; +}; +struct NewAttr : OpUpdateRecord { + NewAttr(const std::string& name, const std::string& remark, + boost::any default_value) + : OpUpdateRecord({Type::kNewAttr, remark}), + name_(name), + default_value_(default_value) {} + + private: + std::string name_; + boost::any default_value_; +}; + +class OpVersionDesc { + public: + OpVersionDesc& ModifyAttr(const std::string& name, const std::string& remark, + boost::any default_value) { + infos_.push_back(std::shared_ptr( + new compatible::ModifyAttr(name, remark, default_value))); + return *this; + } + + OpVersionDesc& NewAttr(const std::string& name, const std::string& remark, + boost::any default_value) { + infos_.push_back(std::shared_ptr( + new compatible::NewAttr(name, remark, default_value))); + return *this; + } + + private: + std::vector> infos_; +}; + +class OpVersion { + public: + OpVersion& AddCheckpoint(const std::string& note, + const OpVersionDesc& op_version_desc) { + checkpoints_.push_back(Checkpoint({note, op_version_desc})); + return *this; + } + + private: + struct Checkpoint { + std::string note_; + OpVersionDesc op_version_desc_; + }; + std::vector checkpoints_; +}; + +class OpVersionRegistrar { + public: + static OpVersionRegistrar& GetInstance() { + static OpVersionRegistrar instance; + return instance; + } + OpVersion& Register(const std::string& op_type) { + if (op_version_map_.find(op_type) != op_version_map_.end()) { + PADDLE_THROW("'%s' is registered in operator version more than once.", + op_type); + } + op_version_map_.insert({op_type, OpVersion()}); + return op_version_map_[op_type]; + } + + private: + std::unordered_map op_version_map_; + + OpVersionRegistrar() = default; + OpVersionRegistrar& operator=(const OpVersionRegistrar&) = delete; +}; + +} // namespace compatible +} // namespace framework +} // namespace paddle + +#define REGISTER_OP_VERSION(op_type) \ + static paddle::framework::compatible::OpVersion \ + RegisterOpVersion__##op_type = \ + paddle::framework::compatible::OpVersionRegistrar::GetInstance() \ + .Register(#op_type) diff --git a/paddle/fluid/framework/op_version_registry_test.cc b/paddle/fluid/framework/op_version_registry_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..77891dafc81b3a96af28cb480f1620543caab0b8 --- /dev/null +++ b/paddle/fluid/framework/op_version_registry_test.cc @@ -0,0 +1,49 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include + +#include "paddle/fluid/framework/op_version_registry.h" + +namespace paddle { +namespace framework { +namespace compatible { + +TEST(test_operator_version, test_operator_version) { + REGISTER_OP_VERSION(test__) + .AddCheckpoint( + R"ROC( + Upgrade reshape, modified one attribute [axis] and add a new attribute [size]. 
+ )ROC", + framework::compatible::OpVersionDesc() + .ModifyAttr("axis", + "Increased from the original one method to two.", -1) + .NewAttr("size", + "In order to represent a two-dimensional rectangle, the " + "parameter size is added.", + 0)) + .AddCheckpoint( + R"ROC( + Add a new attribute [height] + )ROC", + framework::compatible::OpVersionDesc().NewAttr( + "height", + "In order to represent a two-dimensional rectangle, the " + "parameter height is added.", + 0)); +} +} // namespace compatible +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index c8c18bcee6a8868919c584527c088725c1c9d58d..ca2705f154c4f45dfccd954b23209c71701adce5 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -34,6 +34,9 @@ limitations under the License. */ #include "paddle/fluid/framework/unused_var_check.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/platform/profiler.h" +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu_info.h" +#endif #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -165,6 +168,14 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { #else auto dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; platform::SetDeviceId(dev_id); +#endif + } else if (platform::is_xpu_place(place)) { +#ifndef PADDLE_WITH_XPU + PADDLE_THROW(platform::errors::Unimplemented( + "Cannot run operator on place %s", place)); +#else + auto dev_id = BOOST_GET_CONST(platform::XPUPlace, place).device; + platform::SetXPUDeviceId(dev_id); #endif } @@ -1109,6 +1120,16 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx, expected_kernel_key.data_layout_ = DataLayout::kAnyLayout; kernel_iter = kernels.find(expected_kernel_key); } +#endif +#ifdef PADDLE_WITH_XPU + if (kernel_iter == kernels.end() && + is_xpu_place(expected_kernel_key.place_)) { + VLOG(3) << "missing XPU kernel: " << type_ + << ", expected_kernel_key:" << expected_kernel_key + << ", fallbacking to CPU one!"; + expected_kernel_key.place_ = platform::CPUPlace(); + kernel_iter = kernels.find(expected_kernel_key); + } #endif if (kernel_iter == kernels.end()) { PADDLE_THROW("op %s does not have kernel for %s", type_, diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 8c6dd628bb9748bb120c1c39841e199659fb53fc..12e0f97f1262ca0f6bf8fc70ab5b482fb0bdd305 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -449,6 +449,9 @@ ParallelExecutor::ParallelExecutor(const std::vector &places, const BuildStrategy &build_strategy, ir::Graph *graph) : member_(new ParallelExecutorPrivate(places, scope)) { + PADDLE_ENFORCE(places.size() > 0 && !is_xpu_place(places[0]), + platform::errors::Unavailable( + "XPU is not supported in ParallelExecutor")); ir::InitReaderQueueDeviceCount(graph, *(member_->global_scope_), member_->places_.size()); member_->use_cuda_ = exec_strategy.use_cuda_; diff --git a/paddle/fluid/framework/prune.cc b/paddle/fluid/framework/prune.cc index 919378c929185b12826c8b427d0e9a86a382bb2b..274b0ca0d903d4e89c7bceb74bc16581f03bb584 100644 --- a/paddle/fluid/framework/prune.cc +++ b/paddle/fluid/framework/prune.cc @@ -210,6 +210,23 @@ void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output, should_run.push_back(true); } else { should_run.push_back(false); + // If the output of an op modifies feed vars, the op should 
not clip. + // For example, in the transformer structure, the third parameter returned + // by beam_search op is generally assigned to a feed var. Cutting the + // assign op will cause an error. + if (parent_block_id != -1) { + bool flag = false; + for (auto& var : op_desc.outputs()) { + for (auto& argu : var.arguments()) { + if (feed_var_names.count(argu)) { + flag = true; + } + } + } + if (flag) { + should_run.back() = true; + } + } } } diff --git a/paddle/fluid/framework/prune_test.cc b/paddle/fluid/framework/prune_test.cc index eb5c241a8372a460483c70e38f962168b1cdbbc0..12fa0c61f8121d475a0cf2aa78e4bb995a01b132 100644 --- a/paddle/fluid/framework/prune_test.cc +++ b/paddle/fluid/framework/prune_test.cc @@ -185,3 +185,34 @@ TEST(Prune, recurrrent_op) { EXPECT_EQ(pruned.blocks(0).ops_size(), 2); EXPECT_EQ(pruned.blocks(1).ops_size(), 1); } + +// If the output of an op modifies feed vars, the op should not clip. +TEST(Prune, recurrrent_op_2) { + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); + f::BlockDesc *sub_block = program.AppendBlock(*block); + AddOp("one_two", {{"input", {"a"}}}, {{"output", {"b", "c"}}}, + f::AttributeMap{}, block); + + std::vector state_var_name(1, "y"); + AddOp("recurrent", {{"input", {"b", "c"}}}, {{"output", {"b1, c1"}}}, + {{"ex_states", state_var_name}, + {"states", state_var_name}, + {"sub_block", sub_block}}, + block); + + EXPECT_TRUE(sub_block != nullptr); + AddOp("rnn_memory_helper", {{"input", {"x"}}}, {{"output", {"a"}}}, + f::AttributeMap{}, sub_block); + + f::proto::ProgramDesc *pdesc = program.Proto(); + pdesc->mutable_blocks(0)->mutable_ops(1)->set_is_target(true); + + f::proto::ProgramDesc pruned; + std::set feed_var_names = {"x", "a"}; + + f::Prune(*pdesc, feed_var_names, &pruned); + EXPECT_EQ(pruned.blocks_size(), 2); + EXPECT_EQ(pruned.blocks(0).ops_size(), 2); + EXPECT_EQ(pruned.blocks(1).ops_size(), 1); +} diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index 50637a0c3d3f9c6975578e94e6ddc2c898c926e0..3b3271fc5b936e65b60930f43ea5c4f6f8448941 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -54,14 +54,43 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr, BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size); } +#ifdef PADDLE_WITH_XPU + else if (platform::is_xpu_place(src_place) && // NOLINT + platform::is_cpu_place(dst_place)) { + memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr, + BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size); + } else if (platform::is_cpu_place(src_place) && + platform::is_xpu_place(dst_place)) { + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr, + BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size); + } else if (platform::is_xpu_place(src_place) && + platform::is_xpu_place(dst_place)) { + if (src_ptr == dst_ptr) { + VLOG(3) << "Skip copy the same data async from " << src_place << " to " + << dst_place; + return; + } + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr, + BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Copy from %s to %s is not supported.", src_place, dst_place)); + } +#endif #ifdef PADDLE_WITH_CUDA else if (platform::is_cuda_pinned_place(src_place) && // NOLINT platform::is_cpu_place(dst_place)) { 
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr, BOOST_GET_CONST(platform::CUDAPinnedPlace, src_place), src_ptr, size); - } else if (platform::is_gpu_place(src_place) && // NOLINT - platform::is_cpu_place(dst_place)) { + } + else if (platform::is_cpu_place(src_place) && // NOLINT + platform::is_cuda_pinned_place(dst_place)) { + memory::Copy(BOOST_GET_CONST(platform::CUDAPinnedPlace, dst_place), dst_ptr, + BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size); + } + else if (platform::is_gpu_place(src_place) && // NOLINT + platform::is_cpu_place(dst_place)) { auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place); auto ctx_place = ctx.GetPlace(); @@ -71,8 +100,9 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, auto stream = reinterpret_cast(ctx).stream(); memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); - } else if (platform::is_cpu_place(src_place) && - platform::is_gpu_place(dst_place)) { + } + else if (platform::is_cpu_place(src_place) && // NOLINT + platform::is_gpu_place(dst_place)) { auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); auto ctx_place = ctx.GetPlace(); @@ -82,8 +112,32 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, auto stream = reinterpret_cast(ctx).stream(); memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream); - } else if (platform::is_cuda_pinned_place(src_place) && - platform::is_gpu_place(dst_place)) { + } + else if (platform::is_gpu_place(src_place) && // NOLINT + platform::is_cuda_pinned_place(dst_place)) { + auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); + auto dst_cuda_pinned_place = + BOOST_GET_CONST(platform::CUDAPinnedPlace, dst_place); + auto ctx_place = ctx.GetPlace(); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), true, + platform::errors::PreconditionNotMet( + "Device context place mismatch. When copying Tensor " + "data from GPU memory to CUDA Pinned memory, current " + "device context place should be GPU.")); + auto ctx_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx_place); + PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place, + platform::errors::PreconditionNotMet( + "The source GPU device and current device context do " + "not match. 
The source GPU device number is %d, but " + "device context GPU number is %d.", + src_gpu_place.device, ctx_gpu_place.device)); + auto stream = + reinterpret_cast(ctx).stream(); + memory::Copy(dst_cuda_pinned_place, dst_ptr, src_gpu_place, src_ptr, size, + stream); + } + else if (platform::is_cuda_pinned_place(src_place) && // NOLINT + platform::is_gpu_place(dst_place)) { auto src_cuda_pinned_place = BOOST_GET_CONST(platform::CUDAPinnedPlace, src_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); @@ -104,8 +158,9 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, reinterpret_cast(ctx).stream(); memory::Copy(dst_gpu_place, dst_ptr, src_cuda_pinned_place, src_ptr, size, stream); - } else if (platform::is_gpu_place(src_place) && - platform::is_gpu_place(dst_place)) { + } + else if (platform::is_gpu_place(src_place) && // NOLINT + platform::is_gpu_place(dst_place)) { auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); auto ctx_place = ctx.GetPlace(); @@ -128,7 +183,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place."); } } - } else { + } + else { // NOLINT PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place); } #endif @@ -174,35 +230,74 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr, BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size); } +#ifdef PADDLE_WITH_XPU + else if (platform::is_xpu_place(src_place) && // NOLINT + platform::is_cpu_place(dst_place)) { + memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr, + BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size); + } else if (platform::is_cpu_place(src_place) && // NOLINT + platform::is_xpu_place(dst_place)) { + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr, + BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size); + } else if (platform::is_xpu_place(src_place) && // NOLINT + platform::is_xpu_place(dst_place)) { + if (src_ptr == dst_ptr) { + VLOG(3) << "Skip copy the same data async from " << src_place << " to " + << dst_place; + return; + } + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr, + BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size); + } else { // NOLINT + PADDLE_THROW(platform::errors::Unimplemented( + "Copy from %s to %s is not supported.", src_place, dst_place)); + } +#endif #ifdef PADDLE_WITH_CUDA else if (platform::is_cuda_pinned_place(src_place) && // NOLINT platform::is_cpu_place(dst_place)) { memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr, BOOST_GET_CONST(platform::CUDAPinnedPlace, src_place), src_ptr, size); - } else if (platform::is_gpu_place(src_place) && // NOLINT - platform::is_cpu_place(dst_place)) { + } + else if (platform::is_cpu_place(src_place) && // NOLINT + platform::is_cuda_pinned_place(dst_place)) { + memory::Copy(BOOST_GET_CONST(platform::CUDAPinnedPlace, dst_place), dst_ptr, + BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size); + } + else if (platform::is_gpu_place(src_place) && // NOLINT + platform::is_cuda_pinned_place(dst_place)) { + memory::Copy(BOOST_GET_CONST(platform::CUDAPinnedPlace, dst_place), dst_ptr, + BOOST_GET_CONST(platform::CUDAPlace, src_place), src_ptr, size, + nullptr); + } + else if 
(platform::is_gpu_place(src_place) && // NOLINT + platform::is_cpu_place(dst_place)) { auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto dst_cpu_place = BOOST_GET_CONST(platform::CPUPlace, dst_place); memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr); - } else if (platform::is_cpu_place(src_place) && - platform::is_gpu_place(dst_place)) { + } + else if (platform::is_cpu_place(src_place) && // NOLINT + platform::is_gpu_place(dst_place)) { auto src_cpu_place = BOOST_GET_CONST(platform::CPUPlace, src_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, nullptr); - } else if (platform::is_gpu_place(src_place) && - platform::is_gpu_place(dst_place)) { + } + else if (platform::is_gpu_place(src_place) && // NOLINT + platform::is_gpu_place(dst_place)) { auto src_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, src_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr); - } else if (platform::is_cuda_pinned_place(src_place) && - platform::is_gpu_place(dst_place)) { + } + else if (platform::is_cuda_pinned_place(src_place) && // NOLINT + platform::is_gpu_place(dst_place)) { auto src_pinned_place = BOOST_GET_CONST(platform::CUDAPinnedPlace, src_place); auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dst_place); memory::Copy(dst_gpu_place, dst_ptr, src_pinned_place, src_ptr, size, nullptr); - } else { + } + else { // NOLINT PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place); } #endif @@ -241,6 +336,19 @@ class AnyVisitor : public boost::static_visitor { const framework::Tensor& tensor_; Predicate predicate_; + bool GetResultHelper(const framework::Tensor& out, + const platform::Place& place) const { + platform::CPUPlace cpu; + framework::Tensor tmp; + tmp.Resize({1}); + tmp.mutable_data(cpu); + auto ctx = platform::DeviceContextPool::Instance().Get(place); + ctx->Wait(); + TensorCopy(out, cpu, *ctx, &tmp); + ctx->Wait(); + return GetResult(tmp, cpu); + } + public: AnyVisitor(const framework::Tensor& tensor, Predicate predicate) : tensor_(tensor), predicate_(std::move(predicate)) {} @@ -255,17 +363,14 @@ class AnyVisitor : public boost::static_visitor { return this->GetResult(out, place); } + bool GetResult(const framework::Tensor& out, + const platform::XPUPlace& xpu) const { + return GetResultHelper(out, xpu); + } + bool GetResult(const framework::Tensor& out, const platform::CUDAPlace& gpu) const { - platform::CPUPlace cpu; - framework::Tensor tmp; - tmp.Resize({1}); - tmp.mutable_data(cpu); - auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu); - gpuctx->Wait(); - TensorCopy(out, cpu, *gpuctx, &tmp); - gpuctx->Wait(); - return GetResult(tmp, cpu); + return GetResultHelper(out, gpu); } bool GetResult(const framework::Tensor& out, @@ -315,6 +420,61 @@ inline void Any(const framework::Tensor& tensor, Predicate predicate, platform::VisitPlace(place, visitor); } +template +struct AllDTypeVisitor { + Predicate predicate_; + const Tensor& tensor_; + const DevCtx& ctx_; + Tensor* out_; + + AllDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx, + Tensor* out) + : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {} + + template + void apply() const { + auto t = EigenVector::Flatten(tensor_); + auto o = EigenVector::Flatten(*out_); + o.device(*ctx_.eigen_device()) = predicate_(t); + } +}; + 
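The All* visitors introduced in this hunk mirror the existing Any* path but keep one boolean per element instead of reducing to a single flag, and TensorIsfiniteV2 later combines the Inf and NaN masks by marking an element finite only when both checks are false. A minimal standalone sketch of that semantics (plain C++ over std::vector, not the Eigen/CUDA implementation used here):

#include <algorithm>
#include <cmath>
#include <vector>

// "Any"-style check: reduce the whole buffer to a single flag.
bool AnyNaN(const std::vector<float>& x) {
  return std::any_of(x.begin(), x.end(),
                     [](float v) { return std::isnan(v); });
}

// "V2"-style check: keep a per-element mask, like TensorContainsNANV2.
std::vector<bool> NaNMask(const std::vector<float>& x) {
  std::vector<bool> mask(x.size());
  std::transform(x.begin(), x.end(), mask.begin(),
                 [](float v) { return std::isnan(v); });
  return mask;
}

// Combine Inf and NaN masks the way TensorIsfiniteV2/BothFalse does:
// an element is finite exactly when both masks are false for it.
std::vector<bool> FiniteMask(const std::vector<bool>& inf_mask,
                             const std::vector<bool>& nan_mask) {
  std::vector<bool> finite(inf_mask.size());
  for (size_t i = 0; i < inf_mask.size(); ++i) {
    finite[i] = !inf_mask[i] && !nan_mask[i];
  }
  return finite;
}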
+template +inline void AllImpl(Predicate predicate, const framework::Tensor& tensor, + const DevCtx& ctx, framework::Tensor* out) { + VisitDataType(tensor.type(), AllDTypeVisitor( + predicate, tensor, ctx, out)); +} + +template +class AllOutVisitor : public boost::static_visitor<> { + private: + const framework::Tensor& tensor_; + mutable framework::Tensor* out_; + Predicate predicate_; + + public: + AllOutVisitor(const framework::Tensor& tensor, Predicate predicate, + framework::Tensor* out) + : tensor_(tensor), out_(out), predicate_(predicate) {} + + template + void operator()(const Place& place) const { + auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place); + out_->Resize(tensor_.dims()); + out_->mutable_data(place); + AllImpl(predicate_, tensor_, *ctx, out_); + } +}; + +template +inline void All(const framework::Tensor& tensor, Predicate predicate, + framework::Tensor* out) { + AllOutVisitor visitor(tensor, predicate, out); + auto place = tensor.place(); + platform::VisitPlace(place, visitor); +} + struct ContainsNANPredicate { template auto operator()(const T& eigen_vec) const @@ -335,6 +495,12 @@ void TensorContainsNAN(const framework::Tensor& tensor, Any(tensor, predicate, out); } +void TensorContainsNANV2(const framework::Tensor& tensor, + framework::Tensor* out) { + ContainsNANPredicate predicate; + All(tensor, predicate, out); +} + struct ContainsInfPredicate { template auto operator()(const T& eigen_vec) const @@ -355,6 +521,12 @@ void TensorContainsInf(const framework::Tensor& tensor, Any(tensor, predicate, out); } +void TensorContainsInfV2(const framework::Tensor& tensor, + framework::Tensor* out) { + ContainsInfPredicate predicate; + All(tensor, predicate, out); +} + // NOTE(dzhwinter): // Isfinite need a AllVisitor to loop through all the elements. // We choose two cuda call instead of one allvisitor. The AllVisitor @@ -367,8 +539,8 @@ bool TensorIsfinite(const framework::Tensor& tensor) { #ifdef PADDLE_WITH_CUDA template -static inline void __global__ BothFalse(const T* cmp, T* out) { - out[0] = (!cmp[0]) && (!out[0]); +static inline void __global__ BothFalse(const T* cmp, T* out, int element_num) { + CUDA_KERNEL_LOOP(i, element_num) { out[i] = (!cmp[i]) && (!out[i]); } } #endif @@ -383,25 +555,47 @@ struct BothFalseVisitor : public boost::static_visitor<> { VisitorImpl(place); } + void VisitorImpl(const platform::XPUPlace& xpu) const { + PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported")); + } + void VisitorImpl(const platform::CUDAPlace& gpu) const { #ifdef PADDLE_WITH_CUDA auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(gpu); - BothFalse<<<1, 1, 0, ctx->stream()>>>(in_.data(), - out_->mutable_data(gpu)); + constexpr int MAX_BLOCK_DIM = 512; + const int MAX_GRID_DIM = ctx->GetMaxPhysicalThreadCount() / MAX_BLOCK_DIM; + int element_num = in_.numel(); + int block_size = (element_num >= MAX_BLOCK_DIM) + ? MAX_BLOCK_DIM + : (1 << static_cast(std::log2(element_num))); + int grid_size = element_num / block_size; + grid_size = (grid_size >= MAX_GRID_DIM) ? 
MAX_GRID_DIM : grid_size; + BothFalse<<stream()>>>( + in_.data(), out_->mutable_data(gpu), element_num); #endif } void VisitorImpl(const platform::CPUPlace& cpu) const { - bool lhs = !in_.data()[0]; - bool rhs = !out_->mutable_data(cpu)[0]; - out_->mutable_data(cpu)[0] = lhs && rhs; + int num = in_.numel(); + const bool* in_ptr = in_.data(); + bool* out_ptr = out_->data(); + for (int i = 0; i < num; ++i) { + bool lhs = !in_ptr[i]; + bool rhs = !out_ptr[i]; + out_ptr[i] = lhs && rhs; + } } void VisitorImpl( const platform::CUDAPinnedPlace& cpu /* equals to cpu*/) const { - bool lhs = !in_.data()[0]; - bool rhs = !out_->mutable_data(cpu)[0]; - out_->mutable_data(cpu)[0] = lhs && rhs; + int num = in_.numel(); + const bool* in_ptr = in_.data(); + bool* out_ptr = out_->data(); + for (int i = 0; i < num; ++i) { + bool lhs = !in_ptr[i]; + bool rhs = !out_ptr[i]; + out_ptr[i] = lhs && rhs; + } } }; @@ -414,6 +608,15 @@ void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out) { platform::VisitPlace(place, visitor); } +void TensorIsfiniteV2(const framework::Tensor& tensor, framework::Tensor* out) { + framework::Tensor tmp; + TensorContainsInfV2(tensor, &tmp); + TensorContainsNANV2(tensor, out); + BothFalseVisitor visitor(tmp, out); + auto place = tensor.place(); + platform::VisitPlace(place, visitor); +} + void TensorToStream(std::ostream& os, const Tensor& tensor, const platform::DeviceContext& dev_ctx) { { // the 1st field, uint32_t version @@ -463,6 +666,28 @@ void TensorToStream(std::ostream& os, const Tensor& tensor, #else PADDLE_THROW(platform::errors::Unimplemented( "CUDAPlace is not supported when not compiled with CUDA")); +#endif + } else if (platform::is_xpu_place(tensor.place())) { +#ifdef PADDLE_WITH_XPU + constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB + std::unique_ptr buf(new char[kBufSize]); + auto& xpu_dev_ctx = + static_cast(dev_ctx); + platform::CPUPlace cpu; + uintptr_t data = reinterpret_cast(data_ptr); + while (size != 0) { + size_t size_to_write = std::min(kBufSize, static_cast(size)); + memory::Copy(cpu, buf.get(), + BOOST_GET_CONST(platform::XPUPlace, tensor.place()), + reinterpret_cast(data), size_to_write); + xpu_dev_ctx.Wait(); + os.write(buf.get(), size_to_write); + data += size_to_write; + size -= size_to_write; + } +#else + PADDLE_THROW(platform::errors::Unimplemented( + "XPUPlace is not supported when not compiled with XPU")); #endif } else { os.write(static_cast(data_ptr), @@ -517,8 +742,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor, void* buf; auto ctx = platform::CPUDeviceContext(); size_t size = tensor->numel() * framework::SizeOfType(desc.data_type()); - if (platform::is_gpu_place(dev_ctx.GetPlace())) { -#ifdef PADDLE_WITH_CUDA + if (platform::is_gpu_place(dev_ctx.GetPlace()) || + platform::is_xpu_place(dev_ctx.GetPlace())) { +#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU Tensor cpu_tensor; cpu_tensor.Resize(framework::make_ddim(shape)); framework::VisitDataType( @@ -528,8 +754,13 @@ void TensorFromStream(std::istream& is, Tensor* tensor, auto dst_place = dev_ctx.GetPlace(); framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor); #else - PADDLE_THROW(platform::errors::Unimplemented( - "CUDAPlace is not supported when not compiled with CUDA")); + if (platform::is_gpu_place(dev_ctx.GetPlace())) { + PADDLE_THROW(platform::errors::Unimplemented( + "CUDAPlace is not supported when not compiled with CUDA")); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "XPUPlace is not supported when not compiled 
with XPU")); + } #endif } else { framework::VisitDataType( @@ -568,8 +799,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor, void* buf; auto ctx = platform::CPUDeviceContext(); size_t size = tensor->numel() * framework::SizeOfType(desc.data_type()); - if (platform::is_gpu_place(dev_ctx.GetPlace())) { -#ifdef PADDLE_WITH_CUDA + if (platform::is_gpu_place(dev_ctx.GetPlace()) || + platform::is_xpu_place(dev_ctx.GetPlace())) { +#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU Tensor cpu_tensor; cpu_tensor.Resize(framework::make_ddim(dims)); framework::VisitDataType( @@ -579,8 +811,13 @@ void TensorFromStream(std::istream& is, Tensor* tensor, auto dst_place = dev_ctx.GetPlace(); framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor); #else - PADDLE_THROW(platform::errors::Unimplemented( - "CUDAPlace is not supported when not compiled with CUDA")); + if (platform::is_gpu_place(dev_ctx.GetPlace())) { + PADDLE_THROW(platform::errors::Unimplemented( + "CUDAPlace is not supported when not compiled with CUDA")); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "XPUPlace is not supported when not compiled with XPU")); + } #endif } else { framework::VisitDataType( @@ -665,6 +902,9 @@ void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst) { reinterpret_cast(*ctx).stream()); } #endif +#ifdef PADDLE_WITH_XPU + PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported")); +#endif } template diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index c71327da64042aed85f1247f3c31de3e66a588ba..fce0142b41d3ae9b2a6fcd4f16d38b0492fbd806 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -76,6 +76,13 @@ void TensorFromStream(std::istream& is, Tensor* tensor, const platform::DeviceContext& dev_ctx, const size_t& seek, const std::vector& shape); +// store the bool result tensor in out tensor +void TensorContainsNANV2(const framework::Tensor& tensor, + framework::Tensor* out); +void TensorContainsInfV2(const framework::Tensor& tensor, + framework::Tensor* out); +void TensorIsfiniteV2(const framework::Tensor& tensor, framework::Tensor* out); + // convert dlpack's DLTensor to tensor void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst); diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index f5fc5944709fc94ef23b878a5f58c9cb1dfed63a..7caeb4378ce3d1ca1d1557054642c9fa184bea39 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -76,6 +76,13 @@ class TensorAddFunctor : public boost::static_visitor<> { blas.AXPY(numel_, 1., x_, y_); } + void operator()(const platform::XPUPlace& place) { + PADDLE_THROW(platform::errors::PermissionDenied( + "Gradient accumulation on place (%s) " + "is not supported in imperative mode", + place)); + } + #ifdef PADDLE_WITH_CUDA void operator()(const platform::CUDAPlace& place) { platform::CUDADeviceContext* ctx = diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 82b91d2e77292dbefae54d0f7ecb7a2aff00f979..4e0e95dd012976c292b4511e9707802c210dc599 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -42,23 +42,17 @@ static void PrepareData(const platform::Place& place, for (const auto& var_base : name_pair.second) { const auto* tensor = GetTensorFromVar(var_base->Var()); if (tensor && 
tensor->IsInitialized()) { - auto tmp_place = tensor->place(); - - // TODO(jiabin): Support transform data layout when we Verify it on more - // tests - if (!(tmp_place == place)) { - auto kernel_type_for_var = op.GetKernelTypeForVar( - name_pair.first, *tensor, expected_kernel_key); - if (!NeedTransform(kernel_type_for_var, expected_kernel_key)) { - continue; - } else { - VLOG(3) << "Transform Variable " << var_base->Name() << " from " - << kernel_type_for_var << " to " << expected_kernel_key; - framework::Tensor out; - TransformData(expected_kernel_key, kernel_type_for_var, *tensor, - &out); - SetTensorToVariable(var_base->Var(), out, var_base->MutableVar()); - } + auto kernel_type_for_var = op.GetKernelTypeForVar( + name_pair.first, *tensor, expected_kernel_key); + if (!NeedTransform(kernel_type_for_var, expected_kernel_key)) { + continue; + } else { + VLOG(3) << "Transform Variable " << var_base->Name() << " from " + << kernel_type_for_var << " to " << expected_kernel_key; + framework::Tensor out; + TransformData(expected_kernel_key, kernel_type_for_var, *tensor, + &out); + SetTensorToVariable(var_base->Var(), out, var_base->MutableVar()); } } } @@ -93,12 +87,26 @@ PreparedOp PrepareOpImpl(const NameVarMap& ins, auto& kernels = kernels_iter->second; framework::RuntimeContext ctx({}, {}); +#ifdef PADDLE_WITH_MKLDNN + // MKLDNN variant of code reads attributes in some of GetKernelTypeForVar and + // GetKernelType functions, so we need to copy the attributes there. + // Const qualifier of Attrs had to be discarded to overwrite it. + auto& mutable_op_attrs = const_cast(op.Attrs()); + mutable_op_attrs = attrs; +#endif auto expected_kernel_key = op.GetExpectedKernelType(DygraphExecutionContext( op, framework::Scope(), *dev_ctx, ctx, ins, outs, attrs)); VLOG(3) << "expected_kernel_key:" << expected_kernel_key; auto kernel_iter = kernels.find(expected_kernel_key); +#ifdef PADDLE_WITH_XPU + if (kernel_iter == kernels.end() && + is_xpu_place(expected_kernel_key.place_)) { + expected_kernel_key.place_ = platform::CPUPlace(); + kernel_iter = kernels.find(expected_kernel_key); + } +#endif // TODO(jiabin): Add operator.cc's line 1000 part back when we need that case PADDLE_ENFORCE_NE(kernel_iter, kernels.end(), platform::errors::NotFound( diff --git a/paddle/fluid/imperative/tests/test_prepare_op.cc b/paddle/fluid/imperative/tests/test_prepare_op.cc index c2e30b45a7f6c06ee6eb8945922a4317e9060491..f226c63f0c432e3878c7df6a5a04433ce047ff26 100644 --- a/paddle/fluid/imperative/tests/test_prepare_op.cc +++ b/paddle/fluid/imperative/tests/test_prepare_op.cc @@ -176,7 +176,7 @@ TEST(test_prepare_op, test_prepare_data) { } #endif -TEST(test_prepare_op, test_prepare_data_same_place) { +void TestPrepareDataSamePlace(framework::AttributeMap attr_map) { std::shared_ptr vin( new imperative::VarBase(false, "vin")); std::shared_ptr vout( @@ -198,7 +198,6 @@ TEST(test_prepare_op, test_prepare_data_same_place) { var_pair out_pair = var_pair("Out", vb_vector(1, vout)); imperative::NameVarBaseMap ins = {x_pair}; imperative::NameVarBaseMap outs = {out_pair}; - framework::AttributeMap attr_map; const std::string op_type = "relu"; const auto& info = framework::OpInfoMap::Instance().Get(op_type); if (info.Checker()) info.Checker()->Check(&attr_map); @@ -222,8 +221,21 @@ TEST(test_prepare_op, test_prepare_data_same_place) { } } } + +TEST(test_prepare_op, test_prepare_data_same_place) { + TestPrepareDataSamePlace({}); +} + +#ifdef PADDLE_WITH_MKLDNN +TEST(test_prepare_op, test_prepare_data_cpu_mkldnn) { + 
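The PADDLE_WITH_XPU branch added to PrepareOpImpl above retries the kernel lookup on CPUPlace when the op has no kernel registered for the XPU place. A rough standalone sketch of that fallback (the registry map and place enum are simplified stand-ins, not Paddle's types):

#include <functional>
#include <map>

enum class Place { kCPU, kXPU };
using KernelFn = std::function<void()>;

// Look up a kernel for the requested place; if nothing is registered for
// XPU, retry the lookup with the CPU place instead of failing outright.
const KernelFn* FindKernel(const std::map<Place, KernelFn>& kernels,
                           Place place) {
  auto it = kernels.find(place);
  if (it == kernels.end() && place == Place::kXPU) {
    it = kernels.find(Place::kCPU);
  }
  return it == kernels.end() ? nullptr : &it->second;
}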
TestPrepareDataSamePlace({{"use_mkldnn", true}}); +} +#endif } // namespace imperative } // namespace paddle USE_OP(split); USE_OP(relu); +#ifdef PADDLE_WITH_MKLDNN +USE_OP_DEVICE_KERNEL(relu, MKLDNN); +#endif diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index fdd71b0d884004c84e2ee15eea522c64ff943dd9..1a3413657ce6fac41603d691dcdb61ddb1d6320a 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -83,7 +83,12 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector& shape, std::string input, } else if (shape.size() == 3UL) { return nvinfer1::Dims3(shape[0], shape[1], shape[2]); } - return nvinfer1::Dims4(shape[0], shape[1], 1, 1); + nvinfer1::Dims dims; + dims.nbDims = shape.size(); + for (size_t i = 0; i < shape.size(); i++) { + dims.d[i] = shape[i]; + } + return dims; } } } // NOLINT diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 70ead9720d2ebcb15ae0173dc0ba7c2095a4f4d4..f5d22b982de2b41474d97565a44dedc67c8a85d7 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -24,6 +24,8 @@ struct SimpleOpTypeSetTeller : public Teller { #if IS_TRT_VERSION_GE(5130) teller_set.insert("relu6"); teller_set.insert("hard_sigmoid"); + int8_teller_set.insert("relu6"); + int8_teller_set.insert("hard_sigmoid"); #endif #if IS_TRT_VERSION_GE(6000) teller_set.insert("fused_embedding_eltwise_layernorm"); @@ -53,11 +55,11 @@ struct SimpleOpTypeSetTeller : public Teller { "elementwise_add", "leaky_relu", "fc", - "relu6", "concat", "scale", "elementwise_mul", - "conv2d_transpose"}; + "conv2d_transpose", + "hard_swish"}; std::unordered_set teller_set{ "mul", "conv2d", diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu index e7f9381e97137d77d27b54cac910bfee9f629464..5e43be90de3dbbfef3c7d3def7e37904bb644380 100644 --- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu @@ -76,6 +76,16 @@ nvinfer1::DimsExprs EmbEltwiseLayernormPluginDynamic::getOutputDimensions( return ret; } +template +void EmbEltwiseLayernormPluginDynamic::terminate() { + for (auto ptr : embs_gpu_) { + if (ptr) cudaFree(ptr); + } + + if (bias_gpu_) cudaFree(bias_gpu_); + if (scale_gpu_) cudaFree(scale_gpu_); +} + template bool EmbEltwiseLayernormPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, @@ -153,7 +163,7 @@ int EmbEltwiseLayernormPluginDynamic::enqueue( int64_t *emb_ptr_gpu_d = emb_ptr_tensor.mutable_data(platform::CUDAPlace(device_id)); - std::vector in_ptr, emb_ptr; + std::vector in_ptr, emb_ptr; for (int i = 0; i < input_num; i++) { in_ptr.push_back(reinterpret_cast(inputs[i])); emb_ptr.push_back(reinterpret_cast(embs_gpu_[i])); diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h index 8ac611cd7c62fddfd4f01d7705b841abc28501d3..5babd87db0602e973452efa613fcaf9810d29afa 100644 --- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h @@ -81,9 +81,13 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { } nvinfer1::IPluginV2DynamicExt* clone() const override { - 
return new EmbEltwiseLayernormPluginDynamic( + auto ptr = new EmbEltwiseLayernormPluginDynamic( embs_, bias_, scale_, emb_sizes_, bias_size_, scale_size_, hidden_size_, eps_); + ptr->embs_gpu_ = embs_gpu_; + ptr->bias_gpu_ = bias_gpu_; + ptr->scale_gpu_ = scale_gpu_; + return ptr; } const char* getPluginType() const override { @@ -111,6 +115,7 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { return sum_num; } + void terminate() override; void serialize(void* buffer) const override { // SerializeValue(&buffer, with_fp16_); SerializeValue(&buffer, emb_sizes_); diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu index f1e11b6fba1f1556e2a8a2aaaca1223aaef76b03..860f1039d5e10290d84d1761bc7337e49fa210eb 100644 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu @@ -80,6 +80,12 @@ int PReluPlugin::enqueue(int batch_size, const void *const *inputs, #if IS_TRT_VERSION_GE(6000) +void PReluPluginDynamic::terminate() { + if (p_gpu_weight_) { + cudaFree(p_gpu_weight_); + } +} + int PReluPluginDynamic::initialize() { cudaMalloc(&p_gpu_weight_, sizeof(float) * weight_.size()); cudaMemcpy(p_gpu_weight_, weight_.data(), weight_.size() * sizeof(float), diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h index 4756ca2e0225795edc3bd3112b21e3b628ad5c0b..3126366c5fdd8bb69a78cea11f5778c45de738ec 100644 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h @@ -102,12 +102,15 @@ class PReluPluginDynamic : public DynamicPluginTensorRT { } ~PReluPluginDynamic() { cudaFree(p_gpu_weight_); } nvinfer1::IPluginV2DynamicExt* clone() const override { - return new PReluPluginDynamic(weight_.data(), weight_.size(), mode_); + auto ptr = new PReluPluginDynamic(weight_.data(), weight_.size(), mode_); + ptr->p_gpu_weight_ = p_gpu_weight_; + return ptr; } const char* getPluginType() const override { return "prelu_plugin"; } int getNbOutputs() const override { return 1; } int initialize() override; + void terminate() override; size_t getSerializationSize() const override; void serialize(void* buffer) const override; diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h index 8fe1edc4bf0321b054322a27f0c16819bc023ed8..24cd8e0368182ae597e48765bc0167ca1eca6bd3 100644 --- a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h @@ -51,8 +51,11 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT { } nvinfer1::IPluginV2DynamicExt* clone() const override { - return new SkipLayerNormPluginDynamic( + auto ptr = new SkipLayerNormPluginDynamic( bias_.data(), scale_.data(), bias_size_, scale_size_, eps_, ban_fp16_); + ptr->bias_gpu_ = bias_gpu_; + ptr->scale_gpu_ = bias_gpu_; + return ptr; } const char* getPluginType() const override { return "skip_layernorm_plugin"; } diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 959ba2288acc0d214a82b5b1abc1556e1cac3e2a..9a3a73f6c946d76f0da1b18feb8e7d61c0bf59b6 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -471,19 +471,10 @@ if(WITH_GPU AND TENSORRT_FOUND) 
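A few hunks above, Vec2TRT_Dims now copies every entry of the shape into the returned dims rather than padding everything to a fixed 4-D layout. A standalone illustration with a simplified stand-in for nvinfer1::Dims (the dimension cap of 8 is an assumption of this sketch):

#include <cstdint>
#include <vector>

// Simplified stand-in for nvinfer1::Dims, used only for illustration.
struct Dims {
  int32_t nbDims = 0;
  int32_t d[8] = {0};
};

// Build dims from the whole shape vector instead of forcing {d0, d1, 1, 1}.
Dims ToDims(const std::vector<int>& shape) {
  Dims dims;
  dims.nbDims = static_cast<int32_t>(shape.size());
  for (size_t i = 0; i < shape.size() && i < 8; ++i) {
    dims.d[i] = static_cast<int32_t>(shape[i]);
  }
  return dims;
}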
inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_unserialized.tgz") endif() - inference_analysis_test(test_trt_dynamic_shape_ernie_serialize SRCS trt_dynamic_shape_ernie_deserialize_test.cc + inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_deserialize_test.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized) - set(TEST_TRT_ERNIE_SER_MODEL "${TRT_MODEL_INSTALL_DIR}/ernie_test/ernie_model_4_serialized/") - if (NOT EXISTS ${TEST_TRT_ERNIE_SER_MODEL}) - inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_serialized.tgz") - endif() - - inference_analysis_test(test_trt_dynamic_shape_ernie_deserialize SRCS trt_dynamic_shape_ernie_deserialize_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_serialized) - endif() set(LITE_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lite") diff --git a/paddle/fluid/inference/tests/api/lite_resnet50_test.cc b/paddle/fluid/inference/tests/api/lite_resnet50_test.cc index 0aea47ae7fab1be3bafe35af575e9a2bea2d8420..5840a4c42b3b1065410dc1509cf0cee2480bd596 100644 --- a/paddle/fluid/inference/tests/api/lite_resnet50_test.cc +++ b/paddle/fluid/inference/tests/api/lite_resnet50_test.cc @@ -66,7 +66,7 @@ TEST(AnalysisPredictor, use_gpu) { float* data_o = static_cast(outputs[0].data.data()); for (size_t j = 0; j < outputs[0].data.length() / sizeof(float); j += 10) { EXPECT_NEAR((data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0., - 10e-5); + 12e-5); } } diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc index 6526b87436557b7f0c5c6dc5d3b59f2d70323d84..7e5dfa2424dbca4fb3a8a08e3d7fa7fbc3060d3d 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc @@ -123,8 +123,11 @@ void trt_ernie(bool with_fp16, std::vector result) { config.EnableTensorRtEngine(1 << 30, 1, 5, precision, true, false); config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape, opt_input_shape); + AnalysisConfig* config_deser = new AnalysisConfig(config); + std::vector out_data; - run(config, &out_data); + run(config, &out_data); // serialize + run(*config_deser, &out_data); // deserialize for (size_t i = 0; i < out_data.size(); i++) { EXPECT_NEAR(result[i], out_data[i], 1e-6); } diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc index babe9977cd571f588f0bdc5a6723d4b05afab72b..c99ebcdcb5f319f73b7fd931d13f27684db39cad 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc @@ -126,7 +126,7 @@ void trt_ernie(bool with_fp16, std::vector result) { std::vector out_data; run(config, &out_data); for (size_t i = 0; i < out_data.size(); i++) { - EXPECT_NEAR(result[i], out_data[i], 1e-6); + EXPECT_NEAR(result[i], out_data[i], 1e-5); } } diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index bd1908ac65509343530aa57489661637eed72595..9cc7c267454a4dbd4e1f62ec971e4160d6088913 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ 
b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -23,6 +23,8 @@ cc_library(retry_allocator SRCS retry_allocator.cc DEPS allocator) nv_library(pinned_allocator SRCS pinned_allocator.cc DEPS allocator) if (WITH_GPU) set(AllocatorFacadeDeps gpu_info cuda_allocator pinned_allocator cuda_device_guard thread_local_allocator) +elseif(WITH_XPU) + set(AllocatorFacadeDeps xpu_info) else () set(AllocatorFacadeDeps) endif() diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 2ab0d69ef806155adbff83e523a1242e51c2c7fc..3213684c140b02e1fa4b846cb0448f9bc9d8f3ee 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -39,6 +39,9 @@ #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/gpu_info.h" #endif +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu_info.h" +#endif DEFINE_int64( gpu_allocator_retry_time, 10000, @@ -62,6 +65,11 @@ class AllocatorFacadePrivate { switch (strategy) { case AllocatorStrategy::kNaiveBestFit: { InitNaiveBestFitCPUAllocator(); +#ifdef PADDLE_WITH_XPU + for (int dev_id = 0; dev_id < platform::GetXPUDeviceCount(); ++dev_id) { + InitNaiveBestFitXPUAllocator(platform::XPUPlace(dev_id)); + } +#endif #ifdef PADDLE_WITH_CUDA for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) { @@ -74,6 +82,11 @@ class AllocatorFacadePrivate { case AllocatorStrategy::kAutoGrowth: { InitNaiveBestFitCPUAllocator(); +#ifdef PADDLE_WITH_XPU + for (int dev_id = 0; dev_id < platform::GetXPUDeviceCount(); ++dev_id) { + InitNaiveBestFitXPUAllocator(platform::XPUPlace(dev_id)); + } +#endif #ifdef PADDLE_WITH_CUDA for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) { @@ -86,6 +99,11 @@ class AllocatorFacadePrivate { case AllocatorStrategy::kThreadLocal: { InitNaiveBestFitCPUAllocator(); +#ifdef PADDLE_WITH_XPU + for (int dev_id = 0; dev_id < platform::GetXPUDeviceCount(); ++dev_id) { + InitNaiveBestFitXPUAllocator(platform::XPUPlace(dev_id)); + } +#endif #ifdef PADDLE_WITH_CUDA for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) { @@ -127,6 +145,13 @@ class AllocatorFacadePrivate { private: void InitSystemAllocators() { system_allocators_[platform::CPUPlace()] = std::make_shared(); +#ifdef PADDLE_WITH_XPU + int device_count = platform::GetXPUDeviceCount(); + for (int i = 0; i < device_count; ++i) { + platform::XPUPlace p(i); + system_allocators_[p] = std::make_shared(p); + } +#endif #ifdef PADDLE_WITH_CUDA system_allocators_[platform::CUDAPinnedPlace()] = std::make_shared(); @@ -164,6 +189,12 @@ class AllocatorFacadePrivate { } #endif +#ifdef PADDLE_WITH_XPU + void InitNaiveBestFitXPUAllocator(platform::XPUPlace p) { + allocators_[p] = std::make_shared(p); + } +#endif + class ZeroSizeAllocator : public Allocator { public: explicit ZeroSizeAllocator(platform::Place place) : place_(place) {} @@ -191,6 +222,12 @@ class AllocatorFacadePrivate { } places.emplace_back(platform::CUDAPinnedPlace()); #endif +#ifdef PADDLE_WITH_XPU + int device_count = platform::GetXPUDeviceCount(); + for (int dev_id = 0; dev_id < device_count; ++dev_id) { + places.emplace_back(platform::XPUPlace(dev_id)); + } +#endif for (auto& p : places) { zero_size_allocators_[p] = std::make_shared(p); diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index 
907a266e7b2bcd30e65ca71ab3dbae7f9b110b3b..92e3933a072832fa42520e67f455d3dc90118518 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -29,6 +29,9 @@ #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cuda_device_guard.h" #endif +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu_header.h" +#endif DEFINE_bool(init_allocated_mem, false, "It is a mistake that the values of the memory allocated by " @@ -101,6 +104,100 @@ size_t Used(const platform::CPUPlace &place) { return GetCPUBuddyAllocator()->Used(); } +template <> +void *Alloc(const platform::XPUPlace &place, size_t size) { +#ifdef PADDLE_WITH_XPU + VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); + void *p = nullptr; + int dev_id = -1; + int ret = xpu_current_device(&dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + if (dev_id >= 64) { + // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id + dev_id -= 64; + } + ret = xpu_set_device(place.device); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + ret = xpu_malloc(reinterpret_cast(&p), size); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + if (FLAGS_init_allocated_mem) { + PADDLE_THROW(platform::errors::Unimplemented( + "xpu memory FLAGS_init_allocated_mem is not implemented.")); + } + ret = xpu_set_device(dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + VLOG(10) << " pointer=" << p; + return p; +#else + PADDLE_THROW( + platform::errors::PermissionDenied("'XPUPlace' is not supported.")); + return nullptr; +#endif +} + +template <> +void Free(const platform::XPUPlace &place, void *p, + size_t size) { +#ifdef PADDLE_WITH_XPU + VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); + VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place); + int dev_id = -1; + int ret = xpu_current_device(&dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + if (dev_id >= 64) { + // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id + dev_id -= 64; + } + ret = xpu_set_device(place.device); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + xpu_free(p); + ret = xpu_set_device(dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); +#else + PADDLE_THROW( + platform::errors::PermissionDenied("'XPUPlace' is not supported.")); +#endif +} + +template <> +size_t Used(const platform::XPUPlace &place) { +#ifdef PADDLE_WITH_XPU + printf("Used func return 0 for XPUPlace\n"); + return 0; +#else + PADDLE_THROW( + platform::errors::PermissionDenied("'XPUPlace' is not 
supported.")); +#endif +} + #ifdef PADDLE_WITH_CUDA class GPUBuddyAllocatorList { private: diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index b19f02db1c0ddf17c84536bf5d512bbd823909b2..225b6858cc1f2a5afc9d612958694d0d940e2e7b 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -18,6 +18,10 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler.h" +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu_header.h" +#endif + namespace paddle { namespace memory { @@ -29,6 +33,169 @@ void Copy(platform::CPUPlace, void* dst, std::memcpy(dst, src, num); } +#ifdef PADDLE_WITH_XPU +template <> +void Copy(platform::XPUPlace dst_place, + void* dst, + platform::CPUPlace src_place, + const void* src, size_t num) { + if (num <= 0) { + VLOG(0) << "memcpy XPU_HOST_TO_DEVICE size <= 0 (" << num << ")"; + return; + } + int dev_id = -1; + int ret = xpu_current_device(&dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + if (dev_id >= 64) { + // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id + dev_id -= 64; + } + if (dev_id != dst_place.device) { + ret = xpu_set_device(dst_place.device); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + } + ret = xpu_memcpy(dst, src, num, XPUMemcpyKind::XPU_HOST_TO_DEVICE); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + if (dev_id != dst_place.device) { + ret = xpu_set_device(dev_id); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + } +} + +template <> +void Copy(platform::CPUPlace dst_place, + void* dst, + platform::XPUPlace src_place, + const void* src, size_t num) { + if (num <= 0) { + VLOG(0) << "memcpy XPU_DEVICE_TO_HOST size <= 0 (" << num << ")"; + return; + } + int dev_id = -1; + int ret = xpu_current_device(&dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + if (dev_id >= 64) { + // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id + dev_id -= 64; + } + if (dev_id != src_place.device) { + ret = xpu_set_device(src_place.device); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + } + ret = xpu_memcpy(dst, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + if (dev_id != src_place.device) { + ret = xpu_set_device(dev_id); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + } +} + +template <> +void Copy(platform::XPUPlace dst_place, + void* dst, + platform::XPUPlace src_place, + 
const void* src, size_t num) { + if (num <= 0) { + VLOG(0) << "memcpy XPU_DEVICE_TO_DEVICE size <= 0 (" << num << ")"; + return; + } + int dev_id = -1; + int ret = xpu_current_device(&dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + if (dev_id >= 64) { + // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id + dev_id -= 64; + } + if (dev_id != src_place.device || dev_id != dst_place.device) { + ret = xpu_set_device(src_place.device); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + void* tmp = malloc(num); + ret = xpu_memcpy(tmp, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + ret = xpu_set_device(dst_place.device); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + ret = xpu_memcpy(dst, tmp, num, XPUMemcpyKind::XPU_HOST_TO_DEVICE); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + ret = xpu_set_device(dev_id); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + free(tmp); + } else { + int ret = xpu_memcpy(dst, src, num, XPUMemcpyKind::XPU_DEVICE_TO_DEVICE); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + } +} +#endif + #ifdef PADDLE_WITH_CUDA static constexpr size_t kMaxGpuAsyncCopyBytes = 64 * 1024; // 64K diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index e74f363d886e4601d07c1b2a7d79d8c915b59e93..6e8ff52ed4a8846f5f6060e10cfd9bec22308e9e 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -88,7 +88,9 @@ endif() cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEPS operator) -set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor device_memory_aligment) +set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows +lod_tensor maxouting unpooling pooling lod_rank_table context_project +sequence_pooling executor device_memory_aligment generator) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc matrix_inverse) @@ -121,7 +123,7 @@ cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_t cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory) cc_test(save_load_op_test SRCS 
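Each XPU copy path above follows the same discipline: query the current device, subtract 64 for simulator devices, switch to the device that owns the buffer, do the transfer, then switch back. A compact standalone sketch of that save/switch/restore pattern (the two device functions are stubs standing in for the XPU runtime calls, not real API):

#include <functional>

namespace stub {  // stand-ins for xpu_current_device / xpu_set_device
int g_device = 0;
inline int current_device() { return g_device; }
inline void set_device(int id) { g_device = id; }
}  // namespace stub

// Run `work` with `target` selected, restoring the previous device after.
void WithDevice(int target, const std::function<void()>& work) {
  int saved = stub::current_device();
  if (saved >= 64) saved -= 64;  // simulator devices report id + 64
  if (saved != target) stub::set_device(target);
  work();
  if (saved != target) stub::set_device(saved);
}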
save_load_op_test.cc DEPS save_op load_op) cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op) -nv_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor) +nv_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor generator) if (WITH_GPU) nv_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc test_leaky_relu_grad_grad_functor.cu DEPS tensor device_context eigen3) else() diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 107d333d3a8593e6e3d7afb38c7688d80f2441f8..63b3b0f1a3408154a2d1c8aff76a85a95ad044f6 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -219,7 +219,7 @@ $$out = \\frac{1}{\\sqrt{x}}$$ )DOC"; UNUSED constexpr char AbsDoc[] = R"DOC( -Abs Activation Operator. +Abs Operator. $$out = |x|$$ @@ -242,6 +242,9 @@ $$out = \\left \\lfloor x \\right \\rfloor$$ UNUSED constexpr char CosDoc[] = R"DOC( Cosine Operator. Computes cosine of x element-wise. +Input range is `(-inf, inf)` and output range is `[-1,1]`. +Return `nan` if input is out of boundary. + $$out = cos(x)$$ )DOC"; @@ -314,13 +317,6 @@ $$out = x^2$$ )DOC"; -UNUSED constexpr char SoftplusDoc[] = R"DOC( -Softplus Activation Operator. - -$$out = \ln(1 + e^{x})$$ - -)DOC"; - UNUSED constexpr char SoftsignDoc[] = R"DOC( Softsign Activation Operator. @@ -334,7 +330,7 @@ class AcosOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "Input of acos operator"); AddOutput("Out", "Output of acos operator"); AddComment(R"DOC( -Arccosine Activation Operator. +Arccosine Operator. $$out = \cos^{-1}(x)$$ @@ -348,7 +344,7 @@ class AsinOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "Input of asin operator"); AddOutput("Out", "Output of asin operator"); AddComment(R"DOC( -Arcsine Activation Operator. +Arcsine Operator. $$out = \sin^{-1}(x)$$ @@ -362,9 +358,9 @@ class AtanOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "Input of atan operator"); AddOutput("Out", "Output of atan operator"); AddComment(R"DOC( -Arctanh Activation Operator. +Arctangent Operator. -$$out = \tanh^{-1}(x)$$ +$$out = \tan^{-1}(x)$$ )DOC"); } @@ -393,6 +389,36 @@ $$out = \max(x, \alpha * x)$$ } }; +class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "Input of Softplus operator, an N-D Tensor, with data type " + "float32, float64 or float16."); + AddOutput( + "Out", + "Output of Softplus operator, a Tensor with shape same as input."); + AddAttr("beta", "The value of beta for Softplus.").SetDefault(1.0f); + AddAttr("threshold", "The value of threshold for Softplus.") + .SetDefault(20.0f); + AddAttr("use_mkldnn", + "(bool, default false) Only used in mkldnn kernel.") + .SetDefault(false); + AddAttr( + "use_cudnn", + "(bool, default false) Only used in cudnn kernel, need install cudnn.") + .SetDefault(false); + AddComment(R"DOC( +:strong:`Softplus Activation Operator` + +.. math:: + out = \frac{1}{\beta} * \log(1 + \exp(\beta * x)) \\ + \text{For numerical stability, the implementation reverts to the linear function when :}\,x \times \beta > threshold. 
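The stability note above can be made concrete with a scalar version of the formula (illustrative C++ only, not the Eigen functor the operator actually uses):

#include <cmath>

// softplus(x) = log(1 + exp(beta * x)) / beta, reverting to the identity
// once beta * x exceeds the threshold so exp() cannot overflow
// (beta = 1, threshold = 20 are the operator's defaults).
float SoftplusRef(float x, float beta = 1.0f, float threshold = 20.0f) {
  const float x_beta = beta * x;
  return x_beta > threshold ? x : std::log1p(std::exp(x_beta)) / beta;
}

// Matching gradient: d/dx = 1 / (1 + exp(-beta * x)), likewise passed
// through unchanged in the saturated region.
float SoftplusGradRef(float x, float dout, float beta = 1.0f,
                      float threshold = 20.0f) {
  const float x_beta = beta * x;
  return x_beta > threshold ? dout : dout / (1.0f + std::exp(-x_beta));
}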
+ +)DOC"); + } +}; + class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { @@ -669,7 +695,6 @@ REGISTER_ACTIVATION_OP_MAKER(Reciprocal, ReciprocalDoc); REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc); REGISTER_ACTIVATION_OP_MAKER(Log1p, Log1pDoc); REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc); -REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc); REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc); template @@ -756,8 +781,8 @@ class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker { } }; -// leaky_relu Grad: dx=dy if y>=0 else alpha * dy -// leaky_relu GradGrad: ddy=ddx if y>=0 else alpha * ddx +// leaky_relu Grad: dx=dy if x>=0 else alpha * dy +// leaky_relu GradGrad: ddy=ddx if x>=0 else alpha * ddx template class LeakyReluDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker { @@ -767,8 +792,8 @@ class LeakyReluDoubleGradMaker protected: void Apply(GradOpPtr op) const override { op->SetType("leaky_relu_grad_grad"); - // input1: Out - op->SetInput("Out", this->Input("Out")); + // input1: X + op->SetInput("X", this->Input("X")); // X@GRAD@GRAD: ddx op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X"))); op->SetAttrMap(this->Attrs()); diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 3aac7ae8a5e8a9e889242b59f42a29af08ad1c46..00a7c063c9155488d117332d5ef3541d16d76bdb 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -388,9 +388,9 @@ struct HardShrinkFunctor : public BaseActivationFunctor { } template void operator()(Device d, X x, Out out) const { - auto temp1 = (x < static_cast(threshold * -1)).template cast(); - auto temp2 = (x > static_cast(threshold)).template cast(); - out.device(d) = x * (temp1 + temp2); + auto temp1 = x < static_cast(threshold * -1.f); + auto temp2 = x > static_cast(threshold); + out.device(d) = x * (temp1 + temp2 > 0).template cast(); } }; @@ -405,9 +405,9 @@ struct HardShrinkGradFunctor : public BaseActivationFunctor { template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - auto temp1 = (x < static_cast(threshold * -1)).template cast(); - auto temp2 = (x > static_cast(threshold)).template cast(); - dx.device(d) = dout * (temp1 + temp2).template cast(); + auto temp1 = x < static_cast(threshold * -1.f); + auto temp2 = x > static_cast(threshold); + dx.device(d) = dout * (temp1 + temp2 > 0).template cast(); } static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } @@ -975,32 +975,46 @@ struct HardSwishGradFunctor : public BaseActivationFunctor { static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; -// softplus(x) = log(1 + exp(x)) -// When x is a very large positive number, exp(x) may explode to inf, -// Using trick below for numerical stability -// https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/ -// Then: softplus(x) = max(x, 0) + log(exp(-max(x, 0)) + exp(x - max(x, 0))) +// For numerical stability, using the following formula instead of softplus(x) = +// log(1 + exp(x)) +// softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <= threshold(beta = +// 1, threshold = 20 by default), otherwise x template struct SoftplusFunctor : public BaseActivationFunctor { + float beta; + float threshold; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"beta", &beta}, {"threshold", &threshold}}; + } + template void operator()(Device d, X x, Out out) { - auto temp = x.cwiseMax(static_cast(0)); // temp = max(x, 0) - 
out.device(d) = temp + (((-temp).exp() + (x - temp).exp()).log()); + auto x_beta = static_cast(beta) * x; + out.device(d) = (x_beta > static_cast(threshold)) + .select(x, (static_cast(1) + x_beta.exp()).log() / + static_cast(beta)); } }; -// d(softplus(x))/dx = exp(x) / (1 + exp(x)) -// For numerical stability: -// d(softplus(x))/dx = exp(x - max(x, 0)) / (exp(-max(x, 0)) + -// exp(x - max(x, 0))) +// For numerical stability, using the following formula instead of +// d(softplus(x))/dx = 1 / (1 + exp(-x)) +// d(softplus(x))/dx = 1 / (1 + exp(-beta * x)) when beta * x <= threshold(beta +// = 1, threshold = 20 by default), otherwise x template struct SoftplusGradFunctor : public BaseActivationFunctor { + float beta; + float threshold; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"beta", &beta}, {"threshold", &threshold}}; + } + template void operator()(Device d, X x, Out out, dOut dout, dX dx) { - auto temp = x.cwiseMax(static_cast(0)); // temp = max(x, 0) + auto x_beta = static_cast(beta) * x; dx.device(d) = - dout * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp())); + (x_beta > static_cast(threshold)) + .select(dout, dout / (static_cast(1) + (-x_beta).exp())); } static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } @@ -1070,7 +1084,11 @@ struct LeakyReluFunctor : public BaseActivationFunctor { template void operator()(Device d, X x, Out out) const { - out.device(d) = x.cwiseMax(static_cast(alpha) * x); + if (alpha < 1.f) { + out.device(d) = x.cwiseMax(static_cast(alpha) * x); + } else { + out.device(d) = x.cwiseMin(static_cast(alpha) * x); + } } }; @@ -1084,12 +1102,12 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor { typename dX> void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp1 = - static_cast(alpha) * (out <= static_cast(0)).template cast(); - auto temp2 = (out > static_cast(0)).template cast(); + static_cast(alpha) * (x < static_cast(0)).template cast(); + auto temp2 = (x >= static_cast(0)).template cast(); dx.device(d) = dout * (temp1 + temp2).template cast(); } - static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; template @@ -1116,9 +1134,20 @@ struct ELUGradFunctor : public BaseActivationFunctor { template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * (x > static_cast(0)).template cast() + - dout * static_cast(alpha) * x.exp() * - (x <= static_cast(0)).template cast(); + auto temp_a_pos = static_cast(alpha > 0); + auto temp_a_neg = static_cast(alpha <= 0); + auto temp_x_pos = (x > static_cast(0)).template cast(); + auto temp_x_neg = (x <= static_cast(0)).template cast(); + + // dx = dout, if alpha > 0 and x > 0 + // dx = dout * alpha * x.exp(), if alpha > 0 and x <= 0 + // dx = dout * (1 + alpha * x.exp()), if alpha <= 0 and x > 0 + // dx = 0, if alpha <= 0 and x <=0 + dx.device(d) = + dout * temp_a_pos * temp_x_pos + + dout * static_cast(alpha) * x.exp() * temp_a_pos * temp_x_neg + + dout * (static_cast(1) + static_cast(alpha) * x.exp()) * + temp_a_neg * temp_x_pos; } static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } @@ -1437,18 +1466,18 @@ struct LeakyReluGradGradFunctor : public BaseActivationFunctor { auto* d = dev.eigen_device(); auto ddx = framework::EigenVector::Flatten( GET_DATA_SAFELY(ddX, "Input", "DDX", "LeakyReluGradGrad")); - auto out = framework::EigenVector::Flatten( - GET_DATA_SAFELY(Out, "Output", "Out", "LeakyReluGradGrad")); + auto x = 
framework::EigenVector::Flatten( + GET_DATA_SAFELY(X, "Input", "X", "LeakyReluGradGrad")); auto ddout = framework::EigenVector::Flatten( GET_DATA_SAFELY(ddOut, "Output", "DOut", "LeakyReluGradGrad")); - ddout.device(*d) = ddx * - ((out > static_cast(0)).template cast() + - static_cast(alpha) * - (out <= static_cast(0)).template cast()) - .template cast(); + ddout.device(*d) = + ddx * + ((x > static_cast(0)).template cast() + + static_cast(alpha) * (x <= static_cast(0)).template cast()) + .template cast(); } } - static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; template diff --git a/paddle/fluid/operators/affine_grid_op.cc b/paddle/fluid/operators/affine_grid_op.cc index f7cc513b234e6e440507af28189ac236b71f9d15..d1a3695015abdb9ce13c4f807d1abacdf0af024d 100644 --- a/paddle/fluid/operators/affine_grid_op.cc +++ b/paddle/fluid/operators/affine_grid_op.cc @@ -28,10 +28,15 @@ using Tensor = framework::Tensor; template struct Linspace { - void operator()(T start, T end, int count, framework::Tensor* numbers, + void operator()(T start, T end, int count, bool align_corners, + framework::Tensor* numbers, const framework::ExecutionContext& ctx) { T* number_data = numbers->mutable_data({count}, platform::CPUPlace()); T slice = (end - start) / (T)(count - 1); + if (!align_corners) { + slice = (end - start) / (T)count; + start *= (T)(count - 1) / (T)count; + } for (int i = 0; i < count; ++i) { number_data[i] = start + (T)i * slice; } @@ -130,6 +135,10 @@ class AffineGridOpMaker : public framework::OpProtoAndCheckerMaker { "use_cudnn", "(bool, default false) Only used in cudnn kernel, need install cudnn") .SetDefault(true); + AddAttr("align_corners", + "(bool, default false) Whether to align the corners of input" + "and ouput.") + .SetDefault(true); AddAttr>( "output_shape", "The target output image shape with format [N, C, H, W].") @@ -164,10 +173,12 @@ class AffineGridOpMaker : public framework::OpProtoAndCheckerMaker { [-1. -0.5 0. 0.5 1. ] [-1. -0.5 0. 0.5 1. ] [-1. -0.5 0. 0.5 1. ]]] - C[0] is the coordinates in height axis and C[1] is the coordinates in width axis. + C[0] is the coordinates in height axis and C[1] is the coordinates in + width axis. Step2: - Tanspose and reshape C to shape [H * W, 2] and append ones to last dimension. The we get: + Tanspose and reshape C to shape [H * W, 2] and append ones to last + dimension. The we get: C_ = [[-1. -1. 1. ] [-0.5 -1. 1. ] [ 0. -1. 1. ] diff --git a/paddle/fluid/operators/affine_grid_op.cu b/paddle/fluid/operators/affine_grid_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..eca8246533fea4df5a057252ce8a79aef2bfe565 --- /dev/null +++ b/paddle/fluid/operators/affine_grid_op.cu @@ -0,0 +1,209 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
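The Linspace change above (mirrored by the CUDA version in the new affine_grid_op.cu below) shifts the sample points when align_corners is false: the step divides by count instead of count - 1 and the start is pulled inward, so for the [-1, 1] range used here the samples land at cell centers rather than on the corners. A standalone version with a worked example (illustrative only):

#include <vector>

std::vector<float> Linspace(float start, float end, int count,
                            bool align_corners) {
  float step = (end - start) / static_cast<float>(count - 1);
  if (!align_corners) {
    step = (end - start) / static_cast<float>(count);
    start *= static_cast<float>(count - 1) / static_cast<float>(count);
  }
  std::vector<float> out(count);
  for (int i = 0; i < count; ++i) {
    out[i] = start + static_cast<float>(i) * step;
  }
  return out;
}
// Linspace(-1, 1, 5, true)  -> {-1.0, -0.5, 0.0, 0.5, 1.0}
// Linspace(-1, 1, 5, false) -> {-0.8, -0.4, 0.0, 0.4, 0.8}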
*/ + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/affine_grid_op.h" +#include "paddle/fluid/platform/cuda_device_function.h" +#include "paddle/fluid/platform/gpu_info.h" +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +__global__ void LinspaceKernel(T start, T step, int64_t size, T* out) { + CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; } +} + +template +struct Linspace { + void operator()(T start, T end, int count, bool align_corners, + framework::Tensor* numbers, + const framework::ExecutionContext& ctx) { + T* number_data = numbers->mutable_data({count}, ctx.GetPlace()); + T slice = (end - start) / (T)(count - 1); + if (!align_corners) { + slice = (end - start) / (T)count; + start *= (T)(count - 1) / (T)count; + } + auto stream = ctx.cuda_device_context().stream(); + int block = 512; + int grid = (count + block - 1) / block; + LinspaceKernel<<>>(start, slice, count, + number_data); + } +}; + +template +__global__ void affine_grid_kernel(const int count, int n, int out_h, int out_w, + T h_start, T w_start, T h_step, T w_step, + const T* theta, // N, 2, 3 + T* output) { + CUDA_KERNEL_LOOP(index, count) { + int w = index % out_w; + int h = (index / out_w) % out_h; + int n = index / (out_w * out_h); + + T h_coor = h_step * static_cast(h) + static_cast(h_start); + T w_coor = w_step * static_cast(w) + static_cast(w_start); + + int theta_offset = n * 6; // 2 * 3; + // affine from (h_coor, w_coor) to (x, y) + output[index * 2] = theta[theta_offset] * h_coor + + theta[theta_offset + 1] * w_coor + + theta[theta_offset + 2]; + output[index * 2 + 1] = theta[theta_offset + 3] * h_coor + + theta[theta_offset + 4] * w_coor + + theta[theta_offset + 5]; + } +} + +template +__global__ void affine_grid_grad_kernel(const int count, int n, int out_h, + int out_w, T h_start, T w_start, + T h_step, T w_step, + const T* out_grad, // N, H, W, 2 + T* theta_grad) { // N, 2, 3 + CUDA_KERNEL_LOOP(index, count) { + int w = index % out_w; + int h = (index / out_w) % out_h; + int n = index / (out_w * out_h); + T h_coor = h_step * static_cast(h) + static_cast(h_start); + T w_coor = w_step * static_cast(w) + static_cast(w_start); + + int theta_offset = n * 6; // 2 * 3; + T out_grad_x = out_grad[index * 2]; + atomicAdd(theta_grad + theta_offset, out_grad_x * h_coor); + atomicAdd(theta_grad + theta_offset + 1, out_grad_x * w_coor); + atomicAdd(theta_grad + theta_offset + 2, out_grad_x); + + T out_grad_y = out_grad[index * 2 + 1]; + atomicAdd(theta_grad + theta_offset + 3, out_grad_y * h_coor); + atomicAdd(theta_grad + theta_offset + 4, out_grad_y * w_coor); + atomicAdd(theta_grad + theta_offset + 5, out_grad_y); + } +} + +template +class AffineGridOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* theta = ctx.Input("Theta"); + int n = theta->dims()[0]; + auto size_attr = ctx.Attr>("output_shape"); + auto align_corners = ctx.Attr("align_corners"); + int h = 0; + int w = 0; + if (size_attr.size() == 0) { + auto* output_shape = ctx.Input("OutputShape"); + Tensor h_sizes; + framework::TensorCopy(*output_shape, platform::CPUPlace(), &h_sizes); + const int* h_size_data = h_sizes.data(); + h = h_size_data[2]; + w = h_size_data[3]; + } else { + h = size_attr[2]; + w = size_attr[3]; + } + auto* output = ctx.Output("Output"); + T* out_data = output->mutable_data({n, h, w, 2}, ctx.GetPlace()); + + T h_step; + T w_step; + T h_start = -1; + T w_start = 
-1; + if (align_corners) { + h_step = static_cast(2) / static_cast(h - 1); + w_step = static_cast(2) / static_cast(w - 1); + } else { + h_step = static_cast(2) / static_cast(h); + w_step = static_cast(2) / static_cast(w); + + h_start *= static_cast(h - 1) / static_cast(h); + w_start *= static_cast(w - 1) / static_cast(w); + } + + const int count = n * h * w; + int block = 512; + int grid = (count + block - 1) / block; + auto cu_stream = ctx.cuda_device_context().stream(); + affine_grid_kernel<<>>( + count, n, h, w, h_start, w_start, h_step, w_step, + theta->data(), // N, 2, 3 + out_data); + } +}; + +template +class AffineGridGradOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto output_grad = ctx.Input(framework::GradVarName("Output")); + auto theta_grad = ctx.Output(framework::GradVarName("Theta")); + int n = output_grad->dims()[0]; + auto size_attr = ctx.Attr>("output_shape"); + auto align_corners = ctx.Attr("align_corners"); + int h = 0; + int w = 0; + if (size_attr.size() == 0) { + auto* output_shape = ctx.Input("OutputShape"); + Tensor h_sizes; + framework::TensorCopy(*output_shape, platform::CPUPlace(), &h_sizes); + const int* h_size_data = h_sizes.data(); + h = h_size_data[2]; + w = h_size_data[3]; + } else { + h = size_attr[2]; + w = size_attr[3]; + } + T* theta_grad_data = theta_grad->mutable_data({n, 2, 3}, ctx.GetPlace()); + math::SetConstant()( + ctx.cuda_device_context(), theta_grad, static_cast(0)); + + T h_step; + T w_step; + T h_start = -1; + T w_start = -1; + if (align_corners) { + h_step = static_cast(2) / static_cast(h - 1); + w_step = static_cast(2) / static_cast(w - 1); + } else { + h_step = static_cast(2) / static_cast(h); + w_step = static_cast(2) / static_cast(w); + + h_start *= static_cast(h - 1) / static_cast(h); + w_start *= static_cast(w - 1) / static_cast(w); + } + const int count = n * h * w; + VLOG(3) << "count: " << count << "; h_step: " << h_step + << "; w_step: " << w_step << "; h_start: " << h_start + << "; w_start: " << w_start; + int block = 512; + int grid = (count + block - 1) / block; + auto cu_stream = ctx.cuda_device_context().stream(); + affine_grid_grad_kernel<<>>( + count, n, h, w, h_start, w_start, h_step, w_step, + output_grad->data(), theta_grad_data); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL(affine_grid, ops::AffineGridOpCUDAKernel, + ops::AffineGridOpCUDAKernel); +REGISTER_OP_CUDA_KERNEL(affine_grid_grad, + ops::AffineGridGradOpCUDAKernel, + ops::AffineGridGradOpCUDAKernel); diff --git a/paddle/fluid/operators/affine_grid_op.h b/paddle/fluid/operators/affine_grid_op.h index 73df8a38b96c30196a7e39d2cf1e348f2a7722ec..50c9ebcd9c8f52077d7f5d0abb10c631cbeee794 100644 --- a/paddle/fluid/operators/affine_grid_op.h +++ b/paddle/fluid/operators/affine_grid_op.h @@ -37,12 +37,13 @@ using Array4 = Eigen::DSizes; */ template struct Linspace { - void operator()(T start, T end, int count, framework::Tensor* numbers, + void operator()(T start, T end, int count, bool align_corners, + framework::Tensor* numbers, const framework::ExecutionContext& ctx); }; template -inline void GetIdxMap(int n, int h, int w, Tensor* grid, +inline void GetIdxMap(int n, int h, int w, bool align_corners, Tensor* grid, const framework::ExecutionContext& ctx) { auto& place = *ctx.template device_context().eigen_device(); grid->mutable_data({n, h, w, 3}, ctx.GetPlace()); @@ -50,16 +51,19 @@ inline void GetIdxMap(int n, 
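// Editorial sketch (not part of the patch): the CUDA kernel above and the
// Eigen-based GetIdxMap code in this hunk compute the same mapping; for one
// output location it reduces to a 2x3 affine transform of the normalized
// (h, w) coordinate. A rough scalar reference, with a hypothetical name:
//
//   out = Theta_n * [h_coor, w_coor, 1]^T
static inline void AffineGridPoint(const float* theta,  // 6 values of the n-th 2x3 matrix
                                   float h_coor, float w_coor, float* out2) {
  out2[0] = theta[0] * h_coor + theta[1] * w_coor + theta[2];
  out2[1] = theta[3] * h_coor + theta[4] * w_coor + theta[5];
}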
int h, int w, Tensor* grid, // Get indexes of height with shape [height, width, 1] Tensor h_idx; Linspace linspace; - linspace((T)-1, (T)1, h, &h_idx, ctx); + linspace((T)-1, (T)1, h, align_corners, &h_idx, ctx); auto h_idx_t = EigenTensor::From(h_idx); // Get indexes of width with shape [height, width, 1] Tensor w_idx; - linspace((T)-1, (T)1, w, &w_idx, ctx); + linspace((T)-1, (T)1, w, align_corners, &w_idx, ctx); auto w_idx_t = EigenTensor::From(w_idx); // Get constant ones tensor with shape [height, width, 1] Tensor ones; ones.mutable_data({h, w, 1}, ctx.GetPlace()); - auto ones_t = EigenTensor::From(ones).setConstant((T)1); + + math::SetConstant()( + ctx.template device_context(), &ones, static_cast(1)); + auto ones_t = EigenTensor::From(ones); // Get grid tensor with shape [n, h, w, 3] by concatenating h_idx, w_idx and // ones Tensor w_idx_map; @@ -74,11 +78,9 @@ inline void GetIdxMap(int n, int h, int w, Tensor* grid, Tensor w_h_one_idx_map; w_h_one_idx_map.mutable_data({h, w, 3}, ctx.GetPlace()); auto w_h_one_idx_map_t = EigenTensor::From(w_h_one_idx_map); - w_idx_map_t.device(place) = w_idx_t.reshape(Array2(1, w)) .broadcast(Array2(h, 1)) .reshape(Array3(h, w, 1)); - h_idx_map_t.device(place) = h_idx_t.reshape(Array2(1, h)) .broadcast(Array2(w, 1)) .shuffle(Array2(1, 0)) @@ -97,6 +99,7 @@ class AffineGridOpKernel : public framework::OpKernel { auto* theta = ctx.Input("Theta"); int n = theta->dims()[0]; auto size_attr = ctx.Attr>("output_shape"); + auto align_corners = ctx.Attr("align_corners"); int h = 0; int w = 0; if (size_attr.size() == 0) { @@ -116,7 +119,7 @@ class AffineGridOpKernel : public framework::OpKernel { ctx.template device_context(), output, static_cast(0)); Tensor grid; - GetIdxMap(n, h, w, &grid, ctx); + GetIdxMap(n, h, w, align_corners, &grid, ctx); // output = grid * theta.T // TODO(wanghaoshuang): Refine batched matrix multiply auto blas = math::GetBlas(ctx); @@ -140,6 +143,7 @@ class AffineGridGradOpKernel : public framework::OpKernel { auto theta_grad = ctx.Output(framework::GradVarName("Theta")); int n = output_grad->dims()[0]; auto size_attr = ctx.Attr>("output_shape"); + auto align_corners = ctx.Attr("align_corners"); int h = 0; int w = 0; if (size_attr.size() == 0) { @@ -158,7 +162,7 @@ class AffineGridGradOpKernel : public framework::OpKernel { ctx.template device_context(), theta_grad, static_cast(0)); Tensor grid; - GetIdxMap(n, h, w, &grid, ctx); + GetIdxMap(n, h, w, align_corners, &grid, ctx); // output = grid * theta.T // TODO(wanghaoshuang): Refine batched matrix multiply auto blas = math::GetBlas(ctx); diff --git a/paddle/fluid/operators/allclose_op.cc b/paddle/fluid/operators/allclose_op.cc index 911757007266c9ff88b0e348d350909ce0ff0bce..736483c3304ac32491de4fd98879fbfef04f7110 100644 --- a/paddle/fluid/operators/allclose_op.cc +++ b/paddle/fluid/operators/allclose_op.cc @@ -22,9 +22,11 @@ namespace operators { class AllcloseOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Input", "The first input tensor to compare."); - AddInput("Other", "The second input tensor to compare."); - AddOutput("Out", "The output tensor of allclose op."); + AddInput("Input", + "The input tensor, it's data type should be float32, float64."); + AddInput("Other", + "The input tensor, it's data type should be float32, float64."); + AddOutput("Out", "The output tensor, it's data type is bool."); AddAttr("rtol", "The relative tolerance. 
Default: :math:`1e-5` .") .SetDefault(1e-5); @@ -36,11 +38,12 @@ class AllcloseOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(false); AddComment(R"DOC( -This operator checks if all :math:`input` and :math:`other` satisfy the condition: +This operator checks if all :math:`x` and :math:`y` satisfy the condition: -:math:`\left| input - other \right| \leq atol + rtol \times \left| other \right|` +.. math:: + \left| x - y \right| \leq atol + rtol \times \left| y \right| -elementwise, for all elements of :math:`input` and :math:`other`. The behaviour of this +elementwise, for all elements of :math:`x` and :math:`y`. The behaviour of this operator is analogous to :math:`numpy.allclose`, namely that it returns :math:`True` if two tensors are elementwise equal within a tolerance. )DOC"); diff --git a/paddle/fluid/operators/arg_max_op.cu b/paddle/fluid/operators/arg_max_op.cu index 85e4f98173511435a52b32e506afc8d5b772f74f..14708c4df10f5160d0e72e7669e0015554d8215f 100644 --- a/paddle/fluid/operators/arg_max_op.cu +++ b/paddle/fluid/operators/arg_max_op.cu @@ -1,29 +1,22 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/arg_min_max_op_base.h" - -REGISTER_OP_CUDA_KERNEL( - arg_max, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel); +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/arg_min_max_op_base.cu.h" + +REGISTER_OP_CUDA_KERNEL( + arg_max, paddle::operators::ArgMinMaxOpCUDAKernel, + paddle::operators::ArgMinMaxOpCUDAKernel, + paddle::operators::ArgMinMaxOpCUDAKernel, + paddle::operators::ArgMinMaxOpCUDAKernel, + paddle::operators::ArgMinMaxOpCUDAKernel); diff --git a/paddle/fluid/operators/arg_min_max_op_base.cu.h b/paddle/fluid/operators/arg_min_max_op_base.cu.h new file mode 100644 index 0000000000000000000000000000000000000000..73581dac4e419ca9c970db4414ff54d4cbd3fd70 --- /dev/null +++ b/paddle/fluid/operators/arg_min_max_op_base.cu.h @@ -0,0 +1,192 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifdef __NVCC__ + +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/transpose_op.h" +#include "paddle/fluid/platform/device_context.h" + +namespace paddle { +namespace operators { + +namespace { // NOLINT +template +using KeyValuePair = cub::KeyValuePair; +using Tensor = framework::Tensor; + +} // end namespace + +#define FIXED_BLOCK_DIM_CASE_BASE(log2_block_dim, ...) \ + case (1 << (log2_block_dim)): { \ + constexpr auto kBlockDim = (1 << (log2_block_dim)); \ + __VA_ARGS__; \ + } break + +#define FIXED_BLOCK_DIM_CASE(...) \ + FIXED_BLOCK_DIM_CASE_BASE(10, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(9, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(8, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(7, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(6, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(5, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(4, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(3, ##__VA_ARGS__); + +template +__global__ void ArgCUDAKernel(const int64_t height, // n * h + const int64_t width, // c + const int64_t post_size, // h + const Reducer reducer, const T init, const T* in, + IndType* out) { + typedef cub::BlockReduce, BlockDim> BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + + for (int idx = blockIdx.x; idx < height; idx += gridDim.x) { + KeyValuePair kv_pair = {-1, init}; + int h = idx / post_size; + int w = idx % post_size; + for (int k = threadIdx.x; k < width; k += blockDim.x) { + kv_pair = + reducer({k, in[h * width * post_size + k * post_size + w]}, kv_pair); + } + kv_pair = BlockReduce(temp_storage).Reduce(kv_pair, reducer); + if (threadIdx.x == 0) { + out[idx] = static_cast(kv_pair.key); + } + __syncthreads(); + } +} + +template +void ComputeFullArg(const platform::CUDADeviceContext& ctx, const Tensor& input, + Tensor* indices, const int64_t pre, const int64_t post, + const int64_t n) { + auto cu_stream = ctx.stream(); + auto ComputeBlockSize = [](int64_t col) { + if (col > 512) + return 1024; + else if (col > 256) + return 512; + else if (col > 128) + return 256; + else if (col > 64) + return 128; + else if (col > 32) + return 64; + else if (col > 16) + return 32; + else if (col > 8) + return 16; + else + return 8; + }; + + int64_t max_grid_dimx = ctx.GetCUDAMaxGridDimSize().x; + int64_t height = pre * post; + int64_t width = n; + int64_t grid_size = height < max_grid_dimx ? 
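// Editorial note on how ComputeFullArg lays the problem out (a summary of the
// surrounding code, not new behavior): the reduced axis has length n ("width");
// all remaining dims are folded into pre (outer) and post (inner), giving
// height = pre * post independent rows. Each CUDA block reduces one row with
// cub::BlockReduce over a cub::KeyValuePair, using cub::ArgMax or cub::ArgMin as
// the reducer, and the launch grid is clamped to the device's maximum grid
// dimension, with blocks striding over rows when height exceeds it.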
height : max_grid_dimx; + + const T* in_data = input.data(); + IndType* out_data = indices->mutable_data(ctx.GetPlace()); + + if (typeid(Reducer) == typeid(cub::ArgMax)) { + switch (ComputeBlockSize(width)) { + FIXED_BLOCK_DIM_CASE( + ArgCUDAKernel<<>>( + height, width, post, Reducer(), std::numeric_limits::lowest(), + in_data, out_data)); + } + } else { + switch (ComputeBlockSize(width)) { + FIXED_BLOCK_DIM_CASE( + ArgCUDAKernel<<>>( + height, width, post, Reducer(), std::numeric_limits::max(), + in_data, out_data)); + } + } +} + +template +struct VisitDataCudaArgMinMaxFunctor { + const framework::ExecutionContext& ctx; + + explicit VisitDataCudaArgMinMaxFunctor(const framework::ExecutionContext& ctx) + : ctx(ctx) {} + template + void apply() const { + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + int axis = ctx.Attr("axis"); + const bool& flatten = ctx.Attr("flatten"); + + framework::DDim input_dims; + if (flatten) { + input_dims = framework::make_ddim({input->numel()}); + // if flatten, the axis just as 0 + axis = 0; + } else { + input_dims = input->dims(); + if (axis < 0) axis += input->dims().size(); + } + + int64_t numel = input->numel(); + int64_t groups = numel / input_dims[axis]; + int64_t pre = 1; + int64_t post = 1; + int64_t n = input_dims[axis]; + + for (int i = 0; i < axis; i++) { + pre *= input_dims[i]; + } + + for (int i = axis + 1; i < input_dims.size(); i++) { + post *= input_dims[i]; + } + + const auto& dev_ctx = ctx.cuda_device_context(); + ComputeFullArg(dev_ctx, *input, output, pre, post, n); + } +}; +template +class ArgMinMaxOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& dtype = ctx.Attr("dtype"); + if (dtype < 0) { + framework::VisitDataType(static_cast( + framework::proto::VarType::INT64), + VisitDataCudaArgMinMaxFunctor(ctx)); + return; + } + framework::VisitDataType( + static_cast(dtype), + VisitDataCudaArgMinMaxFunctor(ctx)); + } +}; + +#endif + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/arg_min_max_op_base.h b/paddle/fluid/operators/arg_min_max_op_base.h index 0fc7b47c62ea9d7da805b797fcf5e4db4e39328d..ae3637f6f99783d70bd57a3935a979b0387692de 100644 --- a/paddle/fluid/operators/arg_min_max_op_base.h +++ b/paddle/fluid/operators/arg_min_max_op_base.h @@ -38,8 +38,9 @@ struct ArgMinMaxFunctor {}; struct ArgMinMaxFunctor { \ void operator()(const DeviceContext& ctx, const framework::LoDTensor& in, \ - framework::LoDTensor* out, int64_t axis, bool keepdims) { \ - auto in_eigen = framework::EigenTensor::From(in); \ + framework::LoDTensor* out, framework::DDim x_dims, \ + int64_t axis, bool keepdims) { \ + auto in_eigen = framework::EigenTensor::From(in, x_dims); \ if (keepdims) { \ auto out_eigen = framework::EigenTensor::From(*out); \ out_eigen.device(*(ctx.eigen_device())) = \ @@ -68,16 +69,26 @@ struct VisitDataArgMinMaxFunctor { out.template mutable_data(ctx.GetPlace()); auto axis = ctx.Attr("axis"); auto keepdims = ctx.Attr("keepdims"); - auto x_rank = x.dims().size(); - if (axis < 0) axis += x_rank; + const bool& flatten = ctx.Attr("flatten"); + + // if flatten, will construct the new dims for the cacluate + framework::DDim x_dims; + if (flatten) { + x_dims = framework::make_ddim({x.numel()}); + // if flatten, the axis just as 0 + axis = 0; + } else { + x_dims = x.dims(); + if (axis < 0) axis += x_dims.size(); + } auto& dev_ctx = ctx.template device_context(); #define CALL_ARG_MINMAX_FUNCTOR(rank) \ 
ArgMinMaxFunctor \ functor##rank; \ - functor##rank(dev_ctx, x, &out, axis, keepdims) + functor##rank(dev_ctx, x, &out, x_dims, axis, keepdims) - switch (x.dims().size()) { + switch (x_dims.size()) { case 1: CALL_ARG_MINMAX_FUNCTOR(1); break; @@ -141,6 +152,7 @@ class ArgMinMaxOp : public framework::OperatorWithKernel { const auto& x_dims = ctx->GetInputDim("X"); int64_t axis = ctx->Attrs().Get("axis"); bool keepdims = ctx->Attrs().Get("keepdims"); + const bool& flatten = ctx->Attrs().Get("flatten"); PADDLE_ENFORCE_GE(axis, -x_dims.size(), platform::errors::InvalidArgument( @@ -152,14 +164,21 @@ class ArgMinMaxOp : public framework::OperatorWithKernel { platform::errors::InvalidArgument( "'axis'(%d) must be less than Rank(X)(%d).", axis, x_dims.size())); - auto x_rank = x_dims.size(); - if (axis < 0) axis += x_rank; std::vector vec; - for (int64_t i = 0; i < axis; i++) vec.push_back(x_dims[i]); - if (keepdims) { - vec.push_back(static_cast(1)); + if (flatten) { + // if is flatten, will return the only on element + if (keepdims) { + vec.emplace_back(static_cast(1)); + } + } else { + auto x_rank = x_dims.size(); + if (axis < 0) axis += x_rank; + for (int64_t i = 0; i < axis; i++) vec.emplace_back(x_dims[i]); + if (keepdims) { + vec.emplace_back(static_cast(1)); + } + for (int64_t i = axis + 1; i < x_rank; i++) vec.emplace_back(x_dims[i]); } - for (int64_t i = axis + 1; i < x_rank; i++) vec.push_back(x_dims[i]); ctx->SetOutputDim("Out", framework::make_ddim(vec)); } }; @@ -176,6 +195,9 @@ class BaseArgMinMaxOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("axis", "The axis in which to compute the arg indics."); AddAttr("keepdims", "Keep the dim that to reduce.").SetDefault(false); AddAttr("dtype", "Keep the dim that to reduce.").SetDefault(-1); + AddAttr("flatten", + "Flatten the input value, and search the min or max indices") + .SetDefault(false); AddComment(string::Sprintf(R"DOC( %s Operator. diff --git a/paddle/fluid/operators/arg_min_op.cu b/paddle/fluid/operators/arg_min_op.cu index 47d7c8b12243c6c5c501188af7f48f125c266009..23170bf0087906d752767051ce58874cb3584ee5 100644 --- a/paddle/fluid/operators/arg_min_op.cu +++ b/paddle/fluid/operators/arg_min_op.cu @@ -1,29 +1,21 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/arg_min_max_op_base.h" - -REGISTER_OP_CUDA_KERNEL( - arg_min, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel); +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
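// Editorial note on the new "flatten" attribute added in the hunks above: when
// flatten=true the input is treated as 1-D and axis is forced to 0, so Out is a
// single index into the flattened data (shape [] by default, or [1] with
// keepdims=true). For example, X of shape [2, 3] with axis=1 and keepdims=false
// gives Out of shape [2]; the same X with flatten=true gives one index in [0, 6).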
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/arg_min_max_op_base.cu.h" +REGISTER_OP_CUDA_KERNEL( + arg_min, paddle::operators::ArgMinMaxOpCUDAKernel, + paddle::operators::ArgMinMaxOpCUDAKernel, + paddle::operators::ArgMinMaxOpCUDAKernel, + paddle::operators::ArgMinMaxOpCUDAKernel, + paddle::operators::ArgMinMaxOpCUDAKernel); diff --git a/paddle/fluid/operators/bce_loss_op.cc b/paddle/fluid/operators/bce_loss_op.cc index 50797a100b1a67244b7c7b40b47404b60dc6af65..f56789b889526301e670ac37d1d6131aaafb070a 100644 --- a/paddle/fluid/operators/bce_loss_op.cc +++ b/paddle/fluid/operators/bce_loss_op.cc @@ -32,22 +32,29 @@ class BCELossOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "BCELoss"); auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ( - x_dims.size(), label_dims.size(), - platform::errors::InvalidArgument( - "Input(X) and Input(Label) shall have the same shape.")); - bool contain_unknown_dim = framework::contain_unknown_dim(x_dims) || - framework::contain_unknown_dim(label_dims); - bool check = ctx->IsRuntime() || !contain_unknown_dim; + auto labels_dims = ctx->GetInputDim("Label"); + + int rank = x_dims.size(); + PADDLE_ENFORCE_EQ(rank, labels_dims.size(), + platform::errors::InvalidArgument( + "Input(X) and Input(Label) shall have the same rank." + "But received: the rank of Input(X) is [%d], " + "the rank of Input(Label) is [%d].", + rank, labels_dims.size())); + + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(labels_dims) <= 0)) { + check = false; + } + if (check) { - PADDLE_ENFORCE_EQ( - x_dims.size(), label_dims.size(), - platform::errors::InvalidArgument( - "ShapeError: Input(X) and Input(Label) shall have the same shape " - "But received: the shape of Input(X) is [%s], the shape of " - "Input(Label) is [%s].", - x_dims, label_dims)); + PADDLE_ENFORCE_EQ(x_dims, labels_dims, + platform::errors::InvalidArgument( + "Input(X) and Input(Label) shall have the same " + "shape. But received: the shape of Input(X) is " + "[%s], the shape of Input(Label) is [%s].", + x_dims, labels_dims)); } ctx->ShareDim("X", "Out"); @@ -76,20 +83,31 @@ class BCELossGradOp : public framework::OperatorWithKernel { framework::GradVarName("X"), "BCELossGrad"); auto x_dims = ctx->GetInputDim("X"); + auto labels_dims = ctx->GetInputDim("Label"); auto dout_dims = ctx->GetInputDim(framework::GradVarName("Out")); - bool contain_unknown_dim = framework::contain_unknown_dim(x_dims) || - framework::contain_unknown_dim(dout_dims); - bool check = ctx->IsRuntime() || !contain_unknown_dim; + + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(labels_dims) <= 0)) { + check = false; + } + if (check) { + PADDLE_ENFORCE_EQ(x_dims, labels_dims, + platform::errors::InvalidArgument( + "Input(X) and Input(Label) shall have the same " + "shape. 
But received: the shape of Input(X) is " + "[%s], the shape of Input(Label) is [%s].", + x_dims, labels_dims)); + PADDLE_ENFORCE_EQ(x_dims, dout_dims, platform::errors::InvalidArgument( - "ShapeError:The Input(X) and Input(Out@Grad) " - "should have the same " - "shape, But received: the shape of Input(X) is " - "[%s], the shape of " - "Input(Out@GRAD) is [%s].", + "Input(X) and Input(Out@Grad) shall have the same " + "shape. But received: the shape of Input(X) is " + "[%s], the shape of Input(Out@Grad) is [%s].", x_dims, dout_dims)); } + ctx->SetOutputDim(framework::GradVarName("X"), x_dims); ctx->ShareLoD("X", framework::GradVarName("X")); } diff --git a/paddle/fluid/operators/bce_loss_op.cu b/paddle/fluid/operators/bce_loss_op.cu index 8e30f4eb15b6afde885512206c7eaeb721cdd44b..16db4f05e31d365d8d06174ab708e30474b8a8c2 100644 --- a/paddle/fluid/operators/bce_loss_op.cu +++ b/paddle/fluid/operators/bce_loss_op.cu @@ -67,7 +67,8 @@ class BCELossCUDAKernel : public framework::OpKernel { auto x_data = x->data(); auto out_data = out->mutable_data(ctx.GetPlace()); - int x_numel = x->numel(); + auto x_numel = x->numel(); + platform::GpuLaunchConfig config = platform::getGpuLaunchConfig(x_numel, ctx); @@ -75,7 +76,7 @@ class BCELossCUDAKernel : public framework::OpKernel { framework::TensorCopy(*x, platform::CPUPlace(), &x_cpu); T* x_cpu_data = x_cpu.data(); - for (int i = 0; i < x_numel; ++i) { + for (int64_t i = 0; i < x_numel; ++i) { PADDLE_ENFORCE_GE( x_cpu_data[i], static_cast(0), platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/bce_loss_op.h b/paddle/fluid/operators/bce_loss_op.h index 85e120e4642a298ebff00fc0e4b6425f775443aa..dd87b69efe2869727f2db778cec44612efbcff6b 100644 --- a/paddle/fluid/operators/bce_loss_op.h +++ b/paddle/fluid/operators/bce_loss_op.h @@ -34,11 +34,11 @@ class BCELossOpKernel : public framework::OpKernel { auto x_data = x->data(); auto label_data = labels->data(); auto out_data = out->mutable_data(ctx.GetPlace()); - int x_numel = x->numel(); + auto x_numel = x->numel(); // out = -(label * ln(x) + (1 - label) * ln(1 - x)) = (label - 1) * ln(1 - // x) - label * ln(x) - for (int i = 0; i < x_numel; ++i) { + for (int64_t i = 0; i < x_numel; ++i) { PADDLE_ENFORCE_GE( x_data[i], static_cast(0), platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/bernoulli_op.cc b/paddle/fluid/operators/bernoulli_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..c525da5953d76d4406fbdd0d9d6e98619e409f71 --- /dev/null +++ b/paddle/fluid/operators/bernoulli_op.cc @@ -0,0 +1,88 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
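// Editorial note on the bce_loss kernels above: the element-wise loss is
//   out_i = -(label_i * log(x_i) + (1 - label_i) * log(1 - x_i)),
// which requires every x_i to lie in [0, 1]; that is why both the CPU and CUDA
// kernels scan the input and enforce the range before computing, and why the
// numel/index types were widened (auto / int64_t) for large tensors.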
*/ +#include "paddle/fluid/operators/bernoulli_op.h" + +#include +#include + +#include "paddle/fluid/framework/generator.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/operators/common_infer_shape_functions.h" + +namespace paddle { +namespace operators { + +class BernoulliOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "A tensor with probabilities for generating the random binary " + "number"); + AddOutput("Out", "A Tensor filled with random binary number"); + AddComment(R"DOC( +This OP returns a Tensor filled with random binary(0 or 1) number from a Bernoulli distribution. + + Out ~ Bernoulli(X) + +)DOC"); + } +}; + +class BernoulliOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + return UnaryOpUnchangedInferShape(ctx); + } +}; + +// It seems that Eigen::Tensor::random in GPU will SEGFAULT. +// Use std::random and thrust::random(thrust is a std library in CUDA) to +// implement uniform random. +template +class BernoulliOpKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto x = ctx.Input("X"); + auto out = ctx.Output("Out"); + auto *in_data = x->data(); + auto *out_data = out->mutable_data(ctx.GetPlace()); + + int64_t size = x->numel(); + std::uniform_real_distribution dist(0.0, 1.0); + auto gen_ptr = framework::Generator::GetInstance(); + std::mt19937_64 &gen_engine = gen_ptr->GetCPUEngine(); + + for (int64_t i = 0; i < size; ++i) { + out_data[i] = BernoulliFunctor(in_data[i], dist(gen_engine)); + } + } +}; // namespace operators + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; +REGISTER_OPERATOR( + bernoulli, ops::BernoulliOp, ops::BernoulliOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); + +REGISTER_OP_CPU_KERNEL(bernoulli, + ops::BernoulliOpKernel, + ops::BernoulliOpKernel); diff --git a/paddle/fluid/operators/bernoulli_op.cu b/paddle/fluid/operators/bernoulli_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..d0837071d456068f64ebc74b115f1a7904eba41c --- /dev/null +++ b/paddle/fluid/operators/bernoulli_op.cu @@ -0,0 +1,72 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include + +#include "paddle/fluid/framework/generator.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/operators/bernoulli_op.h" +#include "paddle/fluid/platform/transform.h" + +namespace paddle { +namespace operators { +// it can be consistent with cpu when CUDAGenerator is provided. 
+template +struct BernoulliCudaFunctor { + unsigned int seed_; + __host__ __device__ BernoulliCudaFunctor(int seed) : seed_(seed) {} + + __host__ __device__ T operator()(const unsigned int n, const T p) const { + thrust::minstd_rand rng; + rng.seed(seed_); + thrust::uniform_real_distribution dist(0.0, 1.0); + rng.discard(n); + return static_cast(dist(rng) < p); + } +}; + +template +class BernoulliOpKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + std::random_device rd; + auto seed = rd(); + const auto x = ctx.Input("X"); + auto out = ctx.Output("Out"); + auto* in_data = x->data(); + auto* out_data = out->mutable_data(ctx.GetPlace()); + + int64_t size = x->numel(); + thrust::counting_iterator index_sequence_begin(0); + platform::Transform trans; + auto* context = + static_cast(&ctx.device_context()); + trans(*context, index_sequence_begin, index_sequence_begin + size, in_data, + out_data, BernoulliCudaFunctor(seed)); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL( + bernoulli, ops::BernoulliOpKernel, + ops::BernoulliOpKernel); diff --git a/paddle/fluid/operators/bernoulli_op.h b/paddle/fluid/operators/bernoulli_op.h new file mode 100644 index 0000000000000000000000000000000000000000..06a83ada17bb926d6f7d4eef10750986d00f048c --- /dev/null +++ b/paddle/fluid/operators/bernoulli_op.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/hostdevice.h" + +namespace paddle { +namespace operators { + +/** + * Samples a bernoulli distribution given a probability input + */ + +template +inline HOSTDEVICE T BernoulliFunctor(T p, T rand) { + PADDLE_ENFORCE_LE(p, 1, platform::errors::OutOfRange( + "The probability should be <= 1, but got %f", p)); + PADDLE_ENFORCE_GE(p, 0, platform::errors::OutOfRange( + "The probability should be >= 1, but got %f", p)); + return static_cast(rand < p); +} + +template +class BernoulliOpKernel; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/cholesky_op.cu b/paddle/fluid/operators/cholesky_op.cu index c44299686516e968692fe146a5c324c7f1fa83d2..530147609fe1e47320a1cbd9223ccdfb82ba7e7a 100644 --- a/paddle/fluid/operators/cholesky_op.cu +++ b/paddle/fluid/operators/cholesky_op.cu @@ -63,7 +63,6 @@ class CholeskyGPUKernel : public framework::OpKernel { for_range(matrix_band_part_functor); } - // TODO(guosheng): Add callback to check info auto info = memory::Alloc(dev_ctx, sizeof(int) * batch_count); auto* info_ptr = reinterpret_cast(info->ptr()); @@ -96,6 +95,20 @@ class CholeskyGPUKernel : public framework::OpKernel { #if CUDA_VERSION >= 9020 && !defined(_WIN32) } #endif + // check the info + std::vector error_info; // only for checking positive matrix + error_info.resize(batch_count); + + memory::Copy(platform::CPUPlace(), error_info.data(), + BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()), + info_ptr, sizeof(int) * batch_count, dev_ctx.stream()); + + for (int i = 0; i < batch_count; ++i) { + PADDLE_ENFORCE_EQ(error_info[i], 0, + platform::errors::PreconditionNotMet( + "For batch [%d]: U(%d, %d) is zero, singular U.", i, + error_info[i], error_info[i])); + } } void Potrf(const platform::CUDADeviceContext& dev_ctx, cublasFillMode_t uplo, diff --git a/paddle/fluid/operators/cholesky_op.h b/paddle/fluid/operators/cholesky_op.h index b0280b00ecf447d36b199e6b6765fa7928e081f0..15dd8315362ed0221c5c8b9c523af37da38dfd7e 100644 --- a/paddle/fluid/operators/cholesky_op.h +++ b/paddle/fluid/operators/cholesky_op.h @@ -59,22 +59,24 @@ class CholeskyCPUKernel : public framework::OpKernel { Eigen::Matrix, Eigen::UpLoType::Upper> llt_decomposition(input); - PADDLE_ENFORCE_EQ( - llt_decomposition.info(), Eigen::Success, - platform::errors::InvalidArgument( - "Cholesky decomposition was not successful. The input matrice " - "might not be not be positive definite.")); + PADDLE_ENFORCE_EQ(llt_decomposition.info(), Eigen::Success, + platform::errors::InvalidArgument( + "Cholesky decomposition was not successful. The " + "%d-th input matrice " + "might not be not be positive definite.", + i)); output = llt_decomposition.matrixU(); } else { Eigen::LLT< Eigen::Matrix, Eigen::UpLoType::Lower> llt_decomposition(input); - PADDLE_ENFORCE_EQ( - llt_decomposition.info(), Eigen::Success, - platform::errors::InvalidArgument( - "Cholesky decomposition was not successful. The input matrice " - "might not be not be positive definite.")); + PADDLE_ENFORCE_EQ(llt_decomposition.info(), Eigen::Success, + platform::errors::InvalidArgument( + "Cholesky decomposition was not successful. 
The " + "%d-th input matrice " + "might not be not be positive definite.", + i)); output = llt_decomposition.matrixL(); } } diff --git a/paddle/fluid/operators/collective/c_reduce_max_op.cc b/paddle/fluid/operators/collective/c_reduce_max_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..425351877689f7e3ad8e0a46d2226f5f751a4016 --- /dev/null +++ b/paddle/fluid/operators/collective/c_reduce_max_op.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/c_reduce_op.h" + +namespace paddle { +namespace operators { + +class CReduceMaxOpMaker : public CReduceOpMaker { + protected: + std::string GetName() const override { return "Max"; } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_WITHOUT_GRADIENT(c_reduce_max, ops::CReduceOp, + ops::CReduceMaxOpMaker); + +REGISTER_OP_CPU_KERNEL(c_reduce_max, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel); diff --git a/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..7e260346b4bdd8aced0df59c72f5adb4c479e8d0 --- /dev/null +++ b/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc @@ -0,0 +1,25 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/c_reduce_op.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(c_reduce_max, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel) diff --git a/paddle/fluid/operators/collective/c_reduce_min_op.cc b/paddle/fluid/operators/collective/c_reduce_min_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..8e849641e639eeceb48fc95656b269988c827006 --- /dev/null +++ b/paddle/fluid/operators/collective/c_reduce_min_op.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/c_reduce_op.h" + +namespace paddle { +namespace operators { + +class CReduceMinOpMaker : public CReduceOpMaker { + protected: + std::string GetName() const override { return "Min"; } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_WITHOUT_GRADIENT(c_reduce_min, ops::CReduceOp, + ops::CReduceMinOpMaker); + +REGISTER_OP_CPU_KERNEL(c_reduce_min, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel); diff --git a/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..77a75ed0b7af2a7946c02bfa0f33038aa0090c5b --- /dev/null +++ b/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc @@ -0,0 +1,25 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/c_reduce_op.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(c_reduce_min, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel) diff --git a/paddle/fluid/operators/collective/c_reduce_op.h b/paddle/fluid/operators/collective/c_reduce_op.h new file mode 100644 index 0000000000000000000000000000000000000000..7474a6a7c27f8d010c23c633fa1918b164de33ea --- /dev/null +++ b/paddle/fluid/operators/collective/c_reduce_op.h @@ -0,0 +1,151 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include +#include +#include + +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_registry.h" + +#if defined(PADDLE_WITH_NCCL) +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/nccl_helper.h" +#endif + +namespace paddle { +namespace operators { + +enum ReduceType { kRedSum, kRedMax, kRedMin, kRedProd }; + +class CReduceOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); + } +}; + +template +class CReduceOpCPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE_EQ( + true, false, + platform::errors::Unavailable("Unimplemented CReduceOpCPUKernel now.")); + } +}; + +template +class CReduceOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { +#if defined(PADDLE_WITH_NCCL) + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); + + auto place = ctx.GetPlace(); + ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + int64_t numel = in->numel(); + const void* sendbuff = in->data(); + out->Resize(in->dims()); + void* recvbuff = out->mutable_data(place); + + int rid = ctx.Attr("ring_id"); + int root = ctx.Attr("root_id"); + auto comm = platform::NCCLCommContext::Instance().Get(rid, place); + + cudaStream_t stream = nullptr; + if (ctx.Attr("use_calc_stream")) { + auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); + stream = static_cast(dev_ctx)->stream(); + } else { + stream = comm->stream(); + } + + ncclRedOp_t nccl_red_type = ncclSum; + switch (red_type) { + case kRedSum: + nccl_red_type = ncclSum; + break; + + case kRedMax: + nccl_red_type = ncclMax; + break; + + case kRedMin: + nccl_red_type = ncclMin; + break; + + case kRedProd: + nccl_red_type = ncclProd; + break; + + default: + PADDLE_ENFORCE_EQ(true, false, platform::errors::InvalidArgument( + "red_type must be one of kRedSum, " + "kRedMax, kRedMin, kRedProd.")); + } + + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce( + sendbuff, recvbuff, numel, dtype, nccl_red_type, root, comm->comm(), + stream)); +#else + PADDLE_ENFORCE_EQ(true, false, + platform::errors::Unavailable( + "PaddlePaddle should compile with GPU..")); +#endif + } +}; + +class CReduceOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() { + AddInput("X", "(Tensor), tensor to be reduced."); + AddOutput("Out", "(Tensor) the reduced result."); + AddAttr("ring_id", "(int default 0) communication ring id.") + .SetDefault(0); + AddAttr("root_id", "(int default 0) root id.").SetDefault(0); + AddAttr( + "use_calc_stream", + "(bool default false) eject CUDA operations to calculation stream.") + .SetDefault(false); + AddComment(string::Sprintf(R"DOC( +CReduce %s Operator + +Call collective Reduce with reduce type %s. If input and output are +the same variable, in-place reduce will be used. 
+)DOC", + GetName(), GetName())); + } + + protected: + virtual std::string GetName() const = 0; +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/collective/c_reduce_prod_op.cc b/paddle/fluid/operators/collective/c_reduce_prod_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..64935df856ec79f427bdcd21e03b7c493c31ac1e --- /dev/null +++ b/paddle/fluid/operators/collective/c_reduce_prod_op.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/c_reduce_op.h" + +namespace paddle { +namespace operators { + +class CReduceProdOpMaker : public CReduceOpMaker { + protected: + std::string GetName() const override { return "Prod"; } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_WITHOUT_GRADIENT(c_reduce_prod, ops::CReduceOp, + ops::CReduceProdOpMaker); + +REGISTER_OP_CPU_KERNEL(c_reduce_prod, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel) diff --git a/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..07e431f7bc838caa9bc3abdcd0be1beb94b96635 --- /dev/null +++ b/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc @@ -0,0 +1,25 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/c_reduce_op.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(c_reduce_prod, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel) diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op.cc b/paddle/fluid/operators/collective/c_reduce_sum_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..3e20cee7e186a462aedc1881c6e34cacc8d09de0 --- /dev/null +++ b/paddle/fluid/operators/collective/c_reduce_sum_op.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/c_reduce_op.h" + +namespace paddle { +namespace operators { + +class CReduceSumOpMaker : public CReduceOpMaker { + protected: + std::string GetName() const override { return "Sum"; } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_WITHOUT_GRADIENT(c_reduce_sum, ops::CReduceOp, + ops::CReduceSumOpMaker); + +REGISTER_OP_CPU_KERNEL(c_reduce_sum, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel, + ops::CReduceOpCPUKernel) diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..d9826422c16cb67f9f7101643918a83898c606b3 --- /dev/null +++ b/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc @@ -0,0 +1,25 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/collective/c_reduce_op.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(c_reduce_sum, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel, + ops::CReduceOpCUDAKernel) diff --git a/paddle/fluid/operators/collective/c_scatter_op.cc b/paddle/fluid/operators/collective/c_scatter_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..908708e6e328f54466d4bb69b30fd607e14d1fe9 --- /dev/null +++ b/paddle/fluid/operators/collective/c_scatter_op.cc @@ -0,0 +1,92 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/collective/c_scatter_op.h" + +namespace paddle { +namespace operators { + +class CScatterOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "CScatter"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "CScatter"); + int root_id = ctx->Attrs().Get("root"); + int ring_id = ctx->Attrs().Get("ring_id"); + int nranks = ctx->Attrs().Get("nranks"); + PADDLE_ENFORCE_GE(nranks, 2, + platform::errors::InvalidArgument( + "The number of ranks (%d) must be greater than 1 " + "to use collective op (c_scatter op).", + nranks)); + PADDLE_ENFORCE_GE( + root_id, 0, + platform::errors::InvalidArgument( + "The root_id (%d) for c_scatter_op must be non-negative.", + root_id)); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::InvalidArgument( + "The ring_id (%d) for c_scatter_op must be non-negative.", + root_id)); + framework::DDim dim = ctx->GetInputDim("X"); + dim[0] = dim[0] / nranks; + if (dim[0] < 0) dim[0] = -1; + ctx->SetOutputDim("Out", dim); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); + } +}; + +class CScatterOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() { + AddInput("X", "(Tensor) tensor to be broadcasted."); + AddOutput("Out", "(Tensor) the result of broadcast."); + AddAttr("ring_id", "(int default 0) nccl communication ring id.") + .SetDefault(0); + AddAttr("root", "(int default 0) root id for broadcasting.") + .SetDefault(0); + AddAttr("nranks", "(int default 1) number of ranks.").SetDefault(0); + AddAttr( + "use_calc_stream", + "(bool default false) eject CUDA operations to calculation stream.") + .SetDefault(false); + AddComment(R"DOC( +CScatter Operator +Scatter the source to all participators. +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_WITHOUT_GRADIENT(c_scatter, ops::CScatterOp, ops::CScatterOpMaker); + +REGISTER_OP_CPU_KERNEL(c_scatter, ops::CScatterOpCPUKernel, + ops::CScatterOpCPUKernel, + ops::CScatterOpCPUKernel, + ops::CScatterOpCPUKernel, + ops::CScatterOpCPUKernel); diff --git a/paddle/fluid/operators/collective/c_scatter_op.cu.cc b/paddle/fluid/operators/collective/c_scatter_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..8d9e6b4b7d99044f584e9e21062a786252d60f76 --- /dev/null +++ b/paddle/fluid/operators/collective/c_scatter_op.cu.cc @@ -0,0 +1,108 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/collective/c_scatter_op.h" + +#if defined(PADDLE_WITH_NCCL) +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/nccl_helper.h" +#endif + +namespace paddle { +namespace operators { + +template +class CScatterOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { +#if defined(PADDLE_WITH_NCCL) + auto x = ctx.Input("X"); + auto out = ctx.Output("Out"); + int numel = x->numel(); + ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + + int nranks = ctx.Attr("nranks"); + int root_id = ctx.Attr("root"); + int ring_id = ctx.Attr("ring_id"); + auto place = ctx.GetPlace(); + auto comm = platform::NCCLCommContext::Instance().Get(ring_id, place); + PADDLE_ENFORCE_EQ(nranks, comm->nranks(), + platform::errors::InvalidArgument( + "The number of ranks (%d) you set of must " + "be equal to comm->nranks (%d).", + nranks, comm->nranks())); + PADDLE_ENFORCE_GE( + root_id, 0, + platform::errors::InvalidArgument( + "The root_id (%d) for c_scatter_op must be non-negative.", + root_id)); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::InvalidArgument( + "The ring_id (%d) for c_scatter_op must be non-negative.", + ring_id)); + + cudaStream_t stream = nullptr; + if (ctx.Attr("use_calc_stream")) { + auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); + stream = static_cast(dev_ctx)->stream(); + } else { + stream = comm->stream(); + } + + framework::DDim x_dims = x->dims(); + framework::DDim out_dims(x_dims); + framework::Tensor temp; + auto out_ptr = temp.mutable_data(out_dims, place); + if (root_id == comm->rank()) { + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast( + reinterpret_cast(const_cast(x->data())), numel, dtype, + root_id, comm->comm(), stream)); + + framework::TensorCopy(*static_cast(x), place, + *platform::DeviceContextPool::Instance().Get(place), + static_cast(&temp)); + } else { + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast( + out_ptr, numel, dtype, root_id, comm->comm(), stream)); + } + + out_dims[0] = out_dims[0] / nranks; + auto start_index = out_dims[0] * comm->rank(); + auto end_index = start_index + out_dims[0]; + temp = temp.Slice(start_index, end_index); + temp.Resize(out_dims); + out->mutable_data(out_dims, place); + framework::TensorCopySync(*static_cast(&temp), + place, static_cast(out)); + out->Resize(out_dims); +#else + PADDLE_ENFORCE_EQ( + true, false, + platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); +#endif + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(c_scatter, ops::CScatterOpCUDAKernel, + ops::CScatterOpCUDAKernel, + ops::CScatterOpCUDAKernel, + ops::CScatterOpCUDAKernel, + ops::CScatterOpCUDAKernel); diff --git a/paddle/fluid/operators/collective/c_scatter_op.h b/paddle/fluid/operators/collective/c_scatter_op.h new file mode 100644 index 0000000000000000000000000000000000000000..6aba3dc585821111b5a9291787f505fc5a66219d --- /dev/null +++ b/paddle/fluid/operators/collective/c_scatter_op.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class CScatterOpCPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE_EQ(true, false, + platform::errors::Unavailable( + "Unimplemented cpu kernel for CScatterOp.")); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index a8c4107add1beeb9a7a5aedad9be982b6d8b6aac..9ed169fe3502e0c34b9f37d6520edc1a3fbfa91c 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -196,7 +196,7 @@ framework::OpKernelType ConvOp::GetKernelTypeForVar( auto ar = paddle::framework::AttrReader(attrs); const std::string data_format = ar.Get("data_format"); auto dl = framework::StringToDataLayout(data_format); - // Some models may have intentionally set "AnyLayout" for pool + // Some models may have intentionally set "AnyLayout" for conv // op. Treat this as NCHW (default data_format value) if (dl != framework::DataLayout::kAnyLayout) { return framework::OpKernelType(expected_kernel_type.data_type_, diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index b44aa4ce4f893720ef55a7daf1d7b1e757c7480c..25e887ba6675e6c28bcd44c3b57c2ea571c075e3 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -37,6 +37,8 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { auto filter_dims = ctx->GetInputDim("Filter"); std::vector output_size = ctx->Attrs().Get>("output_size"); + std::vector output_padding = + ctx->Attrs().Get>("output_padding"); std::vector strides = ctx->Attrs().Get>("strides"); std::vector paddings = ctx->Attrs().Get>("paddings"); std::vector dilations = ctx->Attrs().Get>("dilations"); @@ -78,6 +80,12 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { platform::errors::InvalidArgument( "The Attr(output_size) and Attr(stride) of Op(conv_transpose) " "should be the same.")); + if (output_padding.size()) + PADDLE_ENFORCE_EQ( + output_padding.size(), strides.size(), + platform::errors::InvalidArgument( + "The Attr(output_padding) and Attr(stride) of Op(conv_transpose) " + "should be the same.")); const int64_t C = (data_layout != DataLayout::kNHWC ? in_dims[1] @@ -136,6 +144,27 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { infer_shape + strides[i])); } output_shape.push_back(output_size[i]); + } else if (output_padding.size()) { + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_GE( + output_padding[i], 0, + platform::errors::InvalidArgument( + "output_padding of Op(ConvTransposeOp) should not be " + "less than the 0. 
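For context on the output_padding attribute being validated in this hunk: with symmetric padding, the transposed-convolution output length per spatial dimension usually follows the textbook formula below, and the new attribute simply extends that length. This is a hedged sketch; the op's actual infer_shape is computed from its paddings vector earlier in this function.

#include <algorithm>
#include <cassert>

// Textbook transposed-convolution output length for one spatial dim, with the
// output_padding term added by this patch. Assumes symmetric padding.
int ConvTransposeOutDim(int in, int kernel, int stride, int padding,
                        int dilation, int output_padding) {
  assert(output_padding >= 0);                           // enforced above
  assert(output_padding < std::max(stride, dilation));   // enforced above
  return (in - 1) * stride - 2 * padding + dilation * (kernel - 1) + 1 +
         output_padding;
}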
But received output_padding = " + "[%s], whose dim %d is less than 0", + framework::make_ddim(output_padding), i)); + PADDLE_ENFORCE_LT( + output_padding[i], std::max(strides[i], dilations[i]), + platform::errors::InvalidArgument( + "output_padding of Op(ConvTransposeOp) should be less " + "than either stride or dilation. But received output_size = " + "[%s], " + "whose dim %d is not less than either stride (%d) or " + "dilation (%d)", + framework::make_ddim(output_size), i, strides[i], + dilations[i])); + } + output_shape.push_back((infer_shape + output_padding[i])); } else { output_shape.push_back(infer_shape); } @@ -223,10 +252,14 @@ void Conv2DTransposeOpMaker::Make() { "The format of output tensor is X (one-dimensional) of size equal" "to the number of output channels. Only used with MKL-DNN.") .AsDispensable(); - AddOutput("Output", "(Tensor) The output tensor of convolution transpose operator. " "The format of output tensor is the same as input tensor."); + AddAttr>("output_padding", + "(vector default: []), Additional size added " + "to one side of each dimension in the output " + "shape") + .SetDefault({}); AddAttr>("output_size", "(vector default: []), the " "size of the output tensor") @@ -338,6 +371,11 @@ void Conv3DTransposeOpMaker::Make() { "Where N is batch size, C is " "the number of channels, D is the depth of the feature, H is the " "height of the feature, and W is the width of the feature."); + AddAttr>("output_padding", + "(vector default: []), Additional size added " + "to one side of each dimension in the output " + "shape") + .SetDefault({}); AddAttr>("output_size", "(vector default: []), the " "size of the output tensor") diff --git a/paddle/fluid/operators/cudnn_lstm_op.cc b/paddle/fluid/operators/cudnn_lstm_op.cc index 16e2ca464b5c4de6aa65109cd794d17e4dcd6a2a..7081490fd1bf0e26cb8aa90d69a76a5476cef044 100644 --- a/paddle/fluid/operators/cudnn_lstm_op.cc +++ b/paddle/fluid/operators/cudnn_lstm_op.cc @@ -24,34 +24,62 @@ class CudnnLSTMOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Input"), - "Input(Input) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("W"), - "Input(Weight) of LSTM should not be null."); - - PADDLE_ENFORCE(ctx->HasInput("InitH"), - "Input(init_h) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("InitC"), - "Input(init_c) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Cache"), - "Input(Cache) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("last_h"), - "Output(last_h) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("last_c"), - "Output(last_c) of LSTM should not be null."); + OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "CudnnLSTM"); + OP_INOUT_CHECK(ctx->HasInput("W"), "Input", "W", "CudnnLSTM"); + OP_INOUT_CHECK(ctx->HasInput("InitH"), "Input", "InitH", "CudnnLSTM"); + OP_INOUT_CHECK(ctx->HasInput("InitC"), "Input", "InitC", "CudnnLSTM"); + + OP_INOUT_CHECK(ctx->HasOutput("Reserve"), "Output", "Reserve", "CudnnLSTM"); + OP_INOUT_CHECK(ctx->HasOutput("StateOut"), "Output", "StateOut", + "CudnnLSTM"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "CudnnLSTM"); + OP_INOUT_CHECK(ctx->HasOutput("LastH"), "Output", "LastH", "CudnnLSTM"); + OP_INOUT_CHECK(ctx->HasOutput("LastC"), "Output", "LastC", "CudnnLSTM"); auto in_dims 
= ctx->GetInputDim("Input"); - PADDLE_ENFORCE_EQ(in_dims.size(), 3, "Input(X)'s rank must be 3."); + auto init_dims = ctx->GetInputDim("InitH"); + PADDLE_ENFORCE_EQ(in_dims.size(), 3, + platform::errors::InvalidArgument( + "The rank of Input in CudnnLSTM must be 3. But " + "received Input's rank is %d.", + in_dims.size())); + PADDLE_ENFORCE_EQ(init_dims.size(), 3, + platform::errors::InvalidArgument( + "The rank of InitH in CudnnLSTM must be 3. But " + "received InitH's rank is %d.", + init_dims.size())); + + PADDLE_ENFORCE_EQ(in_dims[1], init_dims[1], + platform::errors::InvalidArgument( + "The in_dims[1] (Input dims) and init_dims[1] (InitH " + "dims) should be equal. But " + "received in_dims[1] is %d and init_dims[1] is %d.", + in_dims[1], init_dims[1])); + PADDLE_ENFORCE_EQ(in_dims[2], init_dims[2], + platform::errors::InvalidArgument( + "The in_dims[2] (Input dims) and init_dims[2] (InitH " + "dims) should be equal. But " + "received in_dims[2] is %d and init_dims[2] is %d.", + in_dims[2], init_dims[2])); auto out_dims = in_dims; auto hidden_size = ctx->Attrs().Get("hidden_size"); - out_dims[2] = hidden_size; + bool is_bidirec = ctx->Attrs().Get("is_bidirec"); + out_dims[2] = is_bidirec ? hidden_size * 2 : hidden_size; + auto last_dims = init_dims; + last_dims[0] = is_bidirec ? last_dims[0] * 2 : last_dims[0]; ctx->SetOutputDim("Out", out_dims); - ctx->SetOutputDim("last_h", ctx->GetInputDim("InitH")); - ctx->SetOutputDim("last_c", ctx->GetInputDim("InitC")); + ctx->SetOutputDim("LastH", last_dims); + ctx->SetOutputDim("LastC", last_dims); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "Input"), + ctx.device_context()); } }; @@ -84,33 +112,31 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor) the learnable hidden-hidden weights." " The shape is (N), where N is total weight size of the LSTM. " " cudnn concatenate all the weight to one Tensor"); - AddInput("Cache", - "The cache of dropout op, a RAW type variable including random " - "number generator states and some descriptors, which is used in " - "cudnn kernel.") - .AsDispensable(); + AddOutput("Reserve", + "(Tensor, a temporary output Tensor to store the reserve_data " + "of cudnn kernel.") + .AsIntermediate(); + AddOutput("StateOut", + "Share memory with State. " + "Store the global drop state when training"); AddOutput("Out", "(Tensor) the hidden state of LSTM operator. " "The shape is ( seq_len x batch_size x hidden_size) if " "is_bidirec is False" "and When is_bidirec is True, the shape will be ( seq_len x " "batch_size x hidden_size * 2) "); - AddOutput("last_h", + AddOutput("LastH", "(Tensor) the hidden state of the last step. 
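The reworked InferShape above makes the CudnnLSTM output shapes direction-aware. A standalone sketch of the resulting shape rules (plain C++, illustrative only):

#include <cstdint>
#include <vector>

// Shape rules from the hunk above: the feature dim of Out and the leading dim
// of LastH/LastC double when the LSTM is bidirectional.
struct LstmOutShapes {
  std::vector<int64_t> out;       // [seq_len, batch, hidden * dirs]
  std::vector<int64_t> last_h_c;  // [num_layers * dirs, batch, hidden]
};

LstmOutShapes InferLstmShapes(const std::vector<int64_t>& in_dims,    // [seq, batch, input]
                              const std::vector<int64_t>& init_dims,  // [layers, batch, hidden]
                              int hidden_size, bool is_bidirec) {
  const int dirs = is_bidirec ? 2 : 1;
  LstmOutShapes s;
  s.out = {in_dims[0], in_dims[1], static_cast<int64_t>(hidden_size) * dirs};
  s.last_h_c = {init_dims[0] * dirs, init_dims[1], init_dims[2]};
  return s;
}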
" "The shape is ( num_layers x batch_size x hidden_size) if " "is_bidirec is False" "and When is_bidirec is True, the shape will be (num_layers*2 x " "batch_size x hidden_size)"); - AddOutput("last_c", + AddOutput("LastC", "(Tensor) the cell state of the last step" "The shape is ( num_layers x batch_size x hidden_size) if " "is_bidirec is False" "and When is_bidirect is True, the shape will be (num_layers*2 x " "batch_size x hidden_size*2)"); - AddAttr("max_len", - "max length of the LSTM op" - "the first dim of the Input can NOT be greater than max_len") - .SetDefault(20); AddAttr( "dropout_prob", "dropout prob of the dropout op" @@ -120,14 +146,14 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("is_bidirec", "is_bidirec" "if it is bidirectional rnn" - "The will affect the shape of the Out, last_h, and last_c") + "The will affect the shape of the Out, LastH, and LastC") .SetDefault(false); AddAttr("input_size", "input size ot the Input Tensor").SetDefault(10); AddAttr("hidden_size", "hidden size of the LSTM").SetDefault(100); AddAttr("num_layers", "the total layer number of the LSTM") .SetDefault(1); AddAttr("is_test", "True if in test phase.").SetDefault(false); - AddAttr("seed", "seed to used if fix_seed is True").SetDefault(-1); + AddAttr("seed", "seed to used if fix_seed is True").SetDefault(0); AddComment(R"DOC( CUDNN LSTM implementation @@ -172,16 +198,10 @@ class CudnnLSTMGradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Input"), - "Input(Input) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Cache"), - "Input(last_c) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("InitH"), - "Input(init_h) of LSTM should not be null."); - - PADDLE_ENFORCE(ctx->HasInput("InitC"), - "Input(init_c) of LSTM should not be null."); + OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "CudnnLSTMGrad"); + OP_INOUT_CHECK(ctx->HasInput("W"), "Input", "W", "CudnnLSTMGrad"); + OP_INOUT_CHECK(ctx->HasInput("InitH"), "Input", "InitH", "CudnnLSTMGrad"); + OP_INOUT_CHECK(ctx->HasInput("InitC"), "Input", "InitC", "CudnnLSTMGrad"); auto SetOutGradDim = [&ctx](const std::string& name) { auto g_name = framework::GradVarName(name); @@ -195,6 +215,12 @@ class CudnnLSTMGradOp : public framework::OperatorWithKernel { SetOutGradDim("InitH"); SetOutGradDim("InitC"); } + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")), + ctx.device_context()); + } }; template @@ -209,13 +235,12 @@ class CudnnLSTMGradOpMaker : public framework::SingleGradOpMaker { op->SetInput("InitH", this->Input("InitH")); op->SetInput("InitC", this->Input("InitC")); op->SetInput("W", this->Input("W")); - if (this->HasInput("Cache")) { - op->SetInput("Cache", this->Input("Cache")); - } + op->SetInput("Reserve", this->Output("Reserve")); + op->SetInput("StateOut", this->Output("StateOut")); op->SetInput("Out", this->Output("Out")); op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetInput(framework::GradVarName("last_c"), this->OutputGrad("last_c")); - op->SetInput(framework::GradVarName("last_h"), this->OutputGrad("last_h")); + 
op->SetInput(framework::GradVarName("LastC"), this->OutputGrad("LastC")); + op->SetInput(framework::GradVarName("LastH"), this->OutputGrad("LastH")); op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input")); op->SetOutput(framework::GradVarName("W"), this->InputGrad("W")); diff --git a/paddle/fluid/operators/cudnn_lstm_op.cu.cc b/paddle/fluid/operators/cudnn_lstm_op.cu.cc index 579dddee8e82183b778f03595bb4657002262073..37e5e518ea2af9bb437775c8fa7e86816bb1d8ae 100644 --- a/paddle/fluid/operators/cudnn_lstm_op.cu.cc +++ b/paddle/fluid/operators/cudnn_lstm_op.cu.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/cudnn_rnn_cache.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/cudnn_desc.h" namespace paddle { namespace operators { @@ -33,8 +34,10 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { auto w = ctx.Input("W"); Tensor *out = ctx.Output("Out"); - Tensor *last_h = ctx.Output("last_h"); - Tensor *last_c = ctx.Output("last_c"); + Tensor *last_h = ctx.Output("LastH"); + Tensor *last_c = ctx.Output("LastC"); + Tensor *reserve = ctx.Output("Reserve"); + Tensor *state_out = ctx.Output("StateOut"); const T *x_data = x->data(); const T *init_h_data = init_h->data(); @@ -46,72 +49,56 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { T *last_h_data = last_h->mutable_data(ctx.GetPlace()); T *last_c_data = last_c->mutable_data(ctx.GetPlace()); - size_t max_len = ctx.Attr("max_len"); float dropout_prob = ctx.Attr("dropout_prob"); bool is_bidirec = ctx.Attr("is_bidirec"); - int input_size = ctx.Attr("input_size"); int hidden_size = ctx.Attr("hidden_size"); int num_layers = ctx.Attr("num_layers"); bool is_test = ctx.Attr("is_test"); + int seed = ctx.Attr("seed"); auto &dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); - auto *cache_var = ctx.InputVar("Cache"); - if (!cache_var) { - // The RAW type cache variable wouldn't be created and broadcasted on - // multi-devices before the first running. - // use parent scope to make cache persistable - auto *scope = const_cast(ctx.scope().parent()); - auto cache_var_name = ctx.InputNames("Cache")[0]; - cache_var = scope->Var(cache_var_name); - } - CudnnRNNCache *cudnn_rnn_cache = nullptr; - if (cache_var->IsInitialized()) { - // const_cast is usually bad. - cudnn_rnn_cache = const_cast(cache_var) - ->GetMutable(); - } else { - // const_cast is usually bad. - cudnn_rnn_cache = const_cast(cache_var) - ->GetMutable(); - std::random_device rnd; - int seed = ctx.Attr("seed"); - if (seed == -1) { - seed = rnd(); - } - - auto input_w_numel = w->numel(); - auto batch_size = x->dims()[1]; - cudnn_rnn_cache->init(handle, ctx.GetPlace(), max_len, batch_size, - input_size, hidden_size, num_layers, dropout_prob, - is_bidirec, seed, input_w_numel); - } - auto run_seq_len = x->dims()[0]; + CudnnRNNCache *cudnn_rnn_cache = new CudnnRNNCache(); + + auto input_w_numel = w->numel(); + auto seq_len = x->dims()[0]; + auto batch_size = x->dims()[1]; + auto input_dim = x->dims()[2]; + size_t reserve_size; + bool state_initialized = state_out->IsInitialized() ? 
true : false; + cudnnDataType_t cudnn_type = platform::ToCudnnDataType( + framework::ToDataType(std::type_index(typeid(T)))); + cudnn_rnn_cache->init(handle, ctx.GetPlace(), seq_len, batch_size, + input_dim, hidden_size, num_layers, dropout_prob, + is_bidirec, seed, input_w_numel, &reserve_size, + state_out, state_initialized, cudnn_type); + + auto *reserve_data = reserve->mutable_data( + {static_cast(reserve_size)}, ctx.GetPlace()); if (is_test) { // for inference PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardInference( - handle, cudnn_rnn_cache->rnn_desc_, run_seq_len, - cudnn_rnn_cache->x_desc_, x_data, cudnn_rnn_cache->hx_desc_, - init_h_data, cudnn_rnn_cache->cx_desc_, init_c_data, - cudnn_rnn_cache->w_desc_, w_data, cudnn_rnn_cache->y_desc_, out_data, - cudnn_rnn_cache->hy_desc_, last_h_data, cudnn_rnn_cache->cy_desc_, - last_c_data, cudnn_rnn_cache->workspace_data_.data(), + handle, cudnn_rnn_cache->rnn_desc_, seq_len, cudnn_rnn_cache->x_desc_, + x_data, cudnn_rnn_cache->hx_desc_, init_h_data, + cudnn_rnn_cache->cx_desc_, init_c_data, cudnn_rnn_cache->w_desc_, + w_data, cudnn_rnn_cache->y_desc_, out_data, cudnn_rnn_cache->hy_desc_, + last_h_data, cudnn_rnn_cache->cy_desc_, last_c_data, + cudnn_rnn_cache->workspace_data_.data(), cudnn_rnn_cache->workspace_size_)); } else { // for train PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNForwardTraining( - handle, cudnn_rnn_cache->rnn_desc_, run_seq_len, - cudnn_rnn_cache->x_desc_, x_data, cudnn_rnn_cache->hx_desc_, - init_h_data, cudnn_rnn_cache->cx_desc_, init_c_data, - cudnn_rnn_cache->w_desc_, w_data, cudnn_rnn_cache->y_desc_, out_data, - cudnn_rnn_cache->hy_desc_, last_h_data, cudnn_rnn_cache->cy_desc_, - last_c_data, cudnn_rnn_cache->workspace_data_.data(), - cudnn_rnn_cache->workspace_size_, - cudnn_rnn_cache->reserve_data_.data(), - cudnn_rnn_cache->reserve_size_)); + handle, cudnn_rnn_cache->rnn_desc_, seq_len, cudnn_rnn_cache->x_desc_, + x_data, cudnn_rnn_cache->hx_desc_, init_h_data, + cudnn_rnn_cache->cx_desc_, init_c_data, cudnn_rnn_cache->w_desc_, + w_data, cudnn_rnn_cache->y_desc_, out_data, cudnn_rnn_cache->hy_desc_, + last_h_data, cudnn_rnn_cache->cy_desc_, last_c_data, + cudnn_rnn_cache->workspace_data_.data(), + cudnn_rnn_cache->workspace_size_, reserve_data, reserve_size)); } + delete cudnn_rnn_cache; } }; @@ -123,15 +110,13 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { auto *weight = ctx.Input("W"); auto *init_h = ctx.Input("InitH"); auto *init_c = ctx.Input("InitC"); - // auto * last_h = ctx.Input("last_h"); - // auto * last_c = ctx.Input("last_c"); + auto *reserve = ctx.Input("Reserve"); + auto *state_out = ctx.Input("StateOut"); + auto *out = ctx.Input("Out"); auto *out_grad = ctx.Input(framework::GradVarName("Out")); - auto *last_h_grad = ctx.Input(framework::GradVarName("last_h")); - auto *last_c_grad = ctx.Input(framework::GradVarName("last_c")); - - // auto* init_h = ctx.Input("init_h"); - // auto* init_c = ctx.Input("init_c"); + auto *last_h_grad = ctx.Input(framework::GradVarName("LastH")); + auto *last_c_grad = ctx.Input(framework::GradVarName("LastC")); auto *in_grad = ctx.Output(framework::GradVarName("Input")); auto *weight_grad = ctx.Output(framework::GradVarName("W")); @@ -140,116 +125,75 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); - auto *cache_var = ctx.InputVar("Cache"); - PADDLE_ENFORCE(cache_var->IsInitialized()); - CudnnRNNCache *cudnn_rnn_cache = - 
const_cast(cache_var) - ->GetMutable(); auto input_dims = input->dims(); auto init_h_dims = init_h->dims(); auto init_c_dims = init_c->dims(); - in_grad->mutable_data(ctx.GetPlace()); - weight_grad->mutable_data(ctx.GetPlace()); - math::SetConstant zero; - zero(dev_ctx, in_grad, static_cast(0.0)); - zero(dev_ctx, weight_grad, static_cast(0.0)); - - T *init_h_grad_data = NULL; - if (init_h_grad == nullptr) { - Tensor init_h_grad_temp; - init_h_grad_temp.mutable_data(init_h_dims, ctx.GetPlace()); - zero(dev_ctx, &init_h_grad_temp, static_cast(0.0)); - - init_h_grad_data = init_h_grad_temp.data(); - } else { - init_h_grad->mutable_data(init_h_dims, ctx.GetPlace()); - zero(dev_ctx, init_h_grad, static_cast(0.0)); - init_h_grad_data = init_h_grad->data(); - } - - T *init_c_grad_data = NULL; - if (init_c_grad == nullptr) { - Tensor init_c_grad_temp; - init_c_grad_temp.mutable_data(init_c_dims, ctx.GetPlace()); - zero(dev_ctx, &init_c_grad_temp, static_cast(0.0)); - init_c_grad_data = init_c_grad_temp.data(); - } else { - init_c_grad->mutable_data(init_c_dims, ctx.GetPlace()); - zero(dev_ctx, init_c_grad, static_cast(0.0)); - init_c_grad_data = init_c_grad->data(); - } + auto *weight_data = weight->data(); + auto *init_h_data = init_h->data(); + auto *init_c_data = init_c->data(); + auto *out_data = out->data(); + auto *out_grad_data = out_grad->data(); + auto *last_h_grad_data = last_h_grad->data(); + auto *last_c_grad_data = last_c_grad->data(); - const T *last_h_grad_data = NULL; - if (last_h_grad == nullptr) { - Tensor last_h_grad_temp; - last_h_grad_temp.mutable_data(init_h_dims, ctx.GetPlace()); - zero(dev_ctx, &last_h_grad_temp, static_cast(0.0)); - - last_h_grad_data = (const T *)last_h_grad_temp.data(); - } else { - last_h_grad_data = last_h_grad->data(); - } - - const T *last_c_grad_data = NULL; - if (last_c_grad == nullptr) { - Tensor last_c_grad_temp; - last_c_grad_temp.mutable_data(init_c_dims, ctx.GetPlace()); - zero(dev_ctx, &last_c_grad_temp, static_cast(0.0)); - - last_c_grad_data = (const T *)last_c_grad_temp.data(); - } else { - last_c_grad_data = last_c_grad->data(); - } + math::SetConstant zero; + weight_grad->mutable_data(ctx.GetPlace()); + zero(dev_ctx, weight_grad, static_cast(0.0)); - const T *out_grad_data = NULL; - if (out_grad == nullptr) { - Tensor out_grad_temp; - out_grad_temp.mutable_data(out->dims(), ctx.GetPlace()); - zero(dev_ctx, &out_grad_temp, static_cast(0.0)); + in_grad->mutable_data(input_dims, ctx.GetPlace()); + auto *in_grad_data = in_grad->data(); - out_grad_data = (const T *)out_grad_temp.data(); - } else { - out_grad_data = out_grad->data(); - } + init_h_grad->mutable_data(init_h_dims, ctx.GetPlace()); + auto *init_h_grad_data = init_h_grad->data(); - // zero( dev_ctx, last_h_grad, static_cast(0.0)); - // zero( dev_ctx, last_c_grad, static_cast(0.0)); + init_c_grad->mutable_data(init_c_dims, ctx.GetPlace()); + auto *init_c_grad_data = init_c_grad->data(); - auto out_data = out->data(); - // auto out_grad_data = out_grad->data(); - auto weight_data = weight->data(); - auto init_h_data = init_h->data(); - auto init_c_data = init_c->data(); - auto in_grad_data = in_grad->data(); + float dropout_prob = ctx.Attr("dropout_prob"); + bool is_bidirec = ctx.Attr("is_bidirec"); + int hidden_size = ctx.Attr("hidden_size"); + int num_layers = ctx.Attr("num_layers"); + int seed = ctx.Attr("seed"); + + CudnnRNNCache *cudnn_rnn_cache = new CudnnRNNCache(); + + auto input_w_numel = weight->numel(); + auto seq_len = input_dims[0]; + auto batch_size = input->dims()[1]; 
+ auto input_dim = input->dims()[2]; + size_t reserve_size; + cudnnDataType_t cudnn_type = platform::ToCudnnDataType( + framework::ToDataType(std::type_index(typeid(T)))); + cudnn_rnn_cache->init(handle, ctx.GetPlace(), seq_len, batch_size, + input_dim, hidden_size, num_layers, dropout_prob, + is_bidirec, seed, input_w_numel, &reserve_size, + const_cast(state_out), true, cudnn_type); auto work_data = cudnn_rnn_cache->workspace_data_.data(); - auto reserve_data = cudnn_rnn_cache->reserve_data_.data(); + const uint8_t *reserve_data = reserve->data(); - auto run_seq_len = input_dims[0]; - PADDLE_ENFORCE_LE((size_t)run_seq_len, cudnn_rnn_cache->max_length_, - "cudnn running seq_len CAN not greater max_lengh"); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardData( - handle, cudnn_rnn_cache->rnn_desc_, run_seq_len, - cudnn_rnn_cache->y_desc_, out_data, cudnn_rnn_cache->dy_desc_, - out_grad_data, cudnn_rnn_cache->dhy_desc_, last_h_grad_data, - cudnn_rnn_cache->dcy_desc_, last_c_grad_data, cudnn_rnn_cache->w_desc_, - weight_data, cudnn_rnn_cache->hx_desc_, init_h_data, - cudnn_rnn_cache->cx_desc_, init_c_data, cudnn_rnn_cache->dx_desc_, - in_grad_data, cudnn_rnn_cache->dhx_desc_, init_h_grad_data, - cudnn_rnn_cache->dcx_desc_, init_c_grad_data, work_data, - cudnn_rnn_cache->workspace_size_, reserve_data, - cudnn_rnn_cache->reserve_size_)); + handle, cudnn_rnn_cache->rnn_desc_, seq_len, cudnn_rnn_cache->y_desc_, + out_data, cudnn_rnn_cache->y_desc_, out_grad_data, + cudnn_rnn_cache->hy_desc_, last_h_grad_data, cudnn_rnn_cache->cy_desc_, + last_c_grad_data, cudnn_rnn_cache->w_desc_, weight_data, + cudnn_rnn_cache->hx_desc_, init_h_data, cudnn_rnn_cache->cx_desc_, + init_c_data, cudnn_rnn_cache->x_desc_, in_grad_data, + cudnn_rnn_cache->hx_desc_, init_h_grad_data, cudnn_rnn_cache->cx_desc_, + init_c_grad_data, work_data, cudnn_rnn_cache->workspace_size_, + const_cast(reserve_data), reserve_size)); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnRNNBackwardWeights( - handle, cudnn_rnn_cache->rnn_desc_, run_seq_len, - cudnn_rnn_cache->x_desc_, input->data(), cudnn_rnn_cache->hx_desc_, - init_h->data(), cudnn_rnn_cache->y_desc_, out->data(), + handle, cudnn_rnn_cache->rnn_desc_, seq_len, cudnn_rnn_cache->x_desc_, + input->data(), cudnn_rnn_cache->hx_desc_, init_h->data(), + cudnn_rnn_cache->y_desc_, out->data(), cudnn_rnn_cache->workspace_data_.data(), - cudnn_rnn_cache->workspace_size_, cudnn_rnn_cache->dw_desc_, - weight_grad->data(), cudnn_rnn_cache->reserve_data_.data(), - cudnn_rnn_cache->reserve_size_)); + cudnn_rnn_cache->workspace_size_, cudnn_rnn_cache->w_desc_, + weight_grad->data(), const_cast(reserve_data), + reserve_size)); + delete cudnn_rnn_cache; } }; @@ -257,5 +201,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(cudnn_lstm, ops::CudnnLSTMGPUKernel); -REGISTER_OP_CUDA_KERNEL(cudnn_lstm_grad, ops::CudnnLSTMGPUGradKernel); +REGISTER_OP_CUDA_KERNEL(cudnn_lstm, ops::CudnnLSTMGPUKernel, + ops::CudnnLSTMGPUKernel); +REGISTER_OP_CUDA_KERNEL(cudnn_lstm_grad, ops::CudnnLSTMGPUGradKernel, + ops::CudnnLSTMGPUGradKernel); diff --git a/paddle/fluid/operators/cudnn_rnn_cache.h b/paddle/fluid/operators/cudnn_rnn_cache.h index cd33338abc6223a0ae122cbb60f040562b48a761..13a3e7d09b9f628f31bb9ff3b6137acf6d929c5c 100644 --- a/paddle/fluid/operators/cudnn_rnn_cache.h +++ b/paddle/fluid/operators/cudnn_rnn_cache.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once +#include #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/cudnn_helper.h" @@ -24,16 +25,12 @@ struct CudnnRNNCache { CudnnRNNCache() { x_desc_ = NULL; y_desc_ = NULL; - dx_desc_ = NULL; - dy_desc_ = NULL; } ~CudnnRNNCache() { release(); } cudnnRNNDescriptor_t rnn_desc_; cudnnTensorDescriptor_t *x_desc_; cudnnTensorDescriptor_t *y_desc_; - cudnnTensorDescriptor_t *dx_desc_; - cudnnTensorDescriptor_t *dy_desc_; cudnnTensorDescriptor_t hx_desc_; cudnnTensorDescriptor_t cx_desc_; @@ -55,13 +52,9 @@ struct CudnnRNNCache { cudnnFilterDescriptor_t dw_desc_; size_t workspace_size_; - size_t reserve_size_; - framework::Tensor reserve_data_; framework::Tensor workspace_data_; - framework::Tensor dropout_state_; - - size_t max_length_; + size_t seq_length_; float dropout_prob_; bool is_bidirec_; @@ -72,10 +65,12 @@ struct CudnnRNNCache { int num_layers_; int seed_; - void init(cudnnHandle_t handle, const platform::Place &place, size_t max_len, + void init(cudnnHandle_t handle, const platform::Place &place, size_t seq_len, int batch_size, int input_size, int hidden_size, int num_layers, - float dropout_prob, bool is_bidirec, int seed, int weight_numel) { - max_length_ = max_len; + float dropout_prob, bool is_bidirec, int seed, int weight_numel, + size_t *reserve_size_, framework::Tensor *dropout_state_, + bool initialized, cudnnDataType_t cudnn_type) { + seq_length_ = seq_len; batch_size_ = batch_size; input_size_ = input_size; hidden_size_ = hidden_size; @@ -84,55 +79,34 @@ struct CudnnRNNCache { is_bidirec_ = is_bidirec; seed_ = seed; - x_desc_ = new cudnnTensorDescriptor_t[max_length_]; - y_desc_ = new cudnnTensorDescriptor_t[max_length_]; - dx_desc_ = new cudnnTensorDescriptor_t[max_length_]; - dy_desc_ = new cudnnTensorDescriptor_t[max_length_]; - int dim_a[3]; - int stride_a[3]; + const auto numDirections = is_bidirec_ ? 2 : 1; + auto cudnn_size = + cudnn_type == CUDNN_DATA_FLOAT ? sizeof(float) : sizeof(double); + + x_desc_ = new cudnnTensorDescriptor_t[seq_length_]; + y_desc_ = new cudnnTensorDescriptor_t[seq_length_]; + std::vector dims = {batch_size_, input_size_, 1}; + std::vector strides = {input_size_, 1, 1}; + + std::vector dims_y = {batch_size_, hidden_size_ * numDirections, 1}; + std::vector strides_y = {hidden_size_ * numDirections, 1, 1}; - for (size_t i = 0; i < max_length_; ++i) { + for (size_t i = 0; i < seq_length_; ++i) { PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnCreateTensorDescriptor(&x_desc_[i])); PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnCreateTensorDescriptor(&y_desc_[i])); - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::cudnnCreateTensorDescriptor(&dx_desc_[i])); - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::cudnnCreateTensorDescriptor(&dy_desc_[i])); - dim_a[0] = batch_size_; - dim_a[1] = input_size_; - dim_a[2] = 1; - - stride_a[0] = dim_a[2] * dim_a[1]; - stride_a[1] = dim_a[2]; - stride_a[2] = 1; - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - x_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - dx_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); - - dim_a[0] = batch_size_; - dim_a[1] = is_bidirec_ ? 
hidden_size_ * 2 : hidden_size_; - dim_a[2] = 1; - - stride_a[0] = dim_a[2] * dim_a[1]; - stride_a[1] = dim_a[2]; - stride_a[2] = 1; PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - y_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + x_desc_[i], cudnn_type, 3, dims.data(), strides.data())); + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - dy_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + y_desc_[i], cudnn_type, 3, dims_y.data(), strides_y.data())); } - dim_a[0] = num_layers_ * (is_bidirec_ ? 2 : 1); - dim_a[1] = batch_size_; - dim_a[2] = hidden_size_; - - stride_a[0] = dim_a[2] * dim_a[1]; - stride_a[1] = dim_a[2]; - stride_a[2] = 1; + std::vector dims_hx = {num_layers_ * numDirections, batch_size_, + hidden_size_}; + std::vector strides_hx = {hidden_size_ * batch_size_, hidden_size_, 1}; PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnCreateTensorDescriptor(&hx_desc_)); @@ -152,33 +126,44 @@ struct CudnnRNNCache { platform::dynload::cudnnCreateTensorDescriptor(&dcy_desc_)); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - hx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + hx_desc_, cudnn_type, 3, dims_hx.data(), strides_hx.data())); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - cx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + cx_desc_, cudnn_type, 3, dims_hx.data(), strides_hx.data())); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - hy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + hy_desc_, cudnn_type, 3, dims_hx.data(), strides_hx.data())); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - cy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + cy_desc_, cudnn_type, 3, dims_hx.data(), strides_hx.data())); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - dhx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + dhx_desc_, cudnn_type, 3, dims_hx.data(), strides_hx.data())); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - dcx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + dcx_desc_, cudnn_type, 3, dims_hx.data(), strides_hx.data())); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - dhy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + dhy_desc_, cudnn_type, 3, dims_hx.data(), strides_hx.data())); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - dcy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a)); + dcy_desc_, cudnn_type, 3, dims_hx.data(), strides_hx.data())); PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnCreateDropoutDescriptor(&dropout_desc_)); size_t state_size; - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size)); - dropout_state_.Resize({static_cast(state_size)}); - auto *dropout_state_data = dropout_state_.mutable_data(place); - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetDropoutDescriptor( - dropout_desc_, handle, dropout_prob_, dropout_state_data, state_size, - seed_)); + if (!initialized) { + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size)); + dropout_state_->Resize({static_cast(state_size)}); + uint8_t *dropout_state_data = + dropout_state_->mutable_data(place); + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetDropoutDescriptor( + dropout_desc_, handle, dropout_prob_, dropout_state_data, state_size, + seed_)); + } else { + uint8_t *dropout_state_data = 
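The descriptor setup above hard-codes packed row-major strides for each dims vector (e.g. {batch, input_size, 1} gives {input_size, 1, 1}, and {num_layers * dirs, batch, hidden} gives {hidden * batch, hidden, 1}). A small standalone helper that reproduces those stride values (sketch only):

#include <vector>

// Packed row-major strides for a dims vector, matching the hand-written
// strides in the hunk above.
std::vector<int> PackedStrides(const std::vector<int>& dims) {
  std::vector<int> strides(dims.size(), 1);
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i) {
    strides[i] = strides[i + 1] * dims[i + 1];
  }
  return strides;
}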
dropout_state_->data(); + auto dropout_state_dims = dropout_state_->dims(); + state_size = dropout_state_dims[0]; + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnRestoreDropoutDescriptor( + dropout_desc_, handle, dropout_prob_, dropout_state_data, + state_size, 0)); + } PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnCreateRNNDescriptor(&rnn_desc_)); @@ -188,12 +173,12 @@ struct CudnnRNNCache { handle, rnn_desc_, hidden_size_, num_layers_, dropout_desc_, CUDNN_LINEAR_INPUT, is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM, - CUDNN_RNN_ALGO_STANDARD, CUDNN_DATA_FLOAT)); + CUDNN_RNN_ALGO_STANDARD, cudnn_type)); #else PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNDescriptor( rnn_desc_, hidden_size_, num_layers_, dropout_desc_, CUDNN_LINEAR_INPUT, is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM, - CUDNN_DATA_FLOAT)); + cudnn_type)); #endif PADDLE_ENFORCE_CUDA_SUCCESS( @@ -202,48 +187,42 @@ struct CudnnRNNCache { platform::dynload::cudnnCreateFilterDescriptor(&dw_desc_)); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNParamsSize( - handle, rnn_desc_, x_desc_[0], &weights_size_, CUDNN_DATA_FLOAT)); + handle, rnn_desc_, x_desc_[0], &weights_size_, cudnn_type)); + + PADDLE_ENFORCE_EQ( + weights_size_, cudnn_size * weight_numel, + platform::errors::InvalidArgument( + "The cudnn lstm and setting weight size should be same.")); - PADDLE_ENFORCE_EQ(weights_size_, sizeof(float) * weight_numel, - "cudnn lstm weight size should be SAME"); int dim_w[3]; - dim_w[0] = weights_size_ / sizeof(float); + dim_w[0] = weights_size_ / cudnn_size; dim_w[1] = 1; dim_w[2] = 1; PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetFilterNdDescriptor( - w_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w)); + w_desc_, cudnn_type, CUDNN_TENSOR_NCHW, 3, dim_w)); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetFilterNdDescriptor( - dw_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w)); + dw_desc_, cudnn_type, CUDNN_TENSOR_NCHW, 3, dim_w)); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnGetRNNWorkspaceSize( - handle, rnn_desc_, max_length_, x_desc_, &workspace_size_)); + handle, rnn_desc_, seq_length_, x_desc_, &workspace_size_)); PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnGetRNNTrainingReserveSize( - handle, rnn_desc_, max_length_, x_desc_, &reserve_size_)); - - reserve_data_.Resize({static_cast(reserve_size_)}); - reserve_data_.mutable_data(place); + handle, rnn_desc_, seq_length_, x_desc_, reserve_size_)); workspace_data_.Resize({static_cast(workspace_size_)}); workspace_data_.mutable_data(place); } void release() { - for (size_t i = 0; i < max_length_; ++i) { + for (size_t i = 0; i < seq_length_; ++i) { PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnDestroyTensorDescriptor(x_desc_[i])); PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnDestroyTensorDescriptor(y_desc_[i])); - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::cudnnDestroyTensorDescriptor(dx_desc_[i])); - PADDLE_ENFORCE_CUDA_SUCCESS( - platform::dynload::cudnnDestroyTensorDescriptor(dy_desc_[i])); } delete[] x_desc_; delete[] y_desc_; - delete[] dx_desc_; - delete[] dy_desc_; PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnDestroyTensorDescriptor(hx_desc_)); diff --git a/paddle/fluid/operators/cvm_op.h b/paddle/fluid/operators/cvm_op.h index c6140483ff5cb8108895546b6a01f058708231fd..956fd5ad3035434fbf3093786319b3f7ab7e7354 100644 --- a/paddle/fluid/operators/cvm_op.h +++ b/paddle/fluid/operators/cvm_op.h @@ -68,8 +68,19 @@ class 
CVMOpKernel : public framework::OpKernel { // for Input X do not have Lod Information. if (x->NumLevels() == 0) { - for (int i = 0; i < batch_size; i++) { - CvmComputeKernel(use_cvm, item_size, &x_data, &y_data); + if (use_cvm) { + for (int i = 0; i < batch_size; i++) { + int cursor = i * item_size; + y_data[cursor] = log(x_data[cursor] + 1); + y_data[cursor + 1] = log(x_data[cursor + 1] + 1) - y_data[cursor]; + for (int j = 2; j < item_size; j++) { + y_data[cursor + j] = x_data[cursor + j]; + } + } + } else { + for (int i = 0; i < batch_size; i++) { + CvmComputeKernel(use_cvm, item_size, &x_data, &y_data); + } } } else { auto lod = x->lod()[0]; diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc index 16e1699e12c832d54af14f673577dcc32b015d6d..5cd853758926e622d0f87e6f8bbaba2cf3b9f85e 100644 --- a/paddle/fluid/operators/detection/bipartite_match_op.cc +++ b/paddle/fluid/operators/detection/bipartite_match_op.cc @@ -222,10 +222,12 @@ class BipartiteMatchKernel : public framework::OpKernel { } else { auto lod = dist_mat->lod().back(); for (size_t i = 0; i < lod.size() - 1; ++i) { - Tensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]); - BipartiteMatch(one_ins, indices + i * col, dist + i * col); - if (type == "per_prediction") { - ArgMaxMatch(one_ins, indices + i * col, dist + i * col, threshold); + if (lod[i + 1] > lod[i]) { + Tensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]); + BipartiteMatch(one_ins, indices + i * col, dist + i * col); + if (type == "per_prediction") { + ArgMaxMatch(one_ins, indices + i * col, dist + i * col, threshold); + } } } } diff --git a/paddle/fluid/operators/diag_v2_op.cc b/paddle/fluid/operators/diag_v2_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..67dc2843345682b2dfe3d568e452461860575544 --- /dev/null +++ b/paddle/fluid/operators/diag_v2_op.cc @@ -0,0 +1,140 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
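The cvm_op hunk above special-cases use_cvm for inputs without LoD: the first two slots of each item are log-transformed and the remainder is copied through. A standalone per-item sketch (the first two slots are commonly the show/click counters; names here are illustrative):

#include <cmath>
#include <vector>

// Per-item transform from the use_cvm branch above.
std::vector<float> CvmTransform(const std::vector<float>& x) {
  std::vector<float> y(x.size());
  y[0] = std::log(x[0] + 1.0f);
  y[1] = std::log(x[1] + 1.0f) - y[0];
  for (size_t j = 2; j < x.size(); ++j) y[j] = x[j];
  return y;
}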
*/ + +#include "paddle/fluid/operators/diag_v2_op.h" +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +class DiagV2Op : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "diag_v2"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "diag_v2"); + + auto x_dims = ctx->GetInputDim("X"); + auto offset = ctx->Attrs().Get("offset"); + + if (x_dims.size() == 1UL) { + int64_t size = x_dims[0] + std::abs(offset); + ctx->SetOutputDim("Out", {size, size}); + } else if (x_dims.size() == 2UL) { + int64_t size; + if (offset >= 0) { + size = std::min(x_dims[0], x_dims[1] - offset); + } else { + size = std::min(x_dims[0] + offset, x_dims[1]); + } + ctx->SetOutputDim("Out", {size}); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "The input tensor X's dimensions of DiagV2Op should be either 1 or " + "2, but received %d.", + x_dims.size())); + } + } +}; + +class DiagV2OpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "The input tensor. Its shape is either 1-D or 2-D."); + AddOutput("Out", "The output tensor. A square matrix or a vector."); + AddAttr("offset", + "The diagonal offset. A positive value represents " + "superdiagonal, 0 represents the main diagonal, and a " + "negative value represents subdiagonal.") + .SetDefault(0); + AddAttr("padding_value", + "Use this value to fill the area outside the specified " + "diagonal band. Only takes effect when the input is a 1-D " + "Tensor. The default value is 0.") + .SetDefault(0.0f); + AddComment(R"DOC( + If ``x`` is a vector (1-D tensor), a 2-D square tensor whth the elements of ``x`` as the diagonal is returned. + + If ``x`` is a matrix (2-D tensor), a 1-D tensor with the diagonal elements of ``x`` is returned. + + The argument ``offset`` controls the diagonal offset: + + If ``offset`` = 0, it is the main diagonal. + + If ``offset`` > 0, it is superdiagonal. + + If ``offset`` < 0, it is subdiagonal. +)DOC"); + } +}; + +template +class DiagV2Kernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* X = context.Input("X"); + auto* x_data = X->data(); + auto x_dims = X->dims(); + int offset = context.Attr("offset"); + auto* out = context.Output("Out"); + T* out_data = out->mutable_data(context.GetPlace()); + auto out_dims = out->dims(); + + int64_t i; + if (x_dims.size() == 1) { + float padding_value = context.Attr("padding_value"); + math::SetConstant set_padding_value; + auto& dev_ctx = context.template device_context(); + set_padding_value(dev_ctx, out, static_cast(padding_value)); + + auto x_length = x_dims[0]; + const int& x_stride = ComputeStride(0, x_dims); + + auto out_stride_0 = ComputeStride(0, out_dims); + auto out_stride_1 = ComputeStride(1, out_dims); + out_data += + (offset >= 0 ? offset * out_stride_1 : -offset * out_stride_0); + + for (i = 0; i < x_length; i++) { + out_data[i * (out_stride_0 + out_stride_1)] = x_data[i * x_stride]; + } + } else { + auto out_length = out_dims[0]; + const int& x_stride_0 = ComputeStride(0, x_dims); + const int& x_stride_1 = ComputeStride(1, x_dims); + + auto out_stride_0 = ComputeStride(0, out_dims); + x_data += (offset >= 0 ? 
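The DiagV2Op shape inference above works in both directions: a vector becomes a square matrix padded by |offset|, and a matrix yields the length of the selected diagonal. A standalone sketch of that rule for 1-D and 2-D inputs:

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <vector>

// Output shape of diag_v2 as inferred above.
std::vector<int64_t> DiagV2OutShape(const std::vector<int64_t>& x_dims,
                                    int offset) {
  if (x_dims.size() == 1) {
    int64_t size = x_dims[0] + std::abs(offset);
    return {size, size};
  }
  int64_t size = offset >= 0 ? std::min(x_dims[0], x_dims[1] - offset)
                             : std::min(x_dims[0] + offset, x_dims[1]);
  return {size};
}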
offset * x_stride_1 : -offset * x_stride_0); + for (i = 0; i < out_length; i++) { + out_data[i * out_stride_0] = x_data[i * (x_stride_0 + x_stride_1)]; + } + } + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + diag_v2, ops::DiagV2Op, ops::DiagV2OpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL( + diag_v2, ops::DiagV2Kernel, + ops::DiagV2Kernel, + ops::DiagV2Kernel, + ops::DiagV2Kernel); diff --git a/paddle/fluid/operators/diag_v2_op.cu b/paddle/fluid/operators/diag_v2_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..4386cc6b8183c03b4d4a19aba7d1126eac2ab495 --- /dev/null +++ b/paddle/fluid/operators/diag_v2_op.cu @@ -0,0 +1,122 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/diag_v2_op.h" + +namespace paddle { +namespace operators { + +// Extract the diagonal of a matrix 'x' to a vector 'out'. +template +__global__ void ExtractDiagonalKernel(T* out, const T* x, std::ptrdiff_t start, + std::ptrdiff_t size, + const std::ptrdiff_t sumStride, + const std::ptrdiff_t outStride) { + for (std::ptrdiff_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < size; + idx += gridDim.x * blockDim.x) { + const std::ptrdiff_t xOffset = start + sumStride * idx; + out[outStride * idx] = x[xOffset]; + } +} + +// Paste a vector 'x' to the diagonal of a matrix 'out' +template +__global__ void PasteDiagonalKernel(T* out, const T* x, std::ptrdiff_t start, + std::ptrdiff_t x_length, + const std::ptrdiff_t sumStride, + const std::ptrdiff_t xStride) { + for (std::ptrdiff_t idx = blockIdx.x * blockDim.x + threadIdx.x; + idx < x_length; idx += gridDim.x * blockDim.x) { + const std::ptrdiff_t outOffset = start + sumStride * idx; + out[outOffset] = x[xStride * idx]; + } +} + +template +class DiagV2CUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* X = context.Input("X"); + auto* x_data = X->data(); + auto x_dims = X->dims(); + int offset = context.Attr("offset"); + auto* out = context.Output("Out"); + T* out_data = out->mutable_data(context.GetPlace()); + auto out_dims = out->dims(); + auto& dev_ctx = context.template device_context(); + + if (x_dims.size() == 1) { + float padding_value = context.Attr("padding_value"); + math::SetConstant set_padding_value; + set_padding_value(dev_ctx, out, static_cast(padding_value)); + + auto x_length = x_dims[0]; + auto size = (offset > 0) ? 
x_length + offset : x_length - offset; + const int& x_stride = ComputeStride(0, x_dims); + if (size > 0) { + const int block_num = std::min(static_cast(size), + dev_ctx.GetMaxPhysicalThreadCount()); + int size_ = static_cast(size); + int block_num_ = static_cast(block_num); + const int grid_num = + std::min(1024, (size_ + block_num_ - 1) / block_num_); + const auto& out_stride_0 = ComputeStride(0, out_dims); + const auto& out_stride_1 = ComputeStride(1, out_dims); + auto start = + (offset >= 0 ? offset * out_stride_1 : -offset * out_stride_0); + + PasteDiagonalKernel<<>>( + out_data, x_data, start, x_length, out_stride_0 + out_stride_1, + x_stride); + } + } else { + const int& x_stride_0 = ComputeStride(0, x_dims); + const int& x_stride_1 = ComputeStride(1, x_dims); + + int size; + if (offset > 0) { + size = std::min(x_dims[0], x_dims[1] - offset); + } else { + size = std::min(x_dims[0] + offset, x_dims[1]); + } + + if (size > 0) { + const int block_num = std::min(static_cast(size), + dev_ctx.GetMaxPhysicalThreadCount()); + int size_ = static_cast(size); + int block_num_ = static_cast(block_num); + const int grid_num = + std::min(1024, (size_ + block_num_ - 1) / block_num_); + auto start = (offset >= 0 ? offset * x_stride_1 : -offset * x_stride_0); + const auto& out_stride_0 = ComputeStride(0, out_dims); + + ExtractDiagonalKernel<<>>( + out_data, x_data, start, size, x_stride_0 + x_stride_1, + out_stride_0); + } + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + diag_v2, ops::DiagV2CUDAKernel, + ops::DiagV2CUDAKernel, + ops::DiagV2CUDAKernel, + ops::DiagV2CUDAKernel); diff --git a/paddle/fluid/operators/diag_v2_op.h b/paddle/fluid/operators/diag_v2_op.h new file mode 100644 index 0000000000000000000000000000000000000000..7850def06117ff4232afe4fca95a3e3e500e876d --- /dev/null +++ b/paddle/fluid/operators/diag_v2_op.h @@ -0,0 +1,34 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
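Both CUDA kernels above use grid-stride loops, so the launch shape only affects occupancy, not correctness. The launch arithmetic from the kernel wrapper, written out as a standalone helper (sketch):

#include <algorithm>

// Launch-shape arithmetic from the wrapper above: one dimension is capped by
// the element count and the device thread budget, the other by 1024 with a
// ceiling divide; the grid-stride loops in the kernels cover any remainder.
void DiagLaunchShape(int size, int max_threads, int* block_num, int* grid_num) {
  *block_num = std::min(size, max_threads);
  *grid_num = std::min(1024, (size + *block_num - 1) / *block_num);
}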
*/ + +#pragma once + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using DDim = framework::DDim; + +static inline int ComputeStride(int axis, DDim dims) { + int size = 1; + for (int i = axis + 1; i < dims.size(); i++) { + size *= dims[i]; + } + return size; +} + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/distributed/CMakeLists.txt b/paddle/fluid/operators/distributed/CMakeLists.txt index cff3993a068ceee1947ca3e17b9cc6a75e3c9ba9..a033611f478f9ea44fd49ab2015e78aaea6aacd9 100644 --- a/paddle/fluid/operators/distributed/CMakeLists.txt +++ b/paddle/fluid/operators/distributed/CMakeLists.txt @@ -61,7 +61,7 @@ cc_test(varhandle_test SRCS varhandle_test.cc DEPS profiler scope) cc_library(parameter_prefetch SRCS parameter_prefetch.cc DEPS sendrecvop_rpc memory) cc_library(parameter_send SRCS parameter_send.cc DEPS sendrecvop_rpc memory) cc_library(parameter_recv SRCS parameter_recv.cc DEPS sendrecvop_rpc memory) -cc_library(communicator SRCS communicator.cc DEPS scope selected_rows tensor variable_helper selected_rows_functor simple_threadpool parameter_send parameter_recv) +cc_library(communicator SRCS communicator.cc DEPS scope selected_rows tensor variable_helper selected_rows_functor simple_threadpool parameter_send parameter_recv generator) cc_test(communicator_test SRCS communicator_test.cc DEPS communicator) if(WITH_GPU) cc_test(collective_server_test SRCS collective_server_test.cc diff --git a/paddle/fluid/operators/distributed/large_scale_kv.h b/paddle/fluid/operators/distributed/large_scale_kv.h index fb7a0691154de768d4b828ee5d7b6a47755225f4..0d7032e286caab93dbd38f35881e9064694a8307 100644 --- a/paddle/fluid/operators/distributed/large_scale_kv.h +++ b/paddle/fluid/operators/distributed/large_scale_kv.h @@ -28,6 +28,7 @@ #include // NOLINT #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/rw_lock.h" #include "paddle/fluid/framework/selected_rows.h" @@ -96,7 +97,12 @@ class UniformInitializer : public Initializer { dist_ = std::uniform_real_distribution(min_, max_); } - float GetValue() override { return dist_(random_engine_); } + float GetValue() override { + return framework::Generator::GetInstance()->is_init_py + ? dist_(framework::Generator::GetInstance()->GetCPUEngine()) + : dist_(random_engine_); + // return dist_(random_engine_); + } private: float min_; @@ -141,7 +147,12 @@ class GaussianInitializer : public Initializer { dist_ = std::normal_distribution(mean_, std_); } - float GetValue() override { return dist_(random_engine_); } + float GetValue() override { + return framework::Generator::GetInstance()->is_init_py + ? 
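The initializer hunks here (UniformInitializer above, and the matching GaussianInitializer change just below) switch to a process-wide engine once the Python-side Generator has been initialized, otherwise keeping the local per-initializer engine. A hedged sketch of that fallback with illustrative names:

#include <random>

// Prefer a shared engine when the global generator is ready, else fall back
// to the initializer's private engine (stand-in for Generator::GetCPUEngine).
float SampleUniform(bool global_ready, std::mt19937_64* global_engine,
                    std::mt19937_64* local_engine, float lo, float hi) {
  std::uniform_real_distribution<float> dist(lo, hi);
  return global_ready ? dist(*global_engine) : dist(*local_engine);
}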
dist_(framework::Generator::GetInstance()->GetCPUEngine()) + : dist_(random_engine_); + // return dist_(random_engine_); + } private: float std_; diff --git a/paddle/fluid/operators/distributed/parameter_prefetch.cc b/paddle/fluid/operators/distributed/parameter_prefetch.cc index 5a67b358ddabb12566cd4ffe00cb12c65a185099..a9378d61c3ca39bd43b558633cc4d04c40175cac 100644 --- a/paddle/fluid/operators/distributed/parameter_prefetch.cc +++ b/paddle/fluid/operators/distributed/parameter_prefetch.cc @@ -110,7 +110,7 @@ void prefetch_core( int pservers = context.Attr("pserver_num"); platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &actual_ctx = *pool.Get(context.GetPlace()); + auto &actual_ctx = *pool.Get(platform::CPUPlace()); std::unique_ptr local_scope = scope.NewTmpScope(); @@ -144,7 +144,6 @@ void prefetch_core( VLOG(3) << "don't send no-initialied variable: " << out_var_names[i]; } } - for (size_t i = 0; i < rets.size(); i++) { PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, platform::errors::ExecutionTimeout( "internal error in RPCClient")); @@ -167,6 +166,7 @@ void prefetch_core( for (int64_t i = 0; i < dims[0]; ++i) { auto origin_id = ids_in_this_section[i]; std::vector vecs(row_numel); + std::copy_n(out_var_data + i * row_numel, row_numel, vecs.begin()); (*recved_vec_map)[origin_id] = vecs; } @@ -213,18 +213,18 @@ void prefetchs(const std::vector &id_var_names, const auto place = scope.FindVar(id_var_names[0])->Get().place(); - if (!platform::is_cpu_place(place)) { - PADDLE_THROW("multi prefetch only support CPU currently"); - } - + std::vector> ids_group; std::vector ids_union; + std::vector ids_lods; TableAndEndpoints tables; for (auto &id_name : id_var_names) { - auto *in_var = scope.FindVar(id_name); - auto &id_tensor = in_var->Get(); - std::copy_n(id_tensor.data(), id_tensor.numel(), - back_inserter(ids_union)); + auto &id_tensor = scope.FindVar(id_name)->Get(); + std::vector ids; + TensorToVector(id_tensor, context.device_context(), &ids); + ids_union.insert(ids_union.end(), ids.begin(), ids.end()); + ids_group.push_back(ids); + ids_lods.push_back(id_tensor.lod()); } std::unordered_set s(ids_union.begin(), ids_union.end()); @@ -258,25 +258,48 @@ void prefetchs(const std::vector &id_var_names, } for (size_t i = 0; i < out_var_names.size(); i++) { - auto *in_var = scope.FindVar(id_var_names[i]); - auto &id_tensor = in_var->Get(); - auto ids_size = id_tensor.dims()[0]; - const auto *id_data = id_tensor.data(); - + std::vector ids = ids_group[i]; + auto ids_size = ids.size(); auto *out_t = scope.FindVar(out_var_names[i])->GetMutable(); - out_t->set_lod(id_tensor.lod()); - out_t->Resize(framework::make_ddim({ids_size, vec_dim_1})); + out_t->set_lod(ids_lods[i]); + out_t->Resize( + framework::make_ddim({static_cast(ids_size), vec_dim_1})); auto *out_d = out_t->mutable_data(place); - for (auto idx = 0; idx < static_cast(ids_size); idx++) { - const auto &id = id_data[idx]; - if (padding_idx != distributed::kNoPadding && id == padding_idx) { - memset(out_d + idx * vec_dim_1, 0, sizeof(float) * vec_dim_1); - } else { - std::copy_n(recved_vec_map[id].begin(), vec_dim_1, - out_d + idx * vec_dim_1); + if (platform::is_cpu_place(out_t->place())) { + for (auto idx = 0; idx < static_cast(ids_size); idx++) { + const auto &id = ids[idx]; + if (padding_idx != distributed::kNoPadding && id == padding_idx) { + memset(out_d + idx * vec_dim_1, 0, sizeof(float) * vec_dim_1); + } else { + std::copy_n(recved_vec_map[id].begin(), vec_dim_1, + out_d + idx * vec_dim_1); + } + } + } 
else { +#ifdef PADDLE_WITH_CUDA + for (auto idx = 0; idx < static_cast(ids_size); idx++) { + const auto &id = ids[idx]; + auto stream = context.cuda_device_context().stream(); + if (padding_idx != distributed::kNoPadding && id == padding_idx) { + platform::GpuMemsetAsync(out_d + idx * vec_dim_1, 0, + sizeof(float) * vec_dim_1, stream); + } else { + auto &cpu_place = + BOOST_GET_CONST(platform::CPUPlace, + paddle::platform::CPUDeviceContext().GetPlace()); + auto &gpu_place = + BOOST_GET_CONST(platform::CUDAPlace, out_t->place()); + memory::Copy(gpu_place, out_d + idx * vec_dim_1, cpu_place, + &recved_vec_map[id][0], sizeof(float) * vec_dim_1, + stream); + } } +#else + PADDLE_ENFORCE(true, platform::errors::PermissionDenied( + "Paddle is not compiled with GPU!")); +#endif } } } diff --git a/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cc b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cc index 3037a63b0d7b4e8812e67fdfb776f89ea43eb546..8c093d12585981ee681ae13f0d2e493197c6b9b3 100644 --- a/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cc +++ b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cc @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/distributed/parameter_prefetch.h" +#include "paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.h" #include "paddle/fluid/operators/math/math_function.h" namespace paddle { @@ -75,47 +73,6 @@ class DistributedLookupTableOp : public framework::OperatorWithKernel { } }; -template -class DistributedLookupTableKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto ids_vars = context.MultiInputVar("Ids"); - auto emb_vars = context.MultiOutput("Embeddings"); - - auto id_names = context.InputNames("Ids"); - auto embedding_name = context.InputNames("W").front(); - auto out_names = context.OutputNames("Outputs"); - auto lookup_tables = context.Attr>("table_names"); - auto endpoints = context.Attr>("endpoints"); - auto is_distributed = context.Attr("is_distributed"); - - auto lookup_table_version = - context.Attr("lookup_table_version"); - - operators::distributed::prefetchs(id_names, out_names, embedding_name, - is_distributed, lookup_tables, endpoints, - context, context.scope()); - - if (lookup_table_version == "lookup_table_v2") { - auto &scope = context.scope(); - auto emb_dim = - scope.FindVar(embedding_name)->Get().dims()[1]; - - for (size_t i = 0; i < id_names.size(); ++i) { - auto *id_var = scope.FindVar(id_names[i]); - auto *out_var = scope.FindVar(out_names[i]); - auto *id_tensor = id_var->GetMutable(); - auto *out_tensor = out_var->GetMutable(); - - auto id_dims = id_tensor->dims(); - out_tensor->Resize(framework::make_ddim( - {static_cast(id_dims[0]), static_cast(id_dims[1]), - static_cast(emb_dim)})); - } - } - } -}; - class DistributedLookupTableOpMaker : 
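The prefetch fill loops above come in a CPU flavour (memset and std::copy_n) and a CUDA flavour (GpuMemsetAsync and memory::Copy). A standalone sketch of the CPU-side logic, using an unordered_map in place of the received id-to-vector map (illustrative only):

#include <cstdint>
#include <cstring>
#include <unordered_map>
#include <vector>

// Rows matching padding_idx are zeroed, all other rows are copied from the
// prefetched map. A real caller also skips the check when padding_idx is
// kNoPadding, as in the hunk above.
void FillPrefetched(const std::vector<int64_t>& ids, int64_t padding_idx,
                    int vec_dim,
                    const std::unordered_map<int64_t, std::vector<float>>& recv,
                    float* out) {
  for (size_t i = 0; i < ids.size(); ++i) {
    float* row = out + i * vec_dim;
    if (ids[i] == padding_idx) {
      std::memset(row, 0, sizeof(float) * vec_dim);
    } else {
      std::memcpy(row, recv.at(ids[i]).data(), sizeof(float) * vec_dim);
    }
  }
}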
public framework::OpProtoAndCheckerMaker { public: void Make() override { @@ -170,15 +127,12 @@ class DistributedLookupTableOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Lookup Tablel Prefetch Operator. - This operator is used to perform lookup on parameter W, then concatenated into a sparse tensor. - The type of Ids(Input) is SelectedRows, the rows of Ids contains the ids to be looked up in W; if the Id is not in the sparse table, this operator will return a random value and set the value into the table for the next looking up. - )DOC"); } }; @@ -191,4 +145,5 @@ REGISTER_OPERATOR(distributed_lookup_table, ops::DistributedLookupTableOp, ops::DistributedLookupTableOpMaker); REGISTER_OP_CPU_KERNEL(distributed_lookup_table, - ops::DistributedLookupTableKernel); + ops::DistributedLookupTableKernel< + paddle::platform::CPUDeviceContext, float>); diff --git a/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cu.cc b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..54c894415096e869f363eda6a1de2a473e839263 --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cu.cc @@ -0,0 +1,22 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL( + distributed_lookup_table, + ops::DistributedLookupTableKernel); diff --git a/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.h b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.h new file mode 100644 index 0000000000000000000000000000000000000000..a71451c78a870b71c05b41bdcfb34a85b3e2213b --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
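As a rough illustration of the row-placement step in the GPU branch earlier in this hunk (zero-fill the rows that hit padding_idx, copy every other prefetched row into the output), here is a minimal CPU-only sketch in standard C++. The helper name and the `padding_idx >= 0` check are illustrative stand-ins for the op's `kNoPadding` convention; the real code performs the same two cases with `GpuMemsetAsync` and `memory::Copy` on a CUDA stream.

```cpp
// Minimal sketch of the lookup-result placement: one output row per id,
// zeros for the padding id, otherwise the prefetched embedding row.
#include <cstdint>
#include <cstring>
#include <unordered_map>
#include <vector>

void PlaceRows(const std::vector<int64_t>& ids,
               const std::unordered_map<int64_t, std::vector<float>>& recved,
               int64_t padding_idx, int64_t vec_dim_1, float* out) {
  for (size_t idx = 0; idx < ids.size(); ++idx) {
    float* dst = out + idx * vec_dim_1;
    const int64_t id = ids[idx];
    if (padding_idx >= 0 && id == padding_idx) {
      // padding row -> zeros (GpuMemsetAsync in the CUDA branch)
      std::memset(dst, 0, sizeof(float) * vec_dim_1);
    } else {
      // prefetched row -> output (memory::Copy to the CUDA place above)
      const std::vector<float>& row = recved.at(id);
      std::memcpy(dst, row.data(), sizeof(float) * vec_dim_1);
    }
  }
}
```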
*/ + +#pragma once +#include +#include +#include +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/distributed/parameter_prefetch.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +template +class DistributedLookupTableKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto ids_vars = context.MultiInputVar("Ids"); + auto emb_vars = context.MultiOutput("Embeddings"); + + auto id_names = context.InputNames("Ids"); + auto embedding_name = context.InputNames("W").front(); + auto out_names = context.OutputNames("Outputs"); + auto lookup_tables = context.Attr>("table_names"); + auto endpoints = context.Attr>("endpoints"); + auto is_distributed = context.Attr("is_distributed"); + + operators::distributed::prefetchs(id_names, out_names, embedding_name, + is_distributed, lookup_tables, endpoints, + context, context.scope()); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/distributed_ops/recv_save_op.cc b/paddle/fluid/operators/distributed_ops/recv_save_op.cc index ccc30d1ea082a6f69b71059631247144c931116e..d194fcda36a474fa208f5d5a67e425ba5a5a3303 100644 --- a/paddle/fluid/operators/distributed_ops/recv_save_op.cc +++ b/paddle/fluid/operators/distributed_ops/recv_save_op.cc @@ -44,7 +44,7 @@ class RecvSaveOp : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::proto::VarType::Type(ctx.Attr("dtype")), - ctx.GetPlace()); + platform::CPUPlace()); } }; diff --git a/paddle/fluid/operators/dot_op.h b/paddle/fluid/operators/dot_op.h index 2580b00d7c2bdfae726b924bb51de199586e12c3..cec706300d77b2b0e66e5b682dfad8536b5dc401 100644 --- a/paddle/fluid/operators/dot_op.h +++ b/paddle/fluid/operators/dot_op.h @@ -26,6 +26,86 @@ template using EigenMatrix = framework::EigenMatrix; +template +void DotGradFunction(const Tensor* tensor_x, const Tensor* tensor_y, + const Tensor* tensor_dout, Tensor* tensor_dx, + Tensor* tensor_dy, + const paddle::framework::ExecutionContext& ctx) { +#ifdef __NVCC__ + if (1 == tensor_dout->dims().size()) { + auto dout = framework::EigenVector::Flatten(*tensor_dout); + + if (tensor_dx) { + auto y = framework::EigenVector::Flatten(*tensor_y); + auto dx = framework::EigenVector::Flatten(*tensor_dx); + auto& dev = *ctx.template device_context().eigen_device(); + Eigen::DSizes size(tensor_dx->numel()); + dx.device(dev) = y * dout.broadcast(size); + } + + if (tensor_dy) { + auto x = framework::EigenVector::Flatten(*tensor_x); + auto dy = framework::EigenVector::Flatten(*tensor_dy); + auto& dev = *ctx.template device_context().eigen_device(); + Eigen::DSizes size(tensor_dy->numel()); + dy.device(dev) = x * dout.broadcast(size); + } + } else { + auto dout = EigenMatrix::From(*tensor_dout); + + if (tensor_dx) { + tensor_dx->mutable_data(ctx.GetPlace()); + auto y = EigenMatrix::From(*tensor_y); + auto dx = EigenMatrix::From(*tensor_dx); + auto& dev = *ctx.template device_context().eigen_device(); + Eigen::DSizes size(1, tensor_dx->dims()[1]); + dx.device(dev) = y * dout.broadcast(size); + } + + if (tensor_dy) { + tensor_dy->mutable_data(ctx.GetPlace()); + auto x = EigenMatrix::From(*tensor_x); + auto dy = EigenMatrix::From(*tensor_dy); + auto& dev = *ctx.template device_context().eigen_device(); + Eigen::DSizes size(1, tensor_dy->dims()[1]); + 
dy.device(dev) = x * dout.broadcast(size); + } + } +#else + const auto* data_dout = tensor_dout->data(); + + if (tensor_dx) { + auto* data_dx = tensor_dx->mutable_data(ctx.GetPlace()); + const auto* data_y = tensor_y->data(); + const framework::DDim& dim = tensor_x->dims(); + size_t N = static_cast(framework::product(dim)); + + auto step = dim[dim.size() - 1]; + + int s = -1; + for (size_t i = 0; i < N; ++i) { + if (0 == i % step) ++s; + data_dx[i] = data_y[i] * data_dout[s]; + } + } + + if (tensor_dy) { + auto* data_dy = tensor_dy->mutable_data(ctx.GetPlace()); + const auto* data_x = tensor_x->data(); + const framework::DDim& dim = tensor_y->dims(); + size_t N = static_cast(framework::product(dim)); + + auto step = dim[dim.size() - 1]; + + int s = -1; + for (size_t i = 0; i < N; ++i) { + if (0 == i % step) ++s; + data_dy[i] = data_x[i] * data_dout[s]; + } + } +#endif +} + template class DotKernel : public framework::OpKernel { public: @@ -84,83 +164,9 @@ class DotGradKernel : public framework::OpKernel { if (tensor_dx) tensor_dx->mutable_data(ctx.GetPlace()); if (tensor_dy) tensor_dy->mutable_data(ctx.GetPlace()); -#ifdef __NVCC__ - if (1 == tensor_dout->dims().size()) { - auto dout = framework::EigenVector::Flatten(*tensor_dout); - - if (tensor_dx) { - auto y = framework::EigenVector::Flatten(*tensor_y); - auto dx = framework::EigenVector::Flatten(*tensor_dx); - auto& dev = - *ctx.template device_context().eigen_device(); - Eigen::DSizes size(tensor_dx->numel()); - dx.device(dev) = y * dout.broadcast(size); - } - - if (tensor_dy) { - auto x = framework::EigenVector::Flatten(*tensor_x); - auto dy = framework::EigenVector::Flatten(*tensor_dy); - auto& dev = - *ctx.template device_context().eigen_device(); - Eigen::DSizes size(tensor_dy->numel()); - dy.device(dev) = x * dout.broadcast(size); - } - } else { - auto dout = EigenMatrix::From(*tensor_dout); - - if (tensor_dx) { - tensor_dx->mutable_data(ctx.GetPlace()); - auto y = EigenMatrix::From(*tensor_y); - auto dx = EigenMatrix::From(*tensor_dx); - auto& dev = - *ctx.template device_context().eigen_device(); - Eigen::DSizes size(1, tensor_dx->dims()[1]); - dx.device(dev) = y * dout.broadcast(size); - } - - if (tensor_dy) { - tensor_dy->mutable_data(ctx.GetPlace()); - auto x = EigenMatrix::From(*tensor_x); - auto dy = EigenMatrix::From(*tensor_dy); - auto& dev = - *ctx.template device_context().eigen_device(); - Eigen::DSizes size(1, tensor_dy->dims()[1]); - dy.device(dev) = x * dout.broadcast(size); - } - } -#else - const auto* data_dout = tensor_dout->data(); - - if (tensor_dx) { - auto* data_dx = tensor_dx->mutable_data(ctx.GetPlace()); - const auto* data_y = tensor_y->data(); - const framework::DDim& dim = tensor_x->dims(); - size_t N = static_cast(framework::product(dim)); - - auto step = dim[dim.size() - 1]; - int s = -1; - for (size_t i = 0; i < N; ++i) { - if (0 == i % step) ++s; - data_dx[i] = data_y[i] * data_dout[s]; - } - } - - if (tensor_dy) { - auto* data_dy = tensor_dy->mutable_data(ctx.GetPlace()); - const auto* data_x = tensor_x->data(); - const framework::DDim& dim = tensor_y->dims(); - size_t N = static_cast(framework::product(dim)); - - auto step = dim[dim.size() - 1]; - - int s = -1; - for (size_t i = 0; i < N; ++i) { - if (0 == i % step) ++s; - data_dy[i] = data_x[i] * data_dout[s]; - } - } -#endif + DotGradFunction(tensor_x, tensor_y, tensor_dout, + tensor_dx, tensor_dy, ctx); } }; diff --git a/paddle/fluid/operators/dropout_op.h b/paddle/fluid/operators/dropout_op.h index 
676361289e888a5bbd71b63ff16a1ddd7e5dad51..bce4c7ca19a603fd2eadaff7f82b5cdec91bb79f 100644 --- a/paddle/fluid/operators/dropout_op.h +++ b/paddle/fluid/operators/dropout_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -55,6 +56,8 @@ class CPUDropoutKernel : public framework::OpKernel { return; } + bool init_generator_py = framework::Generator::GetInstance()->is_init_py; + // NOTE: fixed seed should only be used in unittest or for debug. // Guarantee to use random seed in training. std::random_device rnd; @@ -71,7 +74,11 @@ class CPUDropoutKernel : public framework::OpKernel { std::uniform_real_distribution dist(0, 1); for (size_t i = 0; i < size; ++i) { - if (dist(engine) < dropout_prob) { + float cur_random = + init_generator_py + ? dist(framework::Generator::GetInstance()->GetCPUEngine()) + : dist(engine); + if (cur_random < dropout_prob) { mask_data[i] = 0; y_data[i] = 0; } else { diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc index 5a398fa50febe2efffd588ce8f3612f1f9cec0b6..457d9e79d7da171ef526d5cab0e59b021cb64f98 100644 --- a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc @@ -49,6 +49,8 @@ REGISTER_OP_WITHOUT_GRADIENT(elementwise_floordiv, ops::ElementwiseOp, REGISTER_OP_CPU_KERNEL( elementwise_floordiv, + ops::ElementwiseFloorDivKernel, + ops::ElementwiseFloorDivKernel, ops::ElementwiseFloorDivKernel, ops::ElementwiseFloorDivKernel); diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cu b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cu index 60846d1e8fee1c7f68ac101f18355750c2c15a4d..f63d6f037632c1a6a05726b933b2258adc113ee3 100644 --- a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cu @@ -19,5 +19,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL( elementwise_floordiv, + ops::ElementwiseFloorDivKernel, + ops::ElementwiseFloorDivKernel, ops::ElementwiseFloorDivKernel, ops::ElementwiseFloorDivKernel); diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.h b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.h index 2d24e394d5c823dbd22c837210e46cefeceba1be..a5909aad99a82529a0739cd28b1b72a146524f76 100644 --- a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.h @@ -14,6 +14,7 @@ limitations under the License. 
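The dropout change a little earlier switches the per-element uniform draw to the globally seeded `framework::Generator` engine when Python has initialized it, and falls back to a locally seeded engine otherwise. Below is a self-contained sketch of that sampling logic, with the global generator reduced to an optional `std::mt19937_64*` purely for illustration; the kept-value branch is outside the lines shown in the hunk, so the sketch simply keeps the value.

```cpp
// Sketch of the CPU dropout sampling: one uniform draw per element, taken
// from a shared (Python-seeded) engine when available, otherwise from a
// locally seeded engine; draws below dropout_prob zero the value and mask.
#include <cstdint>
#include <random>
#include <vector>

void DropoutForward(const std::vector<float>& x, float dropout_prob,
                    std::mt19937_64* shared_engine,  // stand-in for the global generator
                    std::vector<float>* y, std::vector<uint8_t>* mask) {
  std::random_device rnd;
  std::mt19937_64 local_engine(rnd());  // fallback when no shared engine exists
  std::uniform_real_distribution<float> dist(0.f, 1.f);
  y->resize(x.size());
  mask->resize(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    const float r = shared_engine ? dist(*shared_engine) : dist(local_engine);
    if (r < dropout_prob) {
      (*mask)[i] = 0;
      (*y)[i] = 0.f;
    } else {
      (*mask)[i] = 1;
      (*y)[i] = x[i];  // any train-time rescaling is not shown in the hunk
    }
  }
}
```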
*/ #pragma once +#include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" @@ -24,7 +25,16 @@ namespace operators { template struct FloorDivFunctor { - inline HOSTDEVICE T operator()(T a, T b) const { return a / b; } + inline HOSTDEVICE T operator()(T a, T b) const { + return static_cast(floor(a / b)); + } +}; + +template +struct InverseFloorDivFunctor { + inline HOSTDEVICE T operator()(T a, T b) const { + return static_cast(floor(b / a)); + } }; template @@ -32,8 +42,15 @@ void elementwise_floor_div(const framework::ExecutionContext &ctx, const framework::Tensor *x, const framework::Tensor *y, framework::Tensor *z) { int axis = ctx.Attr("axis"); - ElementwiseComputeEx, DeviceContext, T>( - ctx, x, y, axis, FloorDivFunctor(), z); + auto x_dims = x->dims(); + auto y_dims = y->dims(); + if (x_dims.size() >= y_dims.size()) { + ElementwiseComputeEx, DeviceContext, T>( + ctx, x, y, axis, FloorDivFunctor(), z); + } else { + ElementwiseComputeEx, DeviceContext, T>( + ctx, x, y, axis, InverseFloorDivFunctor(), z); + } } template diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc index af80666b9542db1073f3b07618433671652fffa4..8c2e62bed195f27e228d5dd460ba21ed87c3f5d2 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc @@ -25,14 +25,14 @@ class ElementwiseModOpMaker : public ElementwiseOpMaker { void AddInputX() override { AddInput("X", - "(Variable), Tensor or LoDTensor of any dimensions. Its dtype " - "should be int32, int64."); + "(Tensor), Tensor of any dimensions. Its dtype " + "should be int32, int64, float32 or float64."); } void AddInputY() override { AddInput("Y", - "(Variable), Tensor or LoDTensor of any dimensions. Its dtype " - "should be int32, int64."); + "(Tensor), Tensor of any dimensions. Its dtype " + "should be int32, int64, float32 or float64."); } std::string GetOpFuntionality() const override { diff --git a/paddle/fluid/operators/expand_as_v2_op.cc b/paddle/fluid/operators/expand_as_v2_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..495b640bb4399736456bf391c6522686b9763951 --- /dev/null +++ b/paddle/fluid/operators/expand_as_v2_op.cc @@ -0,0 +1,150 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
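The floor-division change above replaces plain `a / b` with `floor(a / b)`, so the newly registered float and double kernels round toward negative infinity, and it adds an inverse functor for the case where Y has the higher rank. Here is a small standalone sketch of the two functors; the claim that the inverse variant exists to keep X/Y semantics when the broadcast helper swaps operands is my reading of the surrounding code, not something stated in the hunk.

```cpp
// Sketch of the two functors: ordinary floor division and the "inverse"
// variant that divides the other way around.
#include <cmath>
#include <cstdio>

template <typename T>
T FloorDiv(T a, T b) { return static_cast<T>(std::floor(a / b)); }

template <typename T>
T InverseFloorDiv(T a, T b) { return static_cast<T>(std::floor(b / a)); }

int main() {
  std::printf("%d\n", FloorDiv(7, 2));           // 3
  std::printf("%f\n", FloorDiv(-7.0, 2.0));      // -4, not -3 (truncation)
  // With the operands presented in swapped order, the inverse functor
  // still yields floor(x / y):
  std::printf("%f\n", InverseFloorDiv(2.0, -7.0));  // -4 again
  return 0;
}
```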
*/ + +#include "paddle/fluid/operators/expand_as_v2_op.h" +#include +#include + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class ExpandAsV2Op : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ExpandAsV2"); + OP_INOUT_CHECK(ctx->HasInput("target_tensor"), "Input", "target_tensor", + "ExpandAsV2"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "ExpandAsV2"); + auto x_dims = ctx->GetInputDim("X"); + auto target_tensor_dims = ctx->GetInputDim("target_tensor"); + PADDLE_ENFORCE_GE( + target_tensor_dims.size(), static_cast(x_dims.size()), + platform::errors::InvalidArgument( + "The rank of Input(target_tensor) must be greater than or equal " + "to the rank of Input(X). But received Input(X): input " + "rank %u, input shape [%s]; received Input(target_tensor): " + "input rank %u, input shape [%s].", + x_dims.size(), x_dims, target_tensor_dims.size(), + target_tensor_dims)); + PADDLE_ENFORCE_LE( + target_tensor_dims.size(), MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of Input(target_tensor) must not be less than or equal " + "to %d. But received: input rank %u, input shape [%s].", + MAX_RANK_SUPPORTED, x_dims.size(), x_dims)); + std::vector out_shape(target_tensor_dims.size()); + ctx->SetOutputDim("Out", framework::make_ddim(out_shape)); + } +}; + +class ExpandAsV2OpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "(Tensor, default Tensor). A tensor with rank in [1, 6]." + "X is the input to be expanded."); + AddOutput("Out", + "(Tensor, default Tensor). A tensor with rank in [1, 6]." + "The rank of Output(Out) have the same with Input(X). " + "After expanding, size of each dimension of Output(Out) is equal " + "to size of the corresponding dimension of Input(X) multiplying " + "the corresponding value given by Attr(expand_times)."); + AddInput("target_tensor", "Expand tensor's shape for each dimension."); + AddComment(R"DOC( +Expand the input by given times number. You should set times +number for each dimension by providing tensor 'expend_tensor'. The rank of X +should be in [1, 6]. Please note that size of 'expend_tensor' must be the same +with X's rank. 
Following is a using case: +Input(X) is a 3-D tensor with shape [2, 3, 1]: + [ + [[1], [2], [3]], + [[4], [5], [6]] + ] +target_tensors'shape: [2, 6, 2] +Output(Out) is a 3-D tensor with shape [2, 6, 2]: + [ + [[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]], + [[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]] + ] +)DOC"); + } +}; + +class ExpandAsV2GradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ExpandAsV2Grad"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + framework::GradVarName("Out"), "ExpandAsV2Grad"); + + auto x_dims = ctx->GetInputDim("X"); + auto x_grad_name = framework::GradVarName("X"); + if (ctx->HasOutput(x_grad_name)) { + ctx->SetOutputDim(x_grad_name, x_dims); + } + } + + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")), + ctx.device_context()); + } +}; + +template +class ExpandAsV2GradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("expand_as_v2_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput("target_tensor", this->Input("target_tensor")); + op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op->SetAttrMap(this->Attrs()); + } +}; + +DECLARE_NO_NEED_BUFFER_VARS_INFERER(ExpandAsV2GradNoNeedBufVarsInferer, "X"); + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(expand_as_v2, ops::ExpandAsV2Op, ops::ExpandAsV2OpMaker, + ops::ExpandAsV2GradOpMaker, + ops::ExpandAsV2GradOpMaker); +REGISTER_OPERATOR(expand_as_v2_grad, ops::ExpandAsV2GradOp, + ops::ExpandAsV2GradNoNeedBufVarsInferer); +REGISTER_OP_CPU_KERNEL( + expand_as_v2, + ops::ExpandAsV2Kernel, + ops::ExpandAsV2Kernel, + ops::ExpandAsV2Kernel, + ops::ExpandAsV2Kernel, + ops::ExpandAsV2Kernel); +REGISTER_OP_CPU_KERNEL( + expand_as_v2_grad, + ops::ExpandAsV2GradKernel, + ops::ExpandAsV2GradKernel, + ops::ExpandAsV2GradKernel, + ops::ExpandAsV2GradKernel); diff --git a/paddle/fluid/operators/expand_as_v2_op.cu b/paddle/fluid/operators/expand_as_v2_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..e315144472dd9fd4095043e4800a3f276d9314c7 --- /dev/null +++ b/paddle/fluid/operators/expand_as_v2_op.cu @@ -0,0 +1,26 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
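The expand_as_v2 kernel defined in the header below derives a per-dimension repeat count from target_tensor: X's shape is right-aligned by prepending 1s, singleton dimensions are tiled up to the target size, and non-singleton dimensions must already match the target. A minimal sketch of that computation in plain C++ (the helper name is hypothetical):

```cpp
// Sketch of the repeat_times computation done by ExpandAsV2Kernel.
#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <vector>

std::vector<int64_t> RepeatTimes(std::vector<int64_t> in_shape,
                                 const std::vector<int64_t>& target_shape) {
  // Align ranks by prepending 1s (the op enforces target rank >= input rank).
  in_shape.insert(in_shape.begin(), target_shape.size() - in_shape.size(), 1);
  std::vector<int64_t> repeat(in_shape.size());
  for (size_t i = 0; i < in_shape.size(); ++i) {
    if (in_shape[i] == 1) {
      repeat[i] = target_shape[i];      // broadcast this dimension
    } else if (in_shape[i] == target_shape[i]) {
      repeat[i] = 1;                    // already the right size
    } else {
      throw std::invalid_argument("non-singleton dimension mismatch");
    }
  }
  return repeat;
}

int main() {
  // X: [3, 1], target_tensor: [2, 3, 4]  ->  repeat_times [2, 1, 4]
  for (int64_t r : RepeatTimes({3, 1}, {2, 3, 4}))
    std::printf("%lld ", static_cast<long long>(r));
  return 0;
}
```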
*/ +#include "paddle/fluid/operators/expand_as_v2_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + expand_as_v2, + ops::ExpandAsV2Kernel, + ops::ExpandAsV2Kernel, + ops::ExpandAsV2Kernel, + ops::ExpandAsV2Kernel, + ops::ExpandAsV2Kernel); +REGISTER_OP_CUDA_KERNEL( + expand_as_v2_grad, + ops::ExpandAsV2GradKernel, + ops::ExpandAsV2GradKernel, + ops::ExpandAsV2GradKernel, + ops::ExpandAsV2GradKernel); diff --git a/paddle/fluid/operators/expand_as_v2_op.h b/paddle/fluid/operators/expand_as_v2_op.h new file mode 100644 index 0000000000000000000000000000000000000000..a4c30dfe1298d1736407c33f40d72dc690046cba --- /dev/null +++ b/paddle/fluid/operators/expand_as_v2_op.h @@ -0,0 +1,214 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" + +#define MAX_RANK_SUPPORTED 6 + +#define EXPAND_AS_TEMPLATE(z, n, data) \ + case n + 1: { \ + ExpandAs(context); \ + break; \ + } +#define REP_EXPAND_AS_TEMPLATE(n) BOOST_PP_REPEAT(n, EXPAND_AS_TEMPLATE, ~) +#define COND(n) BOOST_PP_GREATER_EQUAL(n, BOOST_PP_MOD(n, MAX_RANK_SUPPORTED)) +#define EXPAND_AS_GRAD_CASE(n) \ + case n: { \ + ExpandAsBackward(context, reshape_dims_vec, reduce_dims_vec); \ + break; \ + } +#define EXPAND_AS_GRAD_TEMPLATE(z, n, data) \ + BOOST_PP_IF(COND(n), EXPAND_AS_GRAD_CASE(n), ) +#define REP_EXPAND_AS_GRAD_TEMPLATE(n) \ + BOOST_PP_REPEAT(n, EXPAND_AS_GRAD_TEMPLATE, ~) + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; +template +using EigenTensor = framework::EigenTensor; + +template +class ExpandAsV2Kernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto rank = context.Input("X")->dims().size(); + auto* target_tensor = context.Input("target_tensor"); + auto target_rank = target_tensor->dims().size(); + PADDLE_ENFORCE_GE(target_rank, rank, + platform::errors::InvalidArgument( + "The rank (%d) of the input 'target_tensor' for " + "expand_as_v2 op must be greater than or equal to " + "the rank (%d) of the input 'x'.", + target_rank, rank)); + PADDLE_ENFORCE_GE(rank, 1, platform::errors::InvalidArgument( + "The rank (%d) of the input 'x' for " + "expand_as_v2 op must be positive.", + rank)); + PADDLE_ENFORCE_LE(target_rank, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank (%d) of the input 'target_tensor' for " + "expand_as_v2 op must be less than or equal to %d.", + target_rank, MAX_RANK_SUPPORTED)); + + switch (target_rank) { REP_EXPAND_AS_TEMPLATE(MAX_RANK_SUPPORTED) } + } + + protected: + template + void ExpandAs(const framework::ExecutionContext& context) const { + auto* in0 = context.Input("X"); + auto in_dims = in0->dims(); + auto* target_tensor = 
context.Input("target_tensor"); + auto vec_in_dims = framework::vectorize(in_dims); + auto target_shape = framework::vectorize(target_tensor->dims()); + auto diff = target_shape.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + std::vector repeat_times(vec_in_dims.size()); + for (size_t i = 0; i < vec_in_dims.size(); ++i) { + PADDLE_ENFORCE_NE(target_shape[i], 0, + platform::errors::InvalidArgument( + "The value of target shape cannot be zero.")); + if (vec_in_dims[i] != 1) { + PADDLE_ENFORCE_EQ( + vec_in_dims[i], target_shape[i], + platform::errors::InvalidArgument( + "The value (%d) of the non-singleton dimension does not match" + " the corresponding value (%d) in " + "target tensor for expand_as_v2 op.", + vec_in_dims[i], target_shape[i])); + repeat_times[i] = 1; + } else { + repeat_times[i] = target_shape[i]; + } + } + auto* out0 = context.Output("Out"); + Eigen::DSizes bcast_dims; + for (size_t i = 0; i < repeat_times.size(); ++i) { + bcast_dims[i] = repeat_times[i]; + } + + framework::DDim new_in_dims = framework::make_ddim(vec_in_dims); + framework::DDim out_dims = framework::make_ddim(target_shape); + + out0->Resize(out_dims); + auto x = EigenTensor::From(*in0, new_in_dims); + out0->mutable_data(context.GetPlace()); + auto y = EigenTensor::From(*out0, out_dims); + auto& place = + *context.template device_context().eigen_device(); + y.device(place) = x.broadcast(bcast_dims); + } +}; + +template +class ExpandAsV2GradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in0 = context.Input("X"); + auto* target_tensor = context.Input("target_tensor"); + auto x_dims = in0->dims(); + auto target_shape = target_tensor->dims(); + auto vec_in_dims = framework::vectorize(x_dims); + auto diff = target_shape.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + std::vector repeat_times(vec_in_dims.size()); + for (size_t i = 0; i < vec_in_dims.size(); ++i) { + repeat_times[i] = target_shape[i] / vec_in_dims[i]; + } + std::vector reshape_dims_vec; + std::vector reduce_dims_vec; + for (size_t i = 0; i < repeat_times.size(); ++i) { + reduce_dims_vec.push_back(reshape_dims_vec.size()); + reshape_dims_vec.push_back(repeat_times[i]); + reshape_dims_vec.push_back(vec_in_dims[i]); + } + + int dims = reduce_dims_vec.size(); + bool just_copy = true; + for (size_t i = 0; i < repeat_times.size(); i++) { + if (repeat_times[i] != 1) { + just_copy = false; + break; + } + } + // no need reduce, just copy + if (just_copy) { + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + framework::TensorCopy(*in0, context.GetPlace(), context.device_context(), + out0); + } else { + PADDLE_ENFORCE_GE(dims, 1, + platform::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_as_v2_grad op must be greater than or " + "equal to 1, but the value received is %d.", + dims)); + PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_as_v2_grad op must be less than or equal " + "to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, dims)); + switch (dims) { REP_EXPAND_AS_GRAD_TEMPLATE(MAX_RANK_SUPPORTED) } + } + } + + protected: + template + void ExpandAsBackward(const framework::ExecutionContext& context, + const std::vector& reshape_dims_vec, + const std::vector& 
reduce_dims_vec) const { + size_t reshape_size = reshape_dims_vec.size(); + size_t reduce_size = reduce_dims_vec.size(); + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + auto x_grad = EigenVector::Flatten(*out0); + Eigen::DSizes reshape_dims; + for (size_t i = 0; i < reshape_size; ++i) { + reshape_dims[i] = reshape_dims_vec[i]; + } + Eigen::DSizes reduce_dims; + for (size_t i = 0; i < reduce_size; ++i) { + reduce_dims[i] = reduce_dims_vec[i]; + } + auto out_grad = EigenVector::Flatten(*in0); + x_grad.device( + *context.template device_context().eigen_device()) = + out_grad.reshape(reshape_dims) + .sum(reduce_dims) + .reshape(x_grad.dimensions()); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..359d512c341529579a56dbe840e5eef0aa3062a5 --- /dev/null +++ b/paddle/fluid/operators/expand_v2_op.cc @@ -0,0 +1,255 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/expand_v2_op.h" +#include +#include +#include + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class ExpandV2Op : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ExpandV2"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "ExpandV2"); + auto x_dims = ctx->GetInputDim("X"); + auto expand_shape = ctx->Attrs().Get>("shape"); + + if (expand_shape.size() == 0) { + expand_shape = std::vector(x_dims.size(), -1); + } + + PADDLE_ENFORCE_GE( + expand_shape.size(), static_cast(x_dims.size()), + platform::errors::InvalidArgument( + "The number of elements (%d) of 'shape' for " + "expand_v2 op must be greater than or equal to the rank " + "(%d) of the input.", + expand_shape.size(), static_cast(x_dims.size()))); + PADDLE_ENFORCE_LE(expand_shape.size(), MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The number of elements (%d) of 'shape' for " + "must not be greater than %d.", + expand_shape.size(), MAX_RANK_SUPPORTED)); + PADDLE_ENFORCE_GE(expand_shape.size(), 1, + platform::errors::InvalidArgument( + "The number of elements (%d) of 'shape' for " + "must be a positive integer.", + expand_shape.size())); + + auto out_rank = + std::max(static_cast(x_dims.size()), expand_shape.size()); + std::vector out_shape(out_rank); + auto x_dim_vec = framework::vectorize(x_dims); + auto diff = expand_shape.size() - x_dim_vec.size(); + x_dim_vec.insert(x_dim_vec.begin(), diff, -1); + for (size_t i = 0; i < expand_shape.size(); ++i) { + if (x_dims[i] == -1) { + out_shape[i] = -1; + } else if (expand_shape[i] == -1) { + out_shape[i] = x_dims[i]; + } else { + 
PADDLE_ENFORCE_GT( + expand_shape[i], 0, + platform::errors::InvalidArgument( + "The %uth element of 'shape' for expand_v2 op must be " + "greater than 0, but the value given is %d.", + i, expand_shape[i])); + out_shape[i] = expand_shape[i]; + } + } + + ctx->SetOutputDim("Out", framework::make_ddim(out_shape)); + if (out_shape[0] == x_dims[0]) { + ctx->ShareLoD("X", "Out"); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.device_context()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const override { + if (var_name == "expand_shapes_tensor" || var_name == "Shape") { + return expected_kernel_type; + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } +}; + +class ExpandV2OpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "(Tensor, default Tensor). A tensor with rank in [1, 6]." + "X is the input to be expanded."); + AddInput("Shape", + "(Tensor), optional). If provided, expand according to " + "this given Shape. It has a higher priority than " + "expand_shapes_tensor and the shape attribute.") + .AsDispensable(); + AddInput("expand_shapes_tensor", + "(Tensor Tensor), epxanded shape for X." + "It has a higher priority than shape attribute, but a lower " + "priority than the input Shape") + .AsDuplicable() + .AsDispensable(); + AddOutput("Out", + "(Tensor, default Tensor). A tensor with rank in [1, 6]." + "The rank of Output(Out) have the same with Input(X). " + "After expanding, size of each dimension of Output(Out) is equal " + "to size of the corresponding dimension of Input(X) multiplying " + "the corresponding value given by Attr(expand_times)."); + AddAttr>("shape", "The expanded shape for each dimension.") + .SetDefault({}); + AddComment(R"DOC( +Expand the input to the given shape. The rank of X +should be in [1, 6] and size of 'shape' must be in [1, 6] also. 
+Following is a using case: + +Input(X) is a 3-D tensor with shape [2, 3, 1]: + + [ + [[1], [2], [3]], + [[4], [5], [6]] + ] + +Attr(shape): [2, 6, 2] + +Output(Out) is a 3-D tensor with shape [2, 6, 2]: + + [ + [[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]], + [[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]] + ] + +)DOC"); + } +}; + +class ExpandV2GradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ExpandV2Grad"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + framework::GradVarName("Out"), "ExpandV2Grad"); + + auto x_dims = ctx->GetInputDim("X"); + std::vector expand_shape = ctx->Attrs().Get>("shape"); + if (expand_shape.size() == 0) { + expand_shape = std::vector(x_dims.size(), -1); + } + + auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); + auto x_dim_vec = framework::vectorize(x_dims); + auto diff = expand_shape.size() - x_dim_vec.size(); + x_dim_vec.insert(x_dim_vec.begin(), diff, -1); + + for (size_t i = 0; i < expand_shape.size(); ++i) { + if (expand_shape[i] == -1 || x_dim_vec[i] == -1) { + continue; + } else { + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ( + expand_shape[i], out_dims[i], + platform::errors::InvalidArgument( + "The size (%d) of the dimension %d of Input(Out@GRAD) should " + "be equal to the crroresponding dimension size of shape(%d).", + out_dims[i], i, expand_shape[i])); + } + } + } + auto x_grad_name = framework::GradVarName("X"); + + if (ctx->HasOutput(x_grad_name)) { + ctx->SetOutputDim(x_grad_name, x_dims); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")), + ctx.device_context()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const override { + if (var_name == "expand_shapes_tensor" || var_name == "Shape") { + return expected_kernel_type; + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } +}; + +template +class ExpandV2GradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("expand_v2_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op->SetInput("expand_shapes_tensor", this->Input("expand_shapes_tensor")); + op->SetInput("Shape", this->Input("Shape")); + op->SetAttrMap(this->Attrs()); + } +}; + +DECLARE_NO_NEED_BUFFER_VARS_INFERER(ExpandV2GradNoNeedBufVarsInferer, "X"); + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(expand_v2, ops::ExpandV2Op, ops::ExpandV2OpMaker, + ops::ExpandV2GradOpMaker, + ops::ExpandV2GradOpMaker); +REGISTER_OPERATOR(expand_v2_grad, ops::ExpandV2GradOp, + ops::ExpandV2GradNoNeedBufVarsInferer); +REGISTER_OP_CPU_KERNEL( + expand_v2, ops::ExpandV2Kernel, + ops::ExpandV2Kernel, + ops::ExpandV2Kernel, + ops::ExpandV2Kernel, + ops::ExpandV2Kernel); +REGISTER_OP_CPU_KERNEL( + expand_v2_grad, + 
ops::ExpandV2GradKernel, + ops::ExpandV2GradKernel, + ops::ExpandV2GradKernel, + ops::ExpandV2GradKernel); diff --git a/paddle/fluid/operators/expand_v2_op.cu b/paddle/fluid/operators/expand_v2_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..e096dbc27f0c2ae8142da40b9db99074b2719387 --- /dev/null +++ b/paddle/fluid/operators/expand_v2_op.cu @@ -0,0 +1,32 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include "paddle/fluid/operators/expand_v2_op.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL( + expand_v2, ops::ExpandV2Kernel, + ops::ExpandV2Kernel, + ops::ExpandV2Kernel, + ops::ExpandV2Kernel, + ops::ExpandV2Kernel, + ops::ExpandV2Kernel); +REGISTER_OP_CUDA_KERNEL( + expand_v2_grad, + ops::ExpandV2GradKernel, + ops::ExpandV2GradKernel, + ops::ExpandV2GradKernel, + ops::ExpandV2GradKernel, + ops::ExpandV2GradKernel); diff --git a/paddle/fluid/operators/expand_v2_op.h b/paddle/fluid/operators/expand_v2_op.h new file mode 100644 index 0000000000000000000000000000000000000000..ec9c6e62f272ed87abc4e0be6ccf1de3aedf15d4 --- /dev/null +++ b/paddle/fluid/operators/expand_v2_op.h @@ -0,0 +1,296 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
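expand_v2 resolves its target shape from, in priority order, the optional Shape input, the expand_shapes_tensor list, and the shape attribute, and a -1 entry means "keep the input's size in that dimension". Below is a small sketch of the -1 resolution once a shape has been chosen; the helper name is illustrative only.

```cpp
// Sketch of how the output shape follows from x_dims and the chosen shape:
// right-align the input shape, then let -1 entries fall back to it.
#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<int64_t> ResolveOutShape(const std::vector<int64_t>& x_dims,
                                     const std::vector<int64_t>& expand_shape) {
  // Right-align x_dims with the requested shape by prepending placeholders.
  std::vector<int64_t> x(expand_shape.size() - x_dims.size(), -1);
  x.insert(x.end(), x_dims.begin(), x_dims.end());
  std::vector<int64_t> out(expand_shape.size());
  for (size_t i = 0; i < expand_shape.size(); ++i) {
    out[i] = (expand_shape[i] == -1) ? x[i] : expand_shape[i];
  }
  return out;
}

int main() {
  // X: [3, 1], shape attr: [2, -1, 4]  ->  Out: [2, 3, 4]
  for (int64_t d : ResolveOutShape({3, 1}, {2, -1, 4}))
    std::printf("%lld ", static_cast<long long>(d));
  return 0;
}
```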
*/ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" + +#define MAX_RANK_SUPPORTED 6 + +#define EXPAND_TEMPLATE(z, n, data) \ + case n + 1: { \ + Expand(context); \ + break; \ + } +#define REP_EXPAND_TEMPLATE(n) BOOST_PP_REPEAT(n, EXPAND_TEMPLATE, ~) +#define COND(n) BOOST_PP_GREATER_EQUAL(n, BOOST_PP_MOD(n, MAX_RANK_SUPPORTED)) +#define EXPAND_GRAD_CASE(n) \ + case n: { \ + ExpandBackward(context, reshape_dims_vec, reduce_dims_vec); \ + break; \ + } +#define EXPAND_GRAD_TEMPLATE(z, n, data) \ + BOOST_PP_IF(COND(n), EXPAND_GRAD_CASE(n), ) +#define REP_EXPAND_GRAD_TEMPLATE(n) BOOST_PP_REPEAT(n, EXPAND_GRAD_TEMPLATE, ~) + +namespace paddle { +namespace operators { +inline std::vector get_expand_shape( + const framework::ExecutionContext& ctx) { + if (ctx.HasInput("Shape")) { + auto* shape_tensor = ctx.Input("Shape"); + auto* shape_data = shape_tensor->data(); + framework::Tensor cpu_shape_tensor; + if (platform::is_gpu_place(shape_tensor->place())) { + TensorCopySync(*shape_tensor, platform::CPUPlace(), &cpu_shape_tensor); + shape_data = cpu_shape_tensor.data(); + } + auto vec_shape = + std::vector(shape_data, shape_data + shape_tensor->numel()); + return vec_shape; + } + + auto list_expand_shapes_tensor = + ctx.MultiInput("expand_shapes_tensor"); + if (list_expand_shapes_tensor.size() > 0) { + // get tensor from + std::vector vec_epxand_shape; + for (size_t i = 0; i < list_expand_shapes_tensor.size(); ++i) { + auto tensor = list_expand_shapes_tensor[i]; + if (platform::is_gpu_place(tensor->place())) { + framework::Tensor temp; + TensorCopySync(*tensor, platform::CPUPlace(), &temp); + vec_epxand_shape.push_back(*temp.data()); + } else { + vec_epxand_shape.push_back(*tensor->data()); + } + } + return vec_epxand_shape; + } else { + return ctx.Attr>("shape"); + } +} + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; +template +using EigenTensor = framework::EigenTensor; +using framework::To32BitIndex; + +template +class ExpandV2Kernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto rank = context.Input("X")->dims().size(); + PADDLE_ENFORCE_GE( + rank, 1, + platform::errors::InvalidArgument( + "The rank of the input 'X' for expand_v2 op must be positive, " + "but the value received is %d.", + rank)); + PADDLE_ENFORCE_LE( + rank, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of the input 'X' for expand_v2 op must be less than " + "or equal to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, rank)); + auto expand_shape = get_expand_shape(context); + auto shape_size = expand_shape.size(); + PADDLE_ENFORCE_GE( + shape_size, rank, + platform::errors::InvalidArgument( + "The number (%d) of elements of 'shape' for expand_v2 op must be " + "greater than or equal to the rank (%d) of the input 'X'.", + shape_size, rank)); + PADDLE_ENFORCE_LE( + shape_size, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The number (%d) of elements of 'shape' for expand_v2 op must be " + "less than or equal to %d.", + shape_size, MAX_RANK_SUPPORTED)); + rank = std::max(rank, static_cast(shape_size)); + switch (rank) { REP_EXPAND_TEMPLATE(MAX_RANK_SUPPORTED) } + } + + protected: + template + void Expand(const framework::ExecutionContext& context) const { + auto* in0 = 
context.Input("X"); + + auto in_dims = in0->dims(); + auto expand_shape = get_expand_shape(context); + auto vec_in_dims = framework::vectorize(in_dims); + auto diff = expand_shape.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + std::vector repeat_times(vec_in_dims.size()); + for (size_t i = 0; i < vec_in_dims.size(); ++i) { + PADDLE_ENFORCE_NE(expand_shape[i], 0, + platform::errors::InvalidArgument( + "The expanded size cannot be zero.")); + if (i < diff) { + PADDLE_ENFORCE_GT( + expand_shape[i], 0, + platform::errors::InvalidArgument( + "The expanded size (%d) for non-existing dimensions must be " + "positive for expand_v2 op.", + expand_shape[i])); + repeat_times[i] = expand_shape[i]; + } else if (expand_shape[i] > 0) { + if (vec_in_dims[i] != 1) { + PADDLE_ENFORCE_EQ( + vec_in_dims[i], expand_shape[i], + platform::errors::InvalidArgument( + "The value (%d) of the non-singleton dimension does not match" + " the corresponding value (%d) in shape for expand_v2 op.", + vec_in_dims[i], expand_shape[i])); + repeat_times[i] = 1; + } else { + repeat_times[i] = expand_shape[i]; + } + } else { + PADDLE_ENFORCE_EQ( + expand_shape[i], -1, + platform::errors::InvalidArgument( + "When the value in shape is negative for expand_v2 op, " + "only -1 is supported, but the value received is %d.", + expand_shape[i])); + repeat_times[i] = 1; + } + } + + auto* out0 = context.Output("Out"); + Eigen::DSizes bcast_dims; + for (size_t i = 0; i < repeat_times.size(); ++i) { + bcast_dims[i] = repeat_times[i]; + } + + framework::DDim new_in_dims = framework::make_ddim(vec_in_dims); + framework::DDim out_dims(new_in_dims); + for (size_t i = 0; i < repeat_times.size(); ++i) { + out_dims[i] *= repeat_times[i]; + } + + out0->Resize(out_dims); + auto x = EigenTensor::From(*in0, new_in_dims); + out0->mutable_data(context.GetPlace()); + auto y = EigenTensor::From(*out0, out_dims); + auto& place = + *context.template device_context().eigen_device(); + // use 32-bit index to speed up + bool use_32bit_index = y.size() < Eigen::NumTraits::highest(); + if (use_32bit_index) { + To32BitIndex(y).device(place) = To32BitIndex(x).broadcast(bcast_dims); + } else { + y.device(place) = x.broadcast(bcast_dims); + } + } +}; + +template +class ExpandV2GradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in0 = context.Input("X"); + auto expand_shape = get_expand_shape(context); + auto x_dims = in0->dims(); + auto vec_in_dims = framework::vectorize(x_dims); + auto diff = expand_shape.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + // 1. reshape_dims_vec is the broadcast parameter. + // 2. reduce_dims_vec is the dimension parameter to compute gradients. For + // each dimension expanded, the gradients should be summed to original + // size. 
+ std::vector repeat_times(vec_in_dims.size()); + for (size_t i = 0; i < vec_in_dims.size(); ++i) { + if (expand_shape[i] < 0) { + repeat_times[i] = 1; + } else { + repeat_times[i] = expand_shape[i] / vec_in_dims[i]; + } + } + std::vector reshape_dims_vec; + std::vector reduce_dims_vec; + for (size_t i = 0; i < repeat_times.size(); ++i) { + reduce_dims_vec.push_back(reshape_dims_vec.size()); + reshape_dims_vec.push_back(repeat_times[i]); + reshape_dims_vec.push_back(vec_in_dims[i]); + } + + int dims = reduce_dims_vec.size(); + + bool just_copy = true; + for (size_t i = 0; i < repeat_times.size(); i++) { + if (repeat_times[i] != 1) { + just_copy = false; + break; + } + } + // no need reduce, just copy + if (just_copy) { + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + framework::TensorCopy(*in0, context.GetPlace(), context.device_context(), + out0); + } else { + PADDLE_ENFORCE_GE(dims, 1, + platform::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_v2_grad op must be greater than or " + "equal to 1, but the value received is %d.", + dims)); + PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_v2_grad op must be less than or equal " + "to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, dims)); + switch (dims) { REP_EXPAND_GRAD_TEMPLATE(MAX_RANK_SUPPORTED) } + } + } + + protected: + template + void ExpandBackward(const framework::ExecutionContext& context, + const std::vector& reshape_dims_vec, + const std::vector& reduce_dims_vec) const { + size_t reshape_size = reshape_dims_vec.size(); + size_t reduce_size = reduce_dims_vec.size(); + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + auto x_grad = EigenVector::Flatten(*out0); + Eigen::DSizes reshape_dims; + for (size_t i = 0; i < reshape_size; ++i) { + reshape_dims[i] = reshape_dims_vec[i]; + } + Eigen::DSizes reduce_dims; + for (size_t i = 0; i < reduce_size; ++i) { + reduce_dims[i] = reduce_dims_vec[i]; + } + auto out_grad = EigenVector::Flatten(*in0); + x_grad.device( + *context.template device_context().eigen_device()) = + out_grad.reshape(reshape_dims) + .sum(reduce_dims) + .reshape(x_grad.dimensions()); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/eye_op.cc b/paddle/fluid/operators/eye_op.cc index 2cf08e5c3409acb6b8a43058e143df55fd563d74..793519b40182114c13e63dd32caaa382d55fa52d 100644 --- a/paddle/fluid/operators/eye_op.cc +++ b/paddle/fluid/operators/eye_op.cc @@ -83,7 +83,6 @@ Return an identity tensor whose shape is [num_rows, num_columns]. 
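For the expand_v2 gradient above, every tiled copy of an input element receives its own upstream gradient, so dX is Out@GRAD summed over the repeated axes; the kernel performs that sum with a single Eigen reshape to (r0, d0, r1, d1, ...) followed by a reduction over the r-axes. The same arithmetic written as plain loops for a 1-D input tiled along a new leading axis:

```cpp
// Sketch of the gradient reduction: dx[j] = sum over all tiled copies of
// the corresponding out_grad entries.
#include <cstdint>
#include <cstdio>
#include <vector>

// x has shape [d]; out_grad has shape [r, d] (x broadcast r times along a
// new leading axis). dx[j] = sum_i out_grad[i][j].
std::vector<float> ExpandGrad1D(const std::vector<float>& out_grad,
                                int64_t r, int64_t d) {
  std::vector<float> dx(d, 0.f);
  for (int64_t i = 0; i < r; ++i)
    for (int64_t j = 0; j < d; ++j) dx[j] += out_grad[i * d + j];
  return dx;
}

int main() {
  // x: [2] expanded to [3, 2]; out_grad of all ones  ->  dx = [3, 3]
  std::vector<float> g(6, 1.f);
  for (float v : ExpandGrad1D(g, 3, 2)) std::printf("%g ", v);
  return 0;
}
```

When every repeat factor is 1 the kernel skips this reduction entirely and just copies Out@GRAD into X@GRAD, which is the `just_copy` branch above.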
namespace ops = paddle::operators; using CPU = paddle::platform::CPUDeviceContext; -using float16 = paddle::platform::float16; REGISTER_OPERATOR( eye, ops::EyeOp, ops::EyeOpMaker, ops::EyeOpVarTypeInference, @@ -93,4 +92,4 @@ REGISTER_OPERATOR( REGISTER_OP_CPU_KERNEL(eye, ops::EyeKernel, ops::EyeKernel, ops::EyeKernel, ops::EyeKernel, - ops::EyeKernel); + ops::EyeKernel); diff --git a/paddle/fluid/operators/fake_dequantize_op.cc b/paddle/fluid/operators/fake_dequantize_op.cc index 0d2b951ee1c544151e99af8216db7809e2a77852..9b0328b0945ba9b57cb9ab27233656e3b0af4f5f 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cc +++ b/paddle/fluid/operators/fake_dequantize_op.cc @@ -37,20 +37,49 @@ template struct ChannelDequantizeFunctor { void operator()(const platform::CPUDeviceContext& dev_ctx, const framework::Tensor* in, const framework::Tensor** scales, - const int scale_num, T max_range, framework::Tensor* out) { + const int scale_num, T max_range, const int quant_axis, + framework::Tensor* out) { if (scale_num == 1) { - const int channel = in->dims()[0]; + // Dequant op is before quantized op + // Dequantize the weight of quantized op + auto in_dims = in->dims(); + const int64_t channel = in_dims[quant_axis]; const T* scale_factor = scales[0]->data(); - for (int i = 0; i < channel; i++) { - T s = scale_factor[i]; - framework::Tensor one_channel_in = in->Slice(i, i + 1); - framework::Tensor one_channel_out = out->Slice(i, i + 1); - auto in_e = framework::EigenVector::Flatten(one_channel_in); - auto out_e = framework::EigenVector::Flatten(one_channel_out); - auto& dev = *dev_ctx.eigen_device(); - out_e.device(dev) = in_e * s / max_range; + if (quant_axis == 0) { + for (int64_t i = 0; i < channel; i++) { + T s = scale_factor[i]; + framework::Tensor one_channel_in = in->Slice(i, i + 1); + framework::Tensor one_channel_out = out->Slice(i, i + 1); + auto in_e = framework::EigenVector::Flatten(one_channel_in); + auto out_e = framework::EigenVector::Flatten(one_channel_out); + auto& dev = *dev_ctx.eigen_device(); + out_e.device(dev) = in_e * s / max_range; + } + } else if (quant_axis == 1) { + int64_t out_iter = 1; + for (int i = 0; i < quant_axis; i++) { + out_iter *= in_dims[i]; + } + int64_t step_i = in->numel() / out_iter; + int64_t step_j = in->numel() / (out_iter * channel); + auto* in_data = in->data(); + auto* out_data = out->mutable_data(dev_ctx.GetPlace()); + for (int64_t i = 0; i < out_iter; i++) { + for (int64_t j = 0; j < channel; j++) { + auto* cur_in = in_data + i * step_i + j * step_j; + auto* cur_out = out_data + i * step_i + j * step_j; + T s = scale_factor[j]; + for (int64_t k = 0; k < step_j; k++) { + *cur_out = (*cur_in) * s / max_range; + ++cur_in; + ++cur_out; + } + } + } } } else if (scale_num == 2) { + // Dequant op is after quantized op + // Dequantize the output tensor of quantized op int batch_size = in->dims()[0]; int channel = in->dims()[1]; const T* scale_one = scales[0]->data(); @@ -157,6 +186,18 @@ class FakeChannelWiseDequantizeMaxAbsOpMaker "Quantization bit numbers in quantization stage. " "The size of `quant_bits` should be equal to the size of `Scales`.") .SetDefault({8}); + AddAttr("quant_axis", + "(int, default 0) The axis for quantization. 
" + "For conv2d, depthwise_conv2d, conv2d_transpose " + "and mul, the quant_axis is equal to the cout axis.") + .SetDefault(0) + .AddCustomChecker([](const int& quant_axis) { + PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument( + "'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + }); AddComment(R"DOC( FakeChannelWiseDequantizeMaxAbsOp operator. diff --git a/paddle/fluid/operators/fake_dequantize_op.cu b/paddle/fluid/operators/fake_dequantize_op.cu index 02f9dc827d68cbb58447ed1557ff4bf310b2c017..54a92b055a39d49ea061250b066957f933fb975e 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cu +++ b/paddle/fluid/operators/fake_dequantize_op.cu @@ -45,8 +45,9 @@ struct DequantizeFunctor { }; template -__global__ void DequantizeOneScale(const T* in, const T* scale, T max_range, - int num, int channel, T* out) { +__global__ void DequantizeOneScaleQuantAxis0(const T* in, const T* scale, + T max_range, int num, int channel, + T* out) { int tid = threadIdx.x; int channel_size = num / channel; const T* in_c = in + blockIdx.x * channel_size; @@ -56,6 +57,23 @@ __global__ void DequantizeOneScale(const T* in, const T* scale, T max_range, } } +template +__global__ void DequantizeOneScaleQuantAxis1(const T* in, const T* scale, + T max_range, const int num, + const int cin, const int cout, + T* out) { + int cout_wh_size = num / cin; + int wh_size = cout_wh_size / cout; + + T s = scale[blockIdx.x]; + const T* in_current = in + threadIdx.x * cout_wh_size + blockIdx.x * wh_size; + T* out_current = out + threadIdx.x * cout_wh_size + blockIdx.x * wh_size; + + for (int i = 0; i < wh_size; i++) { + out_current[i] = in_current[i] * s / max_range; + } +} + template __global__ void DequantizeTwoScale(const T* in, const T* scale_one, const T* scale_two, T max_range, int num, @@ -74,18 +92,29 @@ template struct ChannelDequantizeFunctor { void operator()(const platform::CUDADeviceContext& dev_ctx, const framework::Tensor* in, const framework::Tensor** scales, - const int scale_num, T max_range, framework::Tensor* out) { + const int scale_num, T max_range, const int quant_axis, + framework::Tensor* out) { + auto in_dims = in->dims(); const T* in_data = in->data(); T* out_data = out->mutable_data(dev_ctx.GetPlace()); if (scale_num == 1) { int num = in->numel(); - int channel = in->dims()[0]; const T* scale_factor = scales[0]->data(); - int block = 1024; - int grid = channel; - DequantizeOneScale<<>>( - in_data, scale_factor, max_range, num, channel, out_data); + if (quant_axis == 0) { + int grid = in_dims[0]; + int block = 1024; + DequantizeOneScaleQuantAxis0<<>>( + in_data, scale_factor, max_range, num, in_dims[0], out_data); + } else if (quant_axis == 1) { + // Dequantize weight of Cin * Cout * W * H + int grid = in_dims[1]; + int block = in_dims[0]; + DequantizeOneScaleQuantAxis1<<>>( + in_data, scale_factor, max_range, num, in_dims[0], in_dims[1], + out_data); + } } else if (scale_num == 2) { + // Not need to consider quant_axis int num = in->numel(); int batch_size = in->dims()[0]; int channel = in->dims()[1]; diff --git a/paddle/fluid/operators/fake_dequantize_op.h b/paddle/fluid/operators/fake_dequantize_op.h index 500960098f5ce5e66af5690138c15cc0eaa80d83..6ddb12771fd5176dbe27642adcb2ac82e4d7bfbf 100644 --- a/paddle/fluid/operators/fake_dequantize_op.h +++ b/paddle/fluid/operators/fake_dequantize_op.h @@ -33,7 +33,7 @@ template struct ChannelDequantizeFunctor { void operator()(const DeviceContext& dev_ctx, const framework::Tensor* in, 
const framework::Tensor** scales, const int scale_num, - T max_range, framework::Tensor* out); + T max_range, const int quant_axis, framework::Tensor* out); }; template @@ -63,6 +63,7 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { auto* out = ctx.Output("Out"); auto quant_bits = ctx.Attr>("quant_bits"); + auto quant_axis = ctx.Attr("quant_axis"); int max_range = 1; auto& dev_ctx = ctx.template device_context(); @@ -70,12 +71,12 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { int scale_num = scales.size(); if (scale_num == 1) { PADDLE_ENFORCE_EQ( - scales[0]->numel(), in->dims()[0], + scales[0]->numel(), in->dims()[quant_axis], platform::errors::PreconditionNotMet( "The number of first scale values must be the same with " - "first dimension value of Input(X) when the `Scales` has only " - "one element, but %ld != %ld here.", - scales[0]->numel(), in->dims()[0])); + "quant_axis dimension value of Input(X) when the `Scales` has " + "only one element, but %ld != %ld here.", + scales[0]->numel(), in->dims()[quant_axis])); max_range *= (std::pow(2, quant_bits[0] - 1) - 1); } else if (scale_num == 2) { PADDLE_ENFORCE_EQ( @@ -94,7 +95,8 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { (std::pow(2, quant_bits[1] - 1) - 1); } ChannelDequantizeFunctor()( - dev_ctx, in, scales.data(), scale_num, static_cast(max_range), out); + dev_ctx, in, scales.data(), scale_num, static_cast(max_range), + quant_axis, out); } }; diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index 358f122c8359fa60f2c99492db8851c8a5fc5293..04ac4a35208a54361a4f434e68095e9519ee12e9 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
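The channel-wise dequantization path above maps a quantized value q in channel c back to q * scale[c] / max_range, where max_range is 2^(bit_length - 1) - 1 for the single-scale case and quant_axis picks which dimension indexes the scales. A minimal sketch for a weight flattened to [channels, inner] with quant_axis = 0; the quant_axis = 1 branch walks the same data using the outer and inner strides computed in the functor.

```cpp
// Sketch of single-scale channel-wise dequantization along axis 0.
#include <cmath>
#include <cstdint>
#include <vector>

void ChannelDequant(const std::vector<float>& q,      // quantized values
                    const std::vector<float>& scale,  // one scale per channel
                    int64_t channels, int64_t inner, int bit_length,
                    std::vector<float>* out) {
  const float max_range = std::pow(2.f, bit_length - 1) - 1.f;  // e.g. 127 for 8 bits
  out->resize(q.size());
  for (int64_t c = 0; c < channels; ++c)
    for (int64_t k = 0; k < inner; ++k)
      (*out)[c * inner + k] = q[c * inner + k] * scale[c] / max_range;
}
```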
*/ #include "paddle/fluid/operators/fake_quantize_op.h" +#include #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/operators/clip_op.h" @@ -39,13 +40,41 @@ template struct FindAbsMaxFunctor; template struct FindChannelAbsMaxFunctor { - void operator()(const platform::CPUDeviceContext& ctx, const T* in, - const int num, const int channel, T* out) { - const int channel_size = num / channel; - for (int i = 0; i < channel; i++) { - auto* start = in + i * channel_size; - auto* end = in + (i + 1) * channel_size; - out[i] = std::abs(*(std::max_element(start, end, Compare()))); + void operator()(const platform::CPUDeviceContext& ctx, + const framework::Tensor& in_tensor, const int quant_axis, + T* out_abs_max) { + // At present, channelwise quantization supports conv2d, depthwise_conv2d + // conv2d_transpose and mul + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + auto* in_data = in_tensor.data(); + auto in_dims = in_tensor.dims(); + const int64_t channel = in_dims[quant_axis]; + if (quant_axis == 0) { + const int64_t channel_size = in_tensor.numel() / channel; + for (int64_t i = 0; i < channel; i++) { + auto* start = in_data + i * channel_size; + auto* end = in_data + (i + 1) * channel_size; + out_abs_max[i] = + std::abs(*(std::max_element(start, end, Compare()))); + } + } else if (quant_axis == 1) { + for (int64_t i = 0; i < channel; i++) { + out_abs_max[i] = 0; + } + const int64_t step_i = in_tensor.numel() / in_dims[0]; + const int64_t step_j = in_tensor.numel() / (in_dims[0] * in_dims[1]); + for (int64_t i = 0; i < in_dims[0]; i++) { + for (int64_t j = 0; j < in_dims[1]; j++) { + auto* start = in_data + i * step_i + j * step_j; + auto* end = in_data + i * step_i + (j + 1) * step_j; + T abs_max = std::abs(*(std::max_element(start, end, Compare()))); + out_abs_max[j] = std::max(out_abs_max[j], abs_max); + } + } } } }; @@ -92,26 +121,53 @@ template struct ChannelClipAndFakeQuantFunctor { void operator()(const platform::CPUDeviceContext& ctx, const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int channel, + const int bin_cnt, const int quant_axis, framework::Tensor* out) { + // At present, channelwise quantization supports conv2d, depthwise_conv2d + // conv2d_transpose and mul + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); auto* scale_data = scale.data(); auto* in_data = in.data(); auto* out_data = out->mutable_data(ctx.GetPlace()); - const int channel_size = in.numel() / channel; + auto in_dims = in.dims(); + const int64_t channel = in_dims[quant_axis]; platform::Transform trans; - for (int i = 0; i < channel; i++) { - T s = scale_data[i]; - auto* start = in_data + i * channel_size; - auto* end = in_data + (i + 1) * channel_size; - trans(ctx, start, end, out_data + i * channel_size, - ClipFunctor(-s, s)); - } - for (int i = 0; i < channel; i++) { - T s = scale_data[i]; - T inv_s = inverse(s); - framework::Tensor one_channel_out = out->Slice(i, i + 1); - auto out_e = framework::EigenVector::Flatten(one_channel_out); - out_e.device(*ctx.eigen_device()) = (bin_cnt * inv_s * out_e).round(); + if (quant_axis == 0) { + const int64_t channel_size = in.numel() / channel; + for (int64_t i = 0; i < channel; i++) { + T s = scale_data[i]; + auto* start = in_data + i * channel_size; + 
auto* end = in_data + (i + 1) * channel_size; + trans(ctx, start, end, out_data + i * channel_size, + ClipFunctor(-s, s)); + } + for (int64_t i = 0; i < channel; i++) { + T s = scale_data[i]; + T inv_s = inverse(s); + framework::Tensor one_channel_out = out->Slice(i, i + 1); + auto out_e = framework::EigenVector::Flatten(one_channel_out); + out_e.device(*ctx.eigen_device()) = (bin_cnt * inv_s * out_e).round(); + } + } else if (quant_axis == 1) { + const int64_t step_i = in.numel() / in_dims[0]; + const int64_t step_j = in.numel() / (in_dims[0] * in_dims[1]); + for (int i = 0; i < in_dims[0]; i++) { + for (int j = 0; j < in_dims[1]; j++) { + T s = scale_data[j]; + T inv_s = inverse(s); + auto* start = in_data + i * step_i + j * step_j; + auto* end = in_data + i * step_i + (j + 1) * step_j; + auto* cur_out_data = out_data + i * step_i + j * step_j; + trans(ctx, start, end, cur_out_data, ClipFunctor(-s, s)); + for (int k = 0; k < step_j; k++) { + cur_out_data[k] = std::round(bin_cnt * inv_s * cur_out_data[k]); + } + } + } } } }; @@ -247,8 +303,9 @@ class FakeChannelWiseQuantizeAbsMaxOp : public framework::OperatorWithKernel { "FakeChannelWiseQuantizeAbsMax"); OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", "FakeChannelWiseQuantizeAbsMax"); + int quant_axis = ctx->Attrs().Get("quant_axis"); ctx->SetOutputDim("Out", ctx->GetInputDim("X")); - ctx->SetOutputDim("OutScale", {ctx->GetInputDim("X")[0]}); + ctx->SetOutputDim("OutScale", {ctx->GetInputDim("X")[quant_axis]}); ctx->ShareLoD("X", /*->*/ "Out"); } @@ -269,6 +326,18 @@ class FakeChannelWiseQuantizeAbsMaxOpMaker "(Tensor) Output of quantized low level tensor, " "but also saved as float data type."); AddOutput("OutScale", "(Tensor) Current channel wise scale"); + AddAttr("quant_axis", + "(int, default 0) The axis for quantization. 
" + "For conv2d, depthwise_conv2d, conv2d_transpose " + "and mul, the quant_axis is equal to the cout axis.") + .SetDefault(0) + .AddCustomChecker([](const int& quant_axis) { + PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument( + "'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) .AddCustomChecker([](const int& bit_length) { diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu index 75a55fa821f0af664ad18cc20c90cd2f3d61d5d0..6ff3c7ec632f236fe4ae6c6504537df3b8a46b7a 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu +++ b/paddle/fluid/operators/fake_quantize_op.cu @@ -75,8 +75,8 @@ struct FindAbsMaxFunctor { template struct FindAbsMaxFunctor; template -__global__ void FindChannelAbsMaxKernel(const T* in, const int n, const int c, - T* out) { +__global__ void FindChannelAbsMaxKernelQuantAxis0(const T* in, const int n, + const int c, T* out) { int tid = threadIdx.x; int channel_size = n / c; const T* in_c = in + blockIdx.x * channel_size; @@ -100,14 +100,69 @@ __global__ void FindChannelAbsMaxKernel(const T* in, const int n, const int c, } } +template +__global__ void FindChannelAbsMaxKernelQuantAxis1(const T* in, const int n, + const int cin, const int cout, + T* out) { + extern __shared__ T shared_max_data[]; + int cout_wh_size = n / cin; + int wh_size = n / (cin * cout); + + int tid = threadIdx.x; + int bid = blockIdx.x; + const T* in_current = in + tid * cout_wh_size + bid * wh_size; + shared_max_data[tid] = T(0); + for (int i = 0; i < wh_size; i++) { + T tmp = fabs(in_current[i]); + if (tmp > shared_max_data[tid]) { + shared_max_data[tid] = tmp; + } + } + __syncthreads(); + + int len = blockDim.x; + for (int i = (len + 1) / 2; i > 0; len = i, i = (i + 1) / 2) { + if (tid < i && tid + i < len && + shared_max_data[tid] < shared_max_data[tid + i]) { + shared_max_data[tid] = shared_max_data[tid + i]; + } + if (i == 1) { + i = 0; // break the loop + } + __syncthreads(); + } + if (tid == 0) { + out[bid] = shared_max_data[0]; + } +} + template struct FindChannelAbsMaxFunctor { - void operator()(const platform::CUDADeviceContext& ctx, const T* in, - const int num, const int channel, T* out) { - int block = 1024; - int grid = channel; - FindChannelAbsMaxKernel<<>>( - in, num, channel, out); + void operator()(const platform::CUDADeviceContext& ctx, + const framework::Tensor& in_tensor, const int quant_axis, + T* out_abs_max) { + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + const int num = in_tensor.numel(); + auto in_dims = in_tensor.dims(); + int channel = in_dims[quant_axis]; + const T* in_data = in_tensor.data(); + if (quant_axis == 0) { + int grid = channel; + int block = 1024; + FindChannelAbsMaxKernelQuantAxis0< + T><<>>( + in_data, num, channel, out_abs_max); + } else if (quant_axis == 1) { + int grid = in_dims[1]; + int block = in_dims[0]; + FindChannelAbsMaxKernelQuantAxis1< + T><<>>( + in_data, num, in_dims[0], in_dims[1], out_abs_max); + } } }; @@ -189,10 +244,12 @@ struct ClipAndFakeQuantDequantFunctor { template struct ClipAndFakeQuantDequantFunctor; +// ChannelClipAndQuantKernel for quant_axis is 0 template -__global__ void ChannelClipAndQuantKernel(const T* in, const T* scale, - const int bin_cnt, const int n, - const int c, T* out) { +__global__ void 
ChannelClipAndQuantKernelQuantAxis0(const T* in, const T* scale, + const int bin_cnt, + const int n, const int c, + T* out) { int tid = threadIdx.x; int channel_size = n / c; @@ -211,22 +268,57 @@ __global__ void ChannelClipAndQuantKernel(const T* in, const T* scale, } } +// ChannelClipAndQuantKernel for quant_axis is 1 +template +__global__ void ChannelClipAndQuantKernelQuantAxis1(const T* in, const T* scale, + const int bin_cnt, + const int n, const int cin, + const int cout, T* out) { + T s = scale[blockIdx.x % cout]; + T inv_s = inverse(s); + + int wh_size = n / (cin * cout); + const T* in_c = in + blockIdx.x * wh_size; + T* out_c = out + blockIdx.x * wh_size; + + for (int i = threadIdx.x; i < wh_size; i += blockDim.x) { + T x = in_c[i]; + T v = x > s ? s : x; + v = v < -s ? -s : v; + v = bin_cnt * inv_s * v; + out_c[i] = round(v); + } +} + template struct ChannelClipAndFakeQuantFunctor { void operator()(const platform::CUDADeviceContext& ctx, const framework::Tensor& in, const framework::Tensor& scale, - const int bin_cnt, const int channel, + const int bin_cnt, const int quant_axis, framework::Tensor* out) { - int num = in.numel(); - int block = 1024; - int grid = channel; + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + int num = in.numel(); + auto in_dims = in.dims(); const T* in_data = in.data(); const T* scale_data = scale.data(); T* out_data = out->mutable_data(ctx.GetPlace()); - ChannelClipAndQuantKernel<<>>( - in_data, scale_data, bin_cnt, num, channel, out_data); + if (quant_axis == 0) { + int grid = in_dims[0]; + int block = 1024; + ChannelClipAndQuantKernelQuantAxis0<<>>( + in_data, scale_data, bin_cnt, num, in_dims[0], out_data); + } else if (quant_axis == 1) { + int grid = in_dims[0] * in_dims[1]; + int block = 1024; + ChannelClipAndQuantKernelQuantAxis1<<>>( + in_data, scale_data, bin_cnt, num, in_dims[0], in_dims[1], out_data); + } } }; diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index 4136217fb0c5f600971c1c04f803b65de9bbecb4..5c6e0b1f6e26d84462a18da910b412f03b93285d 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -61,15 +61,15 @@ struct FindRangeAbsMaxFunctor { template struct FindChannelAbsMaxFunctor { - void operator()(const DeviceContext& ctx, const T* in, const int num, - const int channel, T* out); + void operator()(const DeviceContext& ctx, const framework::Tensor& in_tensor, + const int quant_axis, T* out_abs_max); }; template struct ChannelClipAndFakeQuantFunctor { void operator()(const DeviceContext& ctx, const framework::Tensor& in, const framework::Tensor& scale, const int bin_cnt, - const int channel, framework::Tensor* out); + const int quant_axis, framework::Tensor* out); }; template @@ -144,12 +144,13 @@ class FakeChannelWiseQuantizeAbsMaxKernel : public framework::OpKernel { int bit_length = context.Attr("bit_length"); int bin_cnt = std::pow(2, bit_length - 1) - 1; + int quant_axis = context.Attr("quant_axis"); auto& dev_ctx = context.template device_context(); - FindChannelAbsMaxFunctor()( - dev_ctx, in->data(), in->numel(), in->dims()[0], out_scale_data); + FindChannelAbsMaxFunctor()(dev_ctx, *in, quant_axis, + out_scale_data); ChannelClipAndFakeQuantFunctor()( - dev_ctx, *in, *out_scale, bin_cnt, in->dims()[0], out); + dev_ctx, *in, *out_scale, bin_cnt, quant_axis, out); } }; diff --git 
a/paddle/fluid/operators/gather.cu.h b/paddle/fluid/operators/gather.cu.h index f59d46ec79bd0960392ed1b8b3c8ee27b2317e39..c4bdd9e439c54db03f8fa8c4fe439ed6edbd0c7a 100644 --- a/paddle/fluid/operators/gather.cu.h +++ b/paddle/fluid/operators/gather.cu.h @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/memory/malloc.h" +#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/cuda_primitives.h" #include "paddle/fluid/platform/place.h" @@ -158,5 +159,133 @@ void GPUGatherNd(const framework::ExecutionContext& context, end_size); } +template +__global__ void GatherGPUKernel(const T* input, const U* index, T* out, + int outer_dim_size, int inner_dim_size, + int out_index_dim_size, + int input_index_dim_size, int size) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + for (; idx < size; idx += blockDim.x * gridDim.x) { + int inner_dim_index = idx / (outer_dim_size * out_index_dim_size); + int next_idx = idx % (outer_dim_size * out_index_dim_size); + int index_dim_index = next_idx / (outer_dim_size); + int out_dim_index = next_idx % outer_dim_size; + int input_index = + inner_dim_index * (outer_dim_size * input_index_dim_size) + + index[index_dim_index] * outer_dim_size + out_dim_index; + out[idx] = input[input_index]; + } +} + +template +__global__ void GatherGradGPUKernel(const T* input, const U* index, T* out, + int outer_dim_size, int inner_dim_size, + int input_index_dim_size, + int out_index_dim_size, int size) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + for (; idx < size; idx += blockDim.x * gridDim.x) { + int inner_dim_index = idx / (outer_dim_size * input_index_dim_size); + int next_idx = idx % (outer_dim_size * input_index_dim_size); + int index_dim_index = next_idx / (outer_dim_size); + int out_dim_index = next_idx % outer_dim_size; + int out_index = inner_dim_index * (outer_dim_size * out_index_dim_size) + + index[index_dim_index] * outer_dim_size + out_dim_index; + paddle::platform::CudaAtomicAdd(out + out_index, *(input + idx)); + } +} + +template +void GatherV2CUDAFunction(const Tensor* input, const Tensor* index, + const Tensor* axis, Tensor* out, + const paddle::platform::Place& place, + const framework::ExecutionContext& ctx) { + int axis_size = axis->numel(); + int index_size = index->numel(); + int input_size = input->numel(); + auto input_dim = input->dims(); + auto* input_data = input->data(); + auto* index_data = index->data(); + + if (input->numel() == 0) return; + PADDLE_ENFORCE_EQ(axis_size, 1, + platform::errors::InvalidArgument( + "Axis size should be 1, but received %d", axis_size)); + Tensor cpu_axis; + framework::TensorCopy(*axis, platform::CPUPlace(), &cpu_axis); + int axis_index = cpu_axis.data()[0]; + int index_dim_size = input_dim[axis_index]; + + int inner_dim_size = 1; + int outer_dim_size = 1; + std::vector out_dim_vec; + + for (int i = 0; i < axis_index; i++) { + inner_dim_size *= input_dim[i]; + out_dim_vec.push_back(input_dim[i]); + } + out_dim_vec.push_back(index_size); + for (int i = axis_index + 1; i < input_dim.size(); i++) { + outer_dim_size *= input_dim[i]; + out_dim_vec.push_back(input_dim[i]); + } + auto out_dim = framework::make_ddim(out_dim_vec); + + out->Resize(out_dim); + auto* out_data = out->mutable_data(place); + int out_size = out->numel(); + + int threads = 512; + int grid = (out_size + threads - 1) / threads; + auto stream = ctx.cuda_device_context().stream(); + GatherGPUKernel<<>>( 
+ input_data, index_data, out_data, outer_dim_size, inner_dim_size, + index_size, index_dim_size, out_size); +} + +template +void GatherV2GradCUDAFunction(const Tensor* input, const Tensor* index, + const Tensor* axis, Tensor* out, + const paddle::platform::Place& place, + const framework::ExecutionContext& ctx) { + auto* index_data = index->data(); + + int axis_size = axis->numel(); + int index_size = index->numel(); + int input_size = input->numel(); + auto input_dim = input->dims(); + auto* input_data = input->data(); + + if (input->numel() == 0) return; + PADDLE_ENFORCE_EQ(axis_size, 1, + platform::errors::InvalidArgument( + "Axis size should be 1, but received %d", axis_size)); + Tensor cpu_axis; + framework::TensorCopy(*axis, platform::CPUPlace(), &cpu_axis); + int axis_index = cpu_axis.data()[0]; + int input_index_dim_size = input_dim[axis_index]; + + int inner_dim_size = 1; + int outer_dim_size = 1; + + for (int i = 0; i < axis_index; i++) { + inner_dim_size *= input_dim[i]; + } + for (int i = axis_index + 1; i < input_dim.size(); i++) { + outer_dim_size *= input_dim[i]; + } + + auto* out_data = out->mutable_data(place); + auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); + auto out_dim = out->dims(); + int out_index_dim_size = out_dim[axis_index]; + operators::math::set_constant(*dev_ctx, out, 0.0); + + int threads = 512; + int grid = (input_size + threads - 1) / threads; + auto stream = ctx.cuda_device_context().stream(); + GatherGradGPUKernel<<>>( + input_data, index_data, out_data, outer_dim_size, inner_dim_size, + input_index_dim_size, out_index_dim_size, input_size); +} } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/gather.h b/paddle/fluid/operators/gather.h index f5a7bffe4745360a307a4b7c61b30c871cf6c756..c12a3b8adc97893f523b307a56c0e6b04ea8d675 100644 --- a/paddle/fluid/operators/gather.h +++ b/paddle/fluid/operators/gather.h @@ -15,10 +15,12 @@ limitations under the License. 
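The GatherV2 CUDA kernels above and the CPU GatherV2Function in gather.h below use the same decomposition: treat the input as [inner_dim_size, dims[axis], outer_dim_size], where inner_dim_size multiplies the dimensions before axis and outer_dim_size the ones after it, and copy input[i][index[j]][k] into the output. A small host-only sketch of that index arithmetic, with illustrative shapes and data:

    // Sketch of gather along an arbitrary axis using the same inner/outer split as
    // the kernels above: inner_dim_size = product of dims before `axis`,
    // outer_dim_size = product of dims after `axis`. Shapes and data are illustrative.
    #include <cstdio>
    #include <vector>

    int main() {
      // A 2 x 3 x 2 input, gathered along axis = 1 with index = {2, 0}.
      const std::vector<int> dims = {2, 3, 2};
      const int axis = 1;
      const std::vector<int> index = {2, 0};
      std::vector<float> x(2 * 3 * 2);
      for (size_t i = 0; i < x.size(); ++i) x[i] = static_cast<float>(i);

      int inner_dim_size = 1, outer_dim_size = 1;
      for (int i = 0; i < axis; ++i) inner_dim_size *= dims[i];
      for (int i = axis + 1; i < static_cast<int>(dims.size()); ++i)
        outer_dim_size *= dims[i];
      const int input_index_dim_size = dims[axis];

      // Output shape: dims-before-axis ++ {index.size()} ++ dims-after-axis.
      std::vector<float> out;
      out.reserve(inner_dim_size * index.size() * outer_dim_size);
      for (int i = 0; i < inner_dim_size; ++i) {
        for (size_t j = 0; j < index.size(); ++j) {
          for (int k = 0; k < outer_dim_size; ++k) {
            int src = (i * input_index_dim_size + index[j]) * outer_dim_size + k;
            out.push_back(x[src]);
          }
        }
      }
      std::printf("gathered %zu elements; out[0..3] = %.0f %.0f %.0f %.0f\n",
                  out.size(), out[0], out[1], out[2], out[3]);
      return 0;
    }

The gradient path inverts the same mapping: each output-grad element is scatter-added back to input position (i, index[j], k), which is why the CUDA version needs CudaAtomicAdd when several indices repeat.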
*/ #pragma once #include #include +#include #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/place.h" namespace paddle { @@ -124,5 +126,110 @@ void CPUGatherNd(const platform::DeviceContext& ctx, const Tensor& input, } } +template +void GatherV2Function(const Tensor* input, const Tensor* index, + const Tensor* axis, Tensor* out, + const paddle::platform::Place& place) { + auto* axis_data = axis->data(); + auto* index_data = index->data(); + + int axis_size = axis->numel(); + int index_size = index->numel(); + int input_size = input->numel(); + auto input_dim = input->dims(); + auto* input_data = input->data(); + + if (input->numel() == 0) return; + PADDLE_ENFORCE_EQ(axis_size, 1, + platform::errors::InvalidArgument( + "Axis size should be 1, but received %d", axis_size)); + int axis_index = axis_data[0]; + + int input_index_dim_size = input_dim[axis_index]; + for (int i = 0; i < index_size; i++) { + PADDLE_ENFORCE_LT(index_data[i], input_index_dim_size, + platform::errors::InvalidArgument( + "The element of Index must be less than the size of " + "input dim size of axis which is %d, but received " + "index element which is %d in the %d index.", + input_index_dim_size, index_data[i], i)); + } + + int inner_dim_size = 1; + int outer_dim_size = 1; + std::vector out_dim_vec; + + for (int i = 0; i < axis_index; i++) { + inner_dim_size *= input_dim[i]; + out_dim_vec.push_back(input_dim[i]); + } + out_dim_vec.push_back(index_size); + for (int i = axis_index + 1; i < input_dim.size(); i++) { + outer_dim_size *= input_dim[i]; + out_dim_vec.push_back(input_dim[i]); + } + auto out_dim = framework::make_ddim(out_dim_vec); + + out->Resize(out_dim); + auto* out_data = out->mutable_data(place); + + int out_index = 0; + for (int i = 0; i < inner_dim_size; i++) { + for (int j = 0; j < index_size; j++) { + for (int k = 0; k < outer_dim_size; k++) { + int index = k + index_data[j] * outer_dim_size + + (i * input_size / inner_dim_size); + out_data[out_index] = input_data[index]; + out_index++; + } + } + } +} + +template +void GatherV2GradFunction(const Tensor* input, const Tensor* index, + const Tensor* axis, Tensor* out, + const paddle::platform::Place& place) { + auto* axis_data = axis->data(); + auto* index_data = index->data(); + + int axis_size = axis->numel(); + auto input_dim = input->dims(); + auto* input_data = input->data(); + + if (input->numel() == 0) return; + PADDLE_ENFORCE_EQ(axis_size, 1, + platform::errors::InvalidArgument( + "Axis size should be 1, but received %d", axis_size)); + int axis_index = axis_data[0]; + int input_index_dim_size = input_dim[axis_index]; + + int inner_dim_size = 1; + int outer_dim_size = 1; + + for (int i = 0; i < axis_index; i++) { + inner_dim_size *= input_dim[i]; + } + for (int i = axis_index + 1; i < input_dim.size(); i++) { + outer_dim_size *= input_dim[i]; + } + + auto* out_data = out->mutable_data(place); + auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); + auto out_dim = out->dims(); + int out_index_dim_size = out_dim[axis_index]; + operators::math::set_constant(*dev_ctx, out, 0.0); + + for (int i = 0; i < inner_dim_size; i++) { + for (int j = 0; j < input_index_dim_size; j++) { + for (int k = 0; k < outer_dim_size; k++) { + int index = k + index_data[j] * outer_dim_size + + i * outer_dim_size * out_index_dim_size; + out_data[index] += input_data[j * outer_dim_size + k]; + } + } 
+ } +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/gather_nd_op.cc b/paddle/fluid/operators/gather_nd_op.cc index c22c8a18ca63a05265ac6991cf0e0cbd9e7ea5ed..1427bd04d3442be26be931ca31bf358ebd23efae 100644 --- a/paddle/fluid/operators/gather_nd_op.cc +++ b/paddle/fluid/operators/gather_nd_op.cc @@ -45,7 +45,7 @@ class GatherNdOp : public framework::OperatorWithKernel { index_dims[index_dims_size - 1], x_dims_size, platform::errors::InvalidArgument( "Input(Index).shape[-1] should be no greater than Input(X).rank")); - PADDLE_ENFORCE_GE(index_dims_size, 2UL, + PADDLE_ENFORCE_GE(index_dims_size, 1UL, platform::errors::InvalidArgument( "The rank of Input(Index) should be greater than 1")); diff --git a/paddle/fluid/operators/gather_op.cc b/paddle/fluid/operators/gather_op.cc index 6a3abaa600281ac4a9762d5c73d398974abbf041..8a3450d1df97a2e99711f9ae029ca2668f38e2b0 100644 --- a/paddle/fluid/operators/gather_op.cc +++ b/paddle/fluid/operators/gather_op.cc @@ -78,6 +78,9 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "The source input of gather op"); AddInput("Index", "The index input of gather op"); + AddInput("Axis", + "The Tensor which contains the axis that we do gather operation.") + .AsDispensable(); AddOutput("Out", "The output of gather op"); AddAttr( "overwrite", @@ -120,6 +123,8 @@ class GatherGradOpMaker : public framework::SingleGradOpMaker { void Apply(GradOpPtr op) const override { op->SetType("gather_grad"); op->SetInput("Index", this->Input("Index")); + op->SetInput("Axis", this->Input("Axis")); + op->SetInput("X", this->Input("X")); op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); diff --git a/paddle/fluid/operators/gather_op.cu b/paddle/fluid/operators/gather_op.cu index 5bef547c0542b922f646f72ffb7310ef4eb279e9..37fbfb21f60a0568390c6798dc305c91fc8af886 100644 --- a/paddle/fluid/operators/gather_op.cu +++ b/paddle/fluid/operators/gather_op.cu @@ -31,6 +31,33 @@ class GatherOpCUDAKernel : public framework::OpKernel { auto *index = ctx.Input("Index"); auto *output = ctx.Output("Out"); + if (ctx.HasInput("Axis")) { + const Tensor *axis = ctx.Input("Axis"); + const auto &index_type = index->type(); + const auto &axis_type = axis->type(); + auto place = ctx.GetPlace(); + if (index_type == framework::proto::VarType::INT32 && + axis_type == framework::proto::VarType::INT32) { + GatherV2CUDAFunction(x, index, axis, output, place, + ctx); + } + if (index_type == framework::proto::VarType::INT32 && + axis_type == framework::proto::VarType::INT64) { + GatherV2CUDAFunction(x, index, axis, output, place, + ctx); + } + if (index_type == framework::proto::VarType::INT64 && + axis_type == framework::proto::VarType::INT32) { + GatherV2CUDAFunction(x, index, axis, output, place, + ctx); + } + if (index_type == framework::proto::VarType::INT64 && + axis_type == framework::proto::VarType::INT64) { + GatherV2CUDAFunction(x, index, axis, output, place, + ctx); + } + return; + } output->mutable_data(ctx.GetPlace()); if (x->numel() == 0) return; const auto &index_type = index->type(); @@ -64,6 +91,34 @@ class GatherGradOpCUDAKernel : public framework::OpKernel { auto *dX = ctx.Output(framework::GradVarName("X")); auto *dO = ctx.Input(framework::GradVarName("Out")); + if (ctx.HasInput("Axis")) { + const Tensor *axis = ctx.Input("Axis"); + const auto &index_type = index->type(); + const auto &axis_type = axis->type(); + auto 
place = ctx.GetPlace(); + if (index_type == framework::proto::VarType::INT32 && + axis_type == framework::proto::VarType::INT32) { + GatherV2GradCUDAFunction(dO, index, axis, dX, + place, ctx); + } + if (index_type == framework::proto::VarType::INT32 && + axis_type == framework::proto::VarType::INT64) { + GatherV2GradCUDAFunction(dO, index, axis, dX, + place, ctx); + } + if (index_type == framework::proto::VarType::INT64 && + axis_type == framework::proto::VarType::INT32) { + GatherV2GradCUDAFunction(dO, index, axis, dX, + place, ctx); + } + if (index_type == framework::proto::VarType::INT64 && + axis_type == framework::proto::VarType::INT64) { + GatherV2GradCUDAFunction(dO, index, axis, dX, + place, ctx); + } + return; + } + dX->mutable_data(ctx.GetPlace()); auto dxt = framework::EigenVector::Flatten(*dX); auto &place = *ctx.template device_context() diff --git a/paddle/fluid/operators/gather_op.h b/paddle/fluid/operators/gather_op.h index e4ce13ca8fc0b49e997749d0f47f15213a3b44f7..8ec0d6ce0b69c791f9bff58f1681f8d4543c57dd 100644 --- a/paddle/fluid/operators/gather_op.h +++ b/paddle/fluid/operators/gather_op.h @@ -35,6 +35,30 @@ class GatherOpKernel : public framework::OpKernel { auto *index = ctx.Input("Index"); auto *output = ctx.Output("Out"); + if (ctx.HasInput("Axis")) { + const Tensor *axis = ctx.Input("Axis"); + const auto &index_type = index->type(); + const auto &axis_type = axis->type(); + auto place = ctx.GetPlace(); + if (index_type == framework::proto::VarType::INT32 && + axis_type == framework::proto::VarType::INT32) { + GatherV2Function(x, index, axis, output, place); + } + if (index_type == framework::proto::VarType::INT32 && + axis_type == framework::proto::VarType::INT64) { + GatherV2Function(x, index, axis, output, place); + } + if (index_type == framework::proto::VarType::INT64 && + axis_type == framework::proto::VarType::INT32) { + GatherV2Function(x, index, axis, output, place); + } + if (index_type == framework::proto::VarType::INT64 && + axis_type == framework::proto::VarType::INT64) { + GatherV2Function(x, index, axis, output, place); + } + return; + } + output->mutable_data(ctx.GetPlace()); if (x->numel() == 0) return; @@ -70,6 +94,30 @@ class GatherGradientOpKernel : public framework::OpKernel { auto *dX = ctx.Output(framework::GradVarName("X")); auto *dO = ctx.Input(framework::GradVarName("Out")); + if (ctx.HasInput("Axis")) { + const Tensor *axis = ctx.Input("Axis"); + const auto &index_type = index->type(); + const auto &axis_type = axis->type(); + auto place = ctx.GetPlace(); + if (index_type == framework::proto::VarType::INT32 && + axis_type == framework::proto::VarType::INT32) { + GatherV2GradFunction(dO, index, axis, dX, place); + } + if (index_type == framework::proto::VarType::INT32 && + axis_type == framework::proto::VarType::INT64) { + GatherV2GradFunction(dO, index, axis, dX, place); + } + if (index_type == framework::proto::VarType::INT64 && + axis_type == framework::proto::VarType::INT32) { + GatherV2GradFunction(dO, index, axis, dX, place); + } + if (index_type == framework::proto::VarType::INT64 && + axis_type == framework::proto::VarType::INT64) { + GatherV2GradFunction(dO, index, axis, dX, place); + } + return; + } + dX->mutable_data(ctx.GetPlace()); auto dxt = framework::EigenVector::Flatten(*dX); auto &place = *ctx.template device_context() diff --git a/paddle/fluid/operators/gaussian_random_op.cc b/paddle/fluid/operators/gaussian_random_op.cc index 253078751ce66dd2a6d52dbdd5fe6b5c0ed21849..111d4ad4490074fb53671f6f3180cf17c5abe913 100644 --- 
a/paddle/fluid/operators/gaussian_random_op.cc +++ b/paddle/fluid/operators/gaussian_random_op.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/fill_constant_op.h" #ifdef PADDLE_WITH_MKLDNN @@ -31,22 +33,29 @@ class CPUGaussianRandomKernel : public framework::OpKernel { float std = context.Attr("std"); auto* tensor = context.Output("Out"); - unsigned int seed = static_cast(context.Attr("seed")); - std::minstd_rand engine; - if (seed == 0) { - seed = std::random_device()(); - } - engine.seed(seed); std::normal_distribution dist(mean, std); - const std::string op_type = "gaussian_random"; auto shape = GetShape(context, op_type); tensor->Resize(shape); int64_t size = tensor->numel(); T* data = tensor->mutable_data(context.GetPlace()); - for (int64_t i = 0; i < size; ++i) { - data[i] = dist(engine); + if (framework::Generator::GetInstance()->is_init_py) { + std::mt19937_64& gen_engine = + framework::Generator::GetInstance()->GetCPUEngine(); + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(gen_engine); + } + } else { + unsigned int seed = static_cast(context.Attr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(engine); + } } } }; diff --git a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc index 3bf34fc685ee8af39b66f444c35d606c4b5d8ffb..93f9e108723fbd56e0d3bf5d439614c2c20bb393 100644 --- a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc +++ b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc @@ -41,13 +41,14 @@ class CUDNNGridSampleOpKernel : public framework::OpKernel { int n = input->dims()[0]; int c = input->dims()[1]; - int h = input->dims()[2]; - int w = input->dims()[3]; - const int size[4] = {n, c, h, w}; + int out_h = grid->dims()[1]; + int out_w = grid->dims()[2]; + const int size[4] = {n, c, out_h, out_w}; const T* input_data = input->data(); const T* grid_data = grid->data(); - T* output_data = output->mutable_data({n, c, h, w}, ctx.GetPlace()); + T* output_data = + output->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); ScopedSpatialTransformerDescriptor st_desc; cudnnSpatialTransformerDescriptor_t cudnn_st_desc = @@ -97,7 +98,7 @@ class CUDNNGridSampleGradOpKernel : public framework::OpKernel { const T* grid_data = grid->data(); const T* output_grad_data = output_grad->data(); T* input_grad_data = - input_grad->mutable_data(output_grad_dims, ctx.GetPlace()); + input_grad->mutable_data(input->dims(), ctx.GetPlace()); T* grid_grad_data = grid_grad->mutable_data({n, h, w, 2}, ctx.GetPlace()); diff --git a/paddle/fluid/operators/grid_sampler_op.cc b/paddle/fluid/operators/grid_sampler_op.cc index 5be490379642e8761a6821fa0dc0d332ca5b41ef..deb71b807128e5c0b173b517e60832894ced41e5 100644 --- a/paddle/fluid/operators/grid_sampler_op.cc +++ b/paddle/fluid/operators/grid_sampler_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
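The gaussian_random change above picks between two RNG sources: the shared generator's CPU engine when it has been initialised from Python (is_init_py), and the old per-op path otherwise, where the seed attribute is used and seed == 0 falls back to std::random_device. A standalone sketch of that selection using standard-library stand-ins; GlobalCPUEngine here is a placeholder, not Paddle's framework::Generator.

    // Sketch of the two sampling paths: a shared global engine vs. a per-op engine
    // seeded from an attribute (0 means "pick a random seed"). Names are illustrative.
    #include <cstdio>
    #include <random>

    std::mt19937_64& GlobalCPUEngine() {  // stand-in for the shared global generator
      static std::mt19937_64 engine(12345);
      return engine;
    }

    void FillGaussian(float* data, int64_t n, float mean, float std_dev,
                      bool use_global_engine, unsigned int seed) {
      std::normal_distribution<float> dist(mean, std_dev);
      if (use_global_engine) {
        auto& gen = GlobalCPUEngine();
        for (int64_t i = 0; i < n; ++i) data[i] = dist(gen);
      } else {
        if (seed == 0) seed = std::random_device()();
        std::minstd_rand engine(seed);
        for (int64_t i = 0; i < n; ++i) data[i] = dist(engine);
      }
    }

    int main() {
      float buf[4];
      FillGaussian(buf, 4, 0.0f, 1.0f, /*use_global_engine=*/true, /*seed=*/0);
      std::printf("%f %f %f %f\n", buf[0], buf[1], buf[2], buf[3]);
      return 0;
    }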
*/ #include "paddle/fluid/operators/grid_sampler_op.h" #include +#include #include "paddle/fluid/framework/op_registry.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cudnn_helper.h" @@ -58,21 +59,10 @@ class GridSampleOp : public framework::OperatorWithKernel { "Input(X) and Input(Grid) dimension[0] should be equal, but " "received X dimension[0](%d) != Grid dimension[0](%d)", x_dims[0], grid_dims[0])); - PADDLE_ENFORCE_EQ( - grid_dims[1], x_dims[2], - platform::errors::InvalidArgument( - "Input(X) dims[2] and Input(Grid) dims[1] should be equal, but " - "received X dimension[2](%d) != Grid dimension[1](%d)", - x_dims[2], grid_dims[1])); - PADDLE_ENFORCE_EQ( - grid_dims[2], x_dims[3], - platform::errors::InvalidArgument( - "Input(X) dims[3] and Input(Grid) dims[2] should be equal, but " - "received X dimension[3](%d) != Grid dimension[2](%d)", - x_dims[3], grid_dims[2])); } - ctx->SetOutputDim("Output", x_dims); + ctx->SetOutputDim("Output", + {x_dims[0], x_dims[1], grid_dims[1], grid_dims[2]}); ctx->ShareLoD("X", "Output"); } @@ -108,15 +98,37 @@ class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker { "(bool, default true) Only used in cudnn kernel, need install cudnn") .SetDefault(true); + AddAttr( + "align_corners", + "(bool, default true) If align_corners is true, it will project" + "-1 and 1 to the centers of the corner pixels. Otherwise, it will " + "project" + "-1 and 1 to the image edges.") + .SetDefault(true); + + AddAttr( + "mode", + "(bool, default true) The interpolation method which can be 'bilinear'" + " or 'nearest'.") + .SetDefault("bilinear"); + + AddAttr( + "padding_mode", + "(bool, default true) The padding method used when source" + "index is out of input images. It can be 'zeros', 'reflect' and " + "'border'.") + .SetDefault("zeros"); + AddComment(R"DOC( - This operation samples input X by using bilinear interpolation based on + This operation samples input X by using bilinear or nearest interpolation based on flow field grid, which is usually generated by affine_grid. The grid of shape [N, H, W, 2] is the concatenation of (grid_x, grid_y) coordinates with shape [N, H, W] each, where grid_x is indexing the 4th dimension (in width dimension) of input data x and grid_y is indexing the 3rd dimension (in height dimension), finally results is the bilinear - interpolation value of 4 nearest corner points. + interpolation value or nearest value of 4 nearest corner points. + For bilinear interpolation mode: Step 1: Get (x, y) grid coordinates and scale to [0, H-1/W-1]. diff --git a/paddle/fluid/operators/grid_sampler_op.cu b/paddle/fluid/operators/grid_sampler_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..7e1e7b1e6929a6fce3a315b4e4711794bc6649b7 --- /dev/null +++ b/paddle/fluid/operators/grid_sampler_op.cu @@ -0,0 +1,490 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/grid_sampler_op.h" +#include "paddle/fluid/platform/cuda_device_function.h" +#include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/gpu_info.h" + +namespace paddle { +namespace operators { + +static __forceinline__ __device__ bool in_bounds(int h, int w, int H, int W) { + return h >= 0 && h < H && w >= 0 && w < W; +} + +template +static __forceinline__ __device__ void atomic_add(T* data, int h, int w, int sH, + int sW, int H, int W, + T delta) { + if (in_bounds(h, w, H, W)) { + atomicAdd(data + h * sH + w * sW, delta); + } +} + +template +static __forceinline__ __device__ T _unnormalize(T coord, int size, + bool align_corners) { + if (align_corners) { + return ((coord + 1.f) / 2) * (size - 1); + } else { + return ((coord + 1.f) * size - 1) / 2; + } +} + +template +static __forceinline__ __device__ T clip_indexes(T in, int max_value) { + return min(static_cast(max_value), max(in, static_cast(0))); +} + +template +static __forceinline__ __device__ T reflect_indexes(T in, int twice_low, + int twice_high) { + if (twice_low == twice_high) { + return static_cast(0); + } + T min = static_cast(twice_low) / 2; + T span = static_cast(twice_high - twice_low) / 2; + in = fabs(in - min); + T extra = fmod(in, span); + int flips = static_cast(floor(in / span)); + if (flips % 2 == 0) { + return extra + min; + } else { + return span - extra + min; + } +} + +template +static __forceinline__ __device__ T compute_positions(T coord, int size, + PaddingMode padding_mode, + bool align_corners) { + coord = _unnormalize(coord, size, align_corners); + if (padding_mode == PaddingMode::border) { + coord = clip_indexes(coord, size - 1); + } else if (padding_mode == PaddingMode::reflect) { + if (align_corners) { + coord = reflect_indexes(coord, 0, 2 * (size - 1)); + } else { + coord = reflect_indexes(coord, -1, 2 * size - 1); + } + coord = clip_indexes(coord, size - 1); + } + return coord; +} + +template +static __forceinline__ __device__ T _unnormalize_with_mask(T coord, int size, + bool align_corners, + T* grad_in) { + if (align_corners) { + *grad_in = static_cast(size - 1) / 2; + return ((coord + 1.f) / 2) * (size - 1); + } else { + *grad_in = static_cast(size) / 2; + return ((coord + 1.f) * size - 1) / 2; + } +} + +template +static __forceinline__ __device__ T clip_indexes_with_mask(T in, int clip_limit, + T* grad_in) { + if (in <= static_cast(0)) { + *grad_in = static_cast(0); + return static_cast(0); + } else { + T max = static_cast(clip_limit - 1); + if (in >= max) { + *grad_in = static_cast(0); + return max; + } else { + *grad_in = static_cast(1); + return in; + } + } +} + +template +static __forceinline__ __device__ T +reflect_indexes_with_mask(T in, int twice_low, int twice_high, T* grad_in) { + if (twice_low == twice_high) { + *grad_in = static_cast(0); + return static_cast(0); + } + int grad_in_mult_; + T min = static_cast(twice_low) / 2; + T span = static_cast(twice_high - twice_low) / 2; + in = in - min; + if (in < static_cast(0)) { + grad_in_mult_ = -1; + in = -in; + } else { + grad_in_mult_ = 1; + } + T extra = fmod(in, span); + int flips = static_cast(floor(in / span)); + if (flips % 2 == 0) { + *grad_in = static_cast(grad_in_mult_); + return extra + min; + } else { + *grad_in = static_cast(-grad_in_mult_); + return span - extra + min; + } +} + +template +static __forceinline__ __device__ T +compute_positions_with_mask(T coord, int size, PaddingMode padding_mode, + bool 
align_corners, T* grad_in) { + T grad_clip, grad_refl; + coord = _unnormalize_with_mask(coord, size, align_corners, grad_in); + if (padding_mode == PaddingMode::border) { + coord = clip_indexes_with_mask(coord, size, &grad_clip); + *grad_in = (*grad_in) * grad_clip; + } else if (padding_mode == PaddingMode::reflect) { + if (align_corners) { + coord = reflect_indexes_with_mask(coord, 0, 2 * (size - 1), &grad_refl); + } else { + coord = reflect_indexes_with_mask(coord, -1, 2 * size - 1, &grad_refl); + } + coord = clip_indexes_with_mask(coord, size, &grad_clip); + *grad_in = (*grad_in) * grad_refl * grad_clip; + } + + return coord; +} + +template +__global__ void grid_sample_cuda_kernel(const int nthreads, int n, int out_c, + int out_h, int out_w, int in_h, + int in_w, const T* input, const T* grid, + T* output, const Mode mode, + const PaddingMode padding_mode, + bool align_corners) { + int inp_sN = out_c * in_h * in_w; + + int inp_sC = in_h * in_w; + int inp_sH = in_w; + int inp_sW = 1; + int grid_sN = out_h * out_w * 2; + int grid_sH = out_w * 2; + int grid_sW = 2; + int grid_sCoor = 1; + int out_sN = out_c * out_h * out_w; + int out_sC = out_h * out_w; + int out_sH = out_w; + int out_sW = 1; + + CUDA_KERNEL_LOOP(index, nthreads) { + const int w = index % out_w; + const int h = (index / out_w) % out_h; + const int n = index / (out_h * out_w); + const int grid_offset = n * grid_sN + h * grid_sH + w * grid_sW; + + T ix = grid[grid_offset]; + T iy = grid[grid_offset + grid_sCoor]; + + ix = compute_positions(ix, in_w, padding_mode, align_corners); + iy = compute_positions(iy, in_h, padding_mode, align_corners); + + if (mode == Mode::bilinear) { + int ix_nw = static_cast(floor(ix)); + int iy_nw = static_cast(floor(iy)); + int ix_ne = ix_nw + 1; + int iy_ne = iy_nw; + int ix_sw = ix_nw; + int iy_sw = iy_nw + 1; + int ix_se = ix_nw + 1; + int iy_se = iy_nw + 1; + + T nw = (ix_se - ix) * (iy_se - iy); + T ne = (ix - ix_sw) * (iy_sw - iy); + T sw = (ix_ne - ix) * (iy - iy_ne); + T se = (ix - ix_nw) * (iy - iy_nw); + + auto inp_offset_NC = n * inp_sN; + auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW; + for (int c = 0; c < out_c; + ++c, inp_offset_NC += inp_sC, out_ptr_NCHW += out_sC) { + *out_ptr_NCHW = static_cast(0); + if (in_bounds(iy_nw, ix_nw, in_h, in_w)) { + *out_ptr_NCHW += + input[inp_offset_NC + iy_nw * inp_sH + ix_nw * inp_sW] * nw; + } + if (in_bounds(iy_ne, ix_ne, in_h, in_w)) { + *out_ptr_NCHW += + input[inp_offset_NC + iy_ne * inp_sH + ix_ne * inp_sW] * ne; + } + if (in_bounds(iy_sw, ix_sw, in_h, in_w)) { + *out_ptr_NCHW += + input[inp_offset_NC + iy_sw * inp_sH + ix_sw * inp_sW] * sw; + } + if (in_bounds(iy_se, ix_se, in_h, in_w)) { + *out_ptr_NCHW += + input[inp_offset_NC + iy_se * inp_sH + ix_se * inp_sW] * se; + } + } + } else if (mode == Mode::nearest) { + int ix_nearest = static_cast(round(ix)); + int iy_nearest = static_cast(round(iy)); + + auto inp_offset_NC = n * inp_sN; + auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW; + for (int c = 0; c < out_c; + ++c, inp_offset_NC += inp_sC, out_ptr_NCHW += out_sC) { + if (in_bounds(iy_nearest, ix_nearest, in_h, in_w)) { + *out_ptr_NCHW = + input[inp_offset_NC + iy_nearest * inp_sH + ix_nearest * inp_sW]; + } else { + *out_ptr_NCHW = static_cast(0); + } + } + } + } +} + +template +class GridSampleOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& dev_ctx = ctx.cuda_device_context(); + auto align_corners = 
ctx.Attr("align_corners"); + auto padding_mode_s = ctx.Attr("padding_mode"); + auto mode_s = ctx.Attr("mode"); + PaddingMode padding_mode; + Mode mode; + if (padding_mode_s == "border") { + padding_mode = PaddingMode::border; + } else if (padding_mode_s == "reflect") { + padding_mode = PaddingMode::reflect; + } else { + padding_mode = PaddingMode::zeros; + } + + if (mode_s == "nearest") { + mode = Mode::nearest; + } else { + mode = Mode::bilinear; + } + + auto* input = ctx.Input("X"); + auto* grid = ctx.Input("Grid"); + const int n = grid->dims()[0]; + const int out_h = grid->dims()[1]; + const int out_w = grid->dims()[2]; + const int c = input->dims()[1]; + const int in_h = input->dims()[2]; + const int in_w = input->dims()[3]; + VLOG(3) << "n: " << n << "; c: " << c << "; out_h: " << out_h + << "; out_w: " << out_w; + auto* output = ctx.Output("Output"); + auto* output_data = output->mutable_data(ctx.GetPlace()); + + VLOG(3) << "set constant"; + math::SetConstant()( + dev_ctx, output, static_cast(0)); + int count = static_cast(n * out_h * out_w); + + auto cu_stream = dev_ctx.stream(); + + int block = 512; + int grid_size = (count + block - 1) / block; + grid_sample_cuda_kernel<<>>( + count, n, c, out_h, out_w, in_h, in_w, input->data(), + grid->data(), output_data, mode, padding_mode, align_corners); + } +}; + +template +__global__ void grid_sampler_cuda_backward_kernel( + const int nthreads, const T* grad_output, const T* input, const T* grid, + int n, int out_c, int out_h, int out_w, int in_h, int in_w, T* grad_input, + T* grad_grid, const Mode mode, const PaddingMode padding_mode, + bool align_corners) { + int inp_sN = out_c * in_h * in_w; + int inp_sC = in_h * in_w; + int inp_sH = in_w; + int inp_sW = 1; + int grid_sN = out_h * out_w * 2; + int grid_sH = out_w * 2; + int grid_sW = 2; + int grid_sCoor = 1; + + int gOut_sN = out_c * out_h * out_w; + int gOut_sC = out_h * out_w; + int gOut_sH = out_w; + int gOut_sW = 1; + + CUDA_KERNEL_LOOP(index, nthreads) { + const int w = index % out_w; + const int h = (index / out_w) % out_h; + const int n = index / (out_h * out_w); + const int grid_offset = n * grid_sN + h * grid_sH + w * grid_sW; + + T ix = grid[grid_offset]; + T iy = grid[grid_offset + grid_sCoor]; + + T gix_mult, giy_mult; + ix = compute_positions_with_mask(ix, in_w, padding_mode, align_corners, + &gix_mult); + iy = compute_positions_with_mask(iy, in_h, padding_mode, align_corners, + &giy_mult); + + if (mode == Mode::bilinear) { + int ix_nw = static_cast(floor(ix)); + int iy_nw = static_cast(floor(iy)); + int ix_ne = ix_nw + 1; + int iy_ne = iy_nw; + int ix_sw = ix_nw; + int iy_sw = iy_nw + 1; + int ix_se = ix_nw + 1; + int iy_se = iy_nw + 1; + + T nw = (ix_se - ix) * (iy_se - iy); + T ne = (ix - ix_sw) * (iy_sw - iy); + T sw = (ix_ne - ix) * (iy - iy_ne); + T se = (ix - ix_nw) * (iy - iy_nw); + + T gix = static_cast(0), giy = static_cast(0); + int gOut_offset = n * gOut_sN + h * gOut_sH + w * gOut_sW; + T* gInp_ptr_NC = grad_input + n * inp_sN; + int inp_offset_NC = n * inp_sN; + for (int c = 0; c < out_c; ++c, inp_offset_NC += inp_sC, + gInp_ptr_NC += inp_sC, gOut_offset += gOut_sC) { + T gOut = grad_output[gOut_offset]; + + atomic_add(gInp_ptr_NC, iy_nw, ix_nw, inp_sH, inp_sW, in_h, in_w, + nw * gOut); + atomic_add(gInp_ptr_NC, iy_ne, ix_ne, inp_sH, inp_sW, in_h, in_w, + ne * gOut); + atomic_add(gInp_ptr_NC, iy_sw, ix_sw, inp_sH, inp_sW, in_h, in_w, + sw * gOut); + atomic_add(gInp_ptr_NC, iy_se, ix_se, inp_sH, inp_sW, in_h, in_w, + se * gOut); + + if (in_bounds(iy_nw, ix_nw, 
in_h, in_w)) { + T nw_val = input[inp_offset_NC + iy_nw * inp_sH + ix_nw * inp_sW]; + gix -= nw_val * (iy_se - iy) * gOut; + giy -= nw_val * (ix_se - ix) * gOut; + } + if (in_bounds(iy_ne, ix_ne, in_h, in_w)) { + T ne_val = input[inp_offset_NC + iy_ne * inp_sH + ix_ne * inp_sW]; + gix += ne_val * (iy_sw - iy) * gOut; + giy -= ne_val * (ix - ix_sw) * gOut; + } + if (in_bounds(iy_sw, ix_sw, in_h, in_w)) { + T sw_val = input[inp_offset_NC + iy_sw * inp_sH + ix_sw * inp_sW]; + gix -= sw_val * (iy - iy_ne) * gOut; + giy += sw_val * (ix_ne - ix) * gOut; + } + if (in_bounds(iy_se, ix_se, in_h, in_w)) { + T se_val = input[inp_offset_NC + iy_se * inp_sH + ix_se * inp_sW]; + gix += se_val * (iy - iy_nw) * gOut; + giy += se_val * (ix - ix_nw) * gOut; + } + } + + T* gGrid_ptr_NHW = grad_grid + index * grid_sW; + gGrid_ptr_NHW[0] = gix_mult * gix; + gGrid_ptr_NHW[1] = giy_mult * giy; + } else if (mode == Mode::nearest) { + int ix_nearest = static_cast(::round(ix)); + int iy_nearest = static_cast(::round(iy)); + + int gOut_offset = n * gOut_sN + h * gOut_sH + w * gOut_sW; + T* gInp_ptr_NC = grad_input + n * inp_sN; + for (int c = 0; c < out_c; + ++c, gInp_ptr_NC += inp_sC, gOut_offset += gOut_sC) { + atomic_add(gInp_ptr_NC, iy_nearest, ix_nearest, inp_sH, inp_sW, in_h, + in_w, grad_output[gOut_offset]); + } + + T* gGrid_ptr_NHW = grad_grid + index * grid_sW; + gGrid_ptr_NHW[0] = static_cast(0); + gGrid_ptr_NHW[1] = static_cast(0); + } + } +} + +template +class GridSampleGradOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& dev_ctx = ctx.cuda_device_context(); + auto align_corners = ctx.Attr("align_corners"); + auto padding_mode_s = ctx.Attr("padding_mode"); + auto mode_s = ctx.Attr("mode"); + + PaddingMode padding_mode; + Mode mode; + if (padding_mode_s == "border") { + padding_mode = PaddingMode::border; + } else if (padding_mode_s == "reflect") { + padding_mode = PaddingMode::reflect; + } else { + padding_mode = PaddingMode::zeros; + } + + if (mode_s == "nearest") { + mode = Mode::nearest; + } else { + mode = Mode::bilinear; + } + + auto* input = ctx.Input("X"); + auto* grid = ctx.Input("Grid"); + auto* output_grad = ctx.Input(framework::GradVarName("Output")); + + const int n = grid->dims()[0]; + const int out_h = grid->dims()[1]; + const int out_w = grid->dims()[2]; + const int c = input->dims()[1]; + const int in_h = input->dims()[2]; + const int in_w = input->dims()[3]; + + auto* input_grad = ctx.Output(framework::GradVarName("X")); + input_grad->mutable_data(ctx.GetPlace()); + math::SetConstant()( + ctx.template device_context(), + input_grad, static_cast(0)); + auto* grid_grad = ctx.Output(framework::GradVarName("Grid")); + grid_grad->mutable_data(ctx.GetPlace()); + math::SetConstant()( + ctx.template device_context(), + grid_grad, static_cast(0)); + + int count = static_cast(n * out_h * out_w); + auto cu_stream = dev_ctx.stream(); + int block = 512; + int grid_size = (count + block - 1) / block; + grid_sampler_cuda_backward_kernel<<>>( + count, output_grad->data(), input->data(), grid->data(), n, c, + out_h, out_w, in_h, in_w, input_grad->data(), grid_grad->data(), + mode, padding_mode, align_corners); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(grid_sampler, ops::GridSampleOpCUDAKernel, + ops::GridSampleOpCUDAKernel); +REGISTER_OP_CUDA_KERNEL(grid_sampler_grad, + ops::GridSampleGradOpCUDAKernel, + 
ops::GridSampleGradOpCUDAKernel); diff --git a/paddle/fluid/operators/grid_sampler_op.h b/paddle/fluid/operators/grid_sampler_op.h index 08a6043eb07a6e44d46428ee195f6cb28c2ee77c..eda800e78faf5da2bb379b8101e4823c5bc2d2f8 100644 --- a/paddle/fluid/operators/grid_sampler_op.h +++ b/paddle/fluid/operators/grid_sampler_op.h @@ -13,6 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include +#include +#include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather.h" @@ -22,6 +25,13 @@ limitations under the License. */ namespace paddle { namespace operators { +enum class Mode { + bilinear, + nearest, +}; + +enum class PaddingMode { zeros, border, reflect }; + using Tensor = framework::Tensor; template @@ -39,64 +49,229 @@ static inline bool isInBound(T x, T y, T x_max, T y_max) { } template -static void CalcGridLocations(const platform::CPUDeviceContext& ctx, - const Tensor& grid, Tensor* x_w, Tensor* x_e, - Tensor* y_n, Tensor* y_s, Tensor* d_w, - Tensor* d_e, Tensor* d_n, Tensor* d_s) { +static inline void unnormalize(const platform::CPUDeviceContext& ctx, + Tensor* grid_slice, + const int max_val, // height-1 or width-1 + bool align_corners) { auto& place = *ctx.eigen_device(); + auto grid_slice_t = EigenTensor::From(*grid_slice); + + if (!align_corners) { + auto factor = static_cast((max_val + 1) * 0.5); + grid_slice_t.device(place) = + (grid_slice_t + static_cast(1)) * factor - static_cast(0.5); + } else { + auto factor = static_cast(max_val * 0.5); + grid_slice_t.device(place) = (grid_slice_t + static_cast(1)) * factor; + } +} + +template +static inline void clip(const platform::CPUDeviceContext& ctx, + Tensor* grid_slice, + const int max_val, // height-1 or width-1 + bool align_corners, std::string padding_mode) { + auto& place = *ctx.eigen_device(); + auto grid_slice_t = EigenTensor::From(*grid_slice); + if (padding_mode == "border") { + grid_slice_t.device(place) = grid_slice_t.cwiseMax(static_cast(0)) + .cwiseMin(static_cast(max_val)); + } else if (padding_mode == "reflect") { + if (align_corners) { + auto double_range = static_cast(max_val * 2); + auto grid_abs = grid_slice_t.abs(); + auto extra = grid_abs - (grid_abs / double_range).floor() * double_range; + grid_slice_t.device(place) = extra.cwiseMin(double_range - extra); + } else { + auto double_range = static_cast((max_val + 1) * 2); + auto grid_abs = (grid_slice_t + static_cast(0.5)).abs(); + auto extra = grid_abs - (grid_abs / double_range).floor() * double_range; + grid_slice_t.device(place) = + extra.cwiseMin(double_range - extra) - static_cast(0.5); + grid_slice_t.device(place) = grid_slice_t.cwiseMax(static_cast(0)) + .cwiseMin(static_cast(max_val)); + } + } +} + +template +static inline void clipWithMask(const platform::CPUDeviceContext& ctx, + const int max_val, // height-1 or width-1 + bool align_corners, std::string padding_mode, + Tensor* grid_slice, Tensor* grid_scale) { + auto& place = *ctx.eigen_device(); + grid_scale->mutable_data(grid_slice->dims(), ctx.GetPlace()); + + auto grid_slice_t = EigenTensor::From(*grid_slice); + auto factor = static_cast(max_val * 0.5); + if (!align_corners) { + factor = static_cast((max_val + 1) * 0.5); + } + auto grid_scale_t = EigenTensor::From(*grid_scale).setConstant(factor); + + if (padding_mode == "border") { + // auto bounded_lo = grid_slice_t.cwiseMax(static_cast(0)); + auto res = grid_slice_t.cwiseMax(static_cast(0)) + 
.cwiseMin(static_cast(max_val)); + + auto in_bound = (res == grid_slice_t); + grid_scale_t.device(place) = grid_scale_t * in_bound.template cast(); + grid_slice_t.device(place) = res; + } else if (padding_mode == "reflect") { + if (align_corners) { + auto double_range = static_cast(max_val * 2); + auto is_neg = (grid_slice_t < static_cast(0)); + auto grid_abs = grid_slice_t.abs(); + auto extra = grid_abs - (grid_abs / double_range).floor() * double_range; + auto one_more_flip = (extra > (double_range - extra)); + grid_scale_t.device(place) = + grid_scale_t * ((is_neg == one_more_flip).template cast() - + (is_neg != one_more_flip).template cast()); + grid_slice_t.device(place) = extra.cwiseMin(double_range - extra); + } else { + auto double_range = static_cast((max_val + 1) * 2); + auto grid_abs = (grid_slice_t + static_cast(0.5)).abs(); + auto is_neg = ((grid_slice_t + static_cast(0.5)) < static_cast(0)); + auto extra = grid_abs - (grid_abs / double_range).floor() * double_range; + auto one_more_flip = (extra > (double_range - extra)); + auto reflected = + extra.cwiseMin(double_range - extra) - static_cast(0.5); + auto clipped = reflected.cwiseMax(static_cast(0)) + .cwiseMin(static_cast(max_val)); + auto in_bound = (clipped == reflected).template cast(); + grid_scale_t.device(place) = + grid_scale_t * ((is_neg == one_more_flip).template cast() - + (is_neg != one_more_flip).template cast()) * + in_bound; + grid_slice_t.device(place) = clipped; + } + } +} + +template +static void calcGridLocations(const platform::CPUDeviceContext& ctx, + const Tensor& grid, const int in_h, + const int in_w, bool align_corners, + std::string padding_mode, Tensor* grid_x, + Tensor* grid_y) { const int n = grid.dims()[0]; - const int h = grid.dims()[1]; - const int w = grid.dims()[2]; - const T x_max = static_cast(w - 1); - const T y_max = static_cast(h - 1); + const int out_h = grid.dims()[1]; + const int out_w = grid.dims()[2]; // split grid with shape (n, h, w, 2) into (x, y) by the 3rd Dim - Tensor grid_x, grid_y; - T* grid_x_data = grid_x.mutable_data({n, h, w}, ctx.GetPlace()); - T* grid_y_data = grid_y.mutable_data({n, h, w}, ctx.GetPlace()); + T* grid_x_data = grid_x->mutable_data({n, out_h, out_w}, ctx.GetPlace()); + T* grid_y_data = grid_y->mutable_data({n, out_h, out_w}, ctx.GetPlace()); const T* grid_data = grid.data(); - for (int i = 0; i < n * h * w; i++) { + for (int i = 0; i < n * out_h * out_w; i++) { grid_x_data[i] = grid_data[2 * i]; grid_y_data[i] = grid_data[(2 * i) + 1]; } - Tensor ones; - ones.mutable_data({n, h, w}, ctx.GetPlace()); - auto ones_t = EigenTensor::From(ones).setConstant(1.0); - Tensor half_xmax; - Tensor half_ymax; - half_xmax.mutable_data({n, h, w}, ctx.GetPlace()); - auto half_xmax_t = - EigenTensor::From(half_xmax).setConstant(0.5 * x_max); - half_ymax.mutable_data({n, h, w}, ctx.GetPlace()); - auto half_ymax_t = - EigenTensor::From(half_ymax).setConstant(0.5 * y_max); - - // scale grid to [0, h-1/w-1] - auto grid_x_t = EigenTensor::From(grid_x); - auto grid_y_t = EigenTensor::From(grid_y); - grid_x_t.device(place) = (grid_x_t + ones_t) * half_xmax_t; - grid_y_t.device(place) = (grid_y_t + ones_t) * half_ymax_t; + unnormalize(ctx, grid_x, in_w - 1, align_corners); + unnormalize(ctx, grid_y, in_h - 1, align_corners); + + clip(ctx, grid_x, in_w - 1, align_corners, padding_mode); + clip(ctx, grid_y, in_h - 1, align_corners, padding_mode); +} + +template +static void calcGridLocationsWithGrad(const platform::CPUDeviceContext& ctx, + const Tensor& grid, const int in_h, + 
const int in_w, bool align_corners, + std::string padding_mode, Tensor* grid_x, + Tensor* grid_y, Tensor* grid_x_scale, + Tensor* grid_y_scale) { + const int n = grid.dims()[0]; + const int out_h = grid.dims()[1]; + const int out_w = grid.dims()[2]; + + // split grid with shape (n, h, w, 2) into (x, y) by the 3rd Dim + T* grid_x_data = grid_x->mutable_data({n, out_h, out_w}, ctx.GetPlace()); + T* grid_y_data = grid_y->mutable_data({n, out_h, out_w}, ctx.GetPlace()); + + const T* grid_data = grid.data(); + for (int i = 0; i < n * out_h * out_w; i++) { + grid_x_data[i] = grid_data[2 * i]; + grid_y_data[i] = grid_data[(2 * i) + 1]; + } + unnormalize(ctx, grid_x, in_w - 1, align_corners); + unnormalize(ctx, grid_y, in_h - 1, align_corners); + + clipWithMask(ctx, in_w - 1, align_corners, padding_mode, grid_x, + grid_x_scale); + clipWithMask(ctx, in_h - 1, align_corners, padding_mode, grid_y, + grid_y_scale); +} + +template +static void getGridPointValue(const Tensor& input, Tensor* output, + const Tensor& x, const Tensor& y) { + const int n = input.dims()[0]; + const int c = input.dims()[1]; + const int in_h = input.dims()[2]; + const int in_w = input.dims()[3]; + const int out_h = x.dims()[1]; + const int out_w = x.dims()[2]; + auto x_t = EigenTensor::From(x); + auto y_t = EigenTensor::From(y); + auto output_t = EigenTensor::From(*output).setConstant((T)0); + auto input_t = EigenTensor::From(input); + + for (int i = 0; i < n; i++) { + for (int k = 0; k < out_h; k++) { + for (int l = 0; l < out_w; l++) { + if (isInBound(x_t(i, k, l), y_t(i, k, l), (T)(in_w - 1), + (T)(in_h - 1))) { + for (int j = 0; j < c; j++) { + output_t(i, j, k, l) = + input_t(i, j, static_cast(round(y_t(i, k, l))), + static_cast(round(x_t(i, k, l)))); + } + } + } + } + } +} + +template +static void allNeigbors(const platform::CPUDeviceContext& ctx, + const Tensor& input, Tensor* grid_x, Tensor* grid_y, + Tensor* x_w, Tensor* x_e, Tensor* y_n, + Tensor* y_s, // positions + Tensor* d_w, Tensor* d_e, Tensor* d_n, + Tensor* d_s, // distance + Tensor* v_wn, Tensor* v_en, Tensor* v_ws, + Tensor* v_es) { // values + auto& place = *ctx.eigen_device(); + + const int c = input.dims()[1]; + const int n = grid_x->dims()[0]; + const int out_h = grid_x->dims()[1]; + const int out_w = grid_x->dims()[2]; // calculate coords of 4 corner points - x_w->mutable_data({n, h, w}, ctx.GetPlace()); - x_e->mutable_data({n, h, w}, ctx.GetPlace()); - y_n->mutable_data({n, h, w}, ctx.GetPlace()); - y_s->mutable_data({n, h, w}, ctx.GetPlace()); + x_w->mutable_data({n, out_h, out_w}, ctx.GetPlace()); + x_e->mutable_data({n, out_h, out_w}, ctx.GetPlace()); + y_n->mutable_data({n, out_h, out_w}, ctx.GetPlace()); + y_s->mutable_data({n, out_h, out_w}, ctx.GetPlace()); auto x_w_t = EigenTensor::From(*x_w); auto x_e_t = EigenTensor::From(*x_e); auto y_n_t = EigenTensor::From(*y_n); auto y_s_t = EigenTensor::From(*y_s); + + auto grid_x_t = EigenTensor::From(*grid_x); + auto grid_y_t = EigenTensor::From(*grid_y); + x_w_t.device(place) = grid_x_t.floor(); - x_e_t.device(place) = x_w_t + ones_t; + x_e_t.device(place) = x_w_t + static_cast(1); y_n_t.device(place) = grid_y_t.floor(); - y_s_t.device(place) = y_n_t + ones_t; + y_s_t.device(place) = y_n_t + static_cast(1); // calculate distances to 4 sides - d_w->mutable_data({n, h, w}, ctx.GetPlace()); - d_e->mutable_data({n, h, w}, ctx.GetPlace()); - d_n->mutable_data({n, h, w}, ctx.GetPlace()); - d_s->mutable_data({n, h, w}, ctx.GetPlace()); + d_w->mutable_data({n, out_h, out_w}, ctx.GetPlace()); + 
d_e->mutable_data({n, out_h, out_w}, ctx.GetPlace()); + d_n->mutable_data({n, out_h, out_w}, ctx.GetPlace()); + d_s->mutable_data({n, out_h, out_w}, ctx.GetPlace()); auto d_w_t = EigenTensor::From(*d_w); auto d_e_t = EigenTensor::From(*d_e); auto d_n_t = EigenTensor::From(*d_n); @@ -105,28 +280,100 @@ static void CalcGridLocations(const platform::CPUDeviceContext& ctx, d_e_t.device(place) = x_e_t - grid_x_t; d_n_t.device(place) = grid_y_t - y_n_t; d_s_t.device(place) = y_s_t - grid_y_t; + + // calc 4 corner points value + v_wn->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); + v_en->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); + v_ws->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); + v_es->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); + getGridPointValue(input, v_wn, *x_w, *y_n); + getGridPointValue(input, v_en, *x_e, *y_n); + getGridPointValue(input, v_ws, *x_w, *y_s); + getGridPointValue(input, v_es, *x_e, *y_s); } template -static void GetGridPointValue(const Tensor& input, Tensor* output, - const Tensor& x, const Tensor& y) { - const int n = input.dims()[0]; +static void bilinearInter(const platform::CPUDeviceContext& ctx, + const Tensor& input, Tensor* grid_x, Tensor* grid_y, + Tensor* out) { + auto& place = *ctx.eigen_device(); + const int n = grid_x->dims()[0]; + const int out_h = grid_x->dims()[1]; + const int out_w = grid_x->dims()[2]; const int c = input.dims()[1]; - const int h = input.dims()[2]; - const int w = input.dims()[3]; + + Tensor x_w, x_e, y_n, y_s; + Tensor d_w, d_e, d_n, d_s; + Tensor v_wn, v_en, v_ws, v_es; + + allNeigbors(ctx, input, grid_x, grid_y, &x_w, &x_e, &y_n, &y_s, &d_w, &d_e, + &d_n, &d_s, &v_wn, &v_en, &v_ws, &v_es); + + auto d_w_t = EigenTensor::From(d_w); + auto d_e_t = EigenTensor::From(d_e); + auto d_n_t = EigenTensor::From(d_n); + auto d_s_t = EigenTensor::From(d_s); + + auto d_w_scaled_t = + d_w_t.reshape(Array4(n, 1, out_h, out_w)).broadcast(Array4(1, c, 1, 1)); + auto d_e_scaled_t = + d_e_t.reshape(Array4(n, 1, out_h, out_w)).broadcast(Array4(1, c, 1, 1)); + auto d_n_scaled_t = + d_n_t.reshape(Array4(n, 1, out_h, out_w)).broadcast(Array4(1, c, 1, 1)); + auto d_s_scaled_t = + d_s_t.reshape(Array4(n, 1, out_h, out_w)).broadcast(Array4(1, c, 1, 1)); + auto v_wn_t = EigenTensor::From(v_wn); + auto v_en_t = EigenTensor::From(v_en); + auto v_ws_t = EigenTensor::From(v_ws); + auto v_es_t = EigenTensor::From(v_es); + auto output_t = EigenTensor::From(*out); + // bilinear interpolaetion by 4 corner points + output_t.device(place) = v_wn_t * d_e_scaled_t * d_s_scaled_t + + v_en_t * d_w_scaled_t * d_s_scaled_t + + v_ws_t * d_e_scaled_t * d_n_scaled_t + + v_es_t * d_w_scaled_t * d_n_scaled_t; +} + +template +static void nearestInter(const platform::CPUDeviceContext& ctx, + const Tensor& input, Tensor* grid_x, Tensor* grid_y, + Tensor* out) { + auto& place = *ctx.eigen_device(); + + auto grid_x_t = EigenTensor::From(*grid_x); + auto grid_y_t = EigenTensor::From(*grid_y); + grid_x_t = grid_x_t.round(); + grid_y_t = grid_y_t.round(); + getGridPointValue(input, out, *grid_x, *grid_y); +} + +template +static void gatherOutputGradToInputGrad(const Tensor& output_grad, + Tensor* input_grad, const Tensor& x, + const Tensor& y, const Tensor& d1, + const Tensor& d2) { + const int n = output_grad.dims()[0]; + const int c = output_grad.dims()[1]; + const int out_h = output_grad.dims()[2]; + const int out_w = output_grad.dims()[3]; + const int in_h = input_grad->dims()[2]; + const int in_w = input_grad->dims()[3]; auto x_t = EigenTensor::From(x); auto 
y_t = EigenTensor::From(y); - auto output_t = EigenTensor::From(*output).setConstant((T)0); - auto input_t = EigenTensor::From(input); + auto d1_t = EigenTensor::From(d1); + auto d2_t = EigenTensor::From(d2); + auto input_grad_t = EigenTensor::From(*input_grad); + auto output_grad_t = EigenTensor::From(output_grad); for (int i = 0; i < n; i++) { - for (int k = 0; k < h; k++) { - for (int l = 0; l < w; l++) { - if (isInBound(x_t(i, k, l), y_t(i, k, l), (T)(w - 1), (T)(h - 1))) { + for (int k = 0; k < out_h; k++) { + for (int l = 0; l < out_w; l++) { + if (isInBound(x_t(i, k, l), y_t(i, k, l), (T)(in_w - 1), + (T)(in_h - 1))) { for (int j = 0; j < c; j++) { - output_t(i, j, k, l) = - input_t(i, j, static_cast(round(y_t(i, k, l))), - static_cast(round(x_t(i, k, l)))); + input_grad_t(i, j, static_cast(round(y_t(i, k, l))), + static_cast(round(x_t(i, k, l)))) += + output_grad_t(i, j, k, l) * d1_t(i, k, l) * d2_t(i, k, l); } } } @@ -135,29 +382,28 @@ static void GetGridPointValue(const Tensor& input, Tensor* output, } template -static void GatherOutputGradToInputGrad(const Tensor& output_grad, +static void gatherOutputGradToInputGrad(const Tensor& output_grad, Tensor* input_grad, const Tensor& x, - const Tensor& y, const Tensor& d1, - const Tensor& d2) { + const Tensor& y) { const int n = output_grad.dims()[0]; const int c = output_grad.dims()[1]; - const int h = output_grad.dims()[2]; - const int w = output_grad.dims()[3]; + const int out_h = output_grad.dims()[2]; + const int out_w = output_grad.dims()[3]; + const int in_h = input_grad->dims()[2]; + const int in_w = input_grad->dims()[3]; auto x_t = EigenTensor::From(x); auto y_t = EigenTensor::From(y); - auto d1_t = EigenTensor::From(d1); - auto d2_t = EigenTensor::From(d2); auto input_grad_t = EigenTensor::From(*input_grad); auto output_grad_t = EigenTensor::From(output_grad); - for (int i = 0; i < n; i++) { - for (int k = 0; k < h; k++) { - for (int l = 0; l < w; l++) { - if (isInBound(x_t(i, k, l), y_t(i, k, l), (T)(w - 1), (T)(h - 1))) { + for (int k = 0; k < out_h; k++) { + for (int l = 0; l < out_w; l++) { + if (isInBound(x_t(i, k, l), y_t(i, k, l), (T)(in_w - 1), + (T)(in_h - 1))) { for (int j = 0; j < c; j++) { input_grad_t(i, j, static_cast(round(y_t(i, k, l))), static_cast(round(x_t(i, k, l)))) += - output_grad_t(i, j, k, l) * d1_t(i, k, l) * d2_t(i, k, l); + output_grad_t(i, j, k, l); } } } @@ -165,65 +411,126 @@ static void GatherOutputGradToInputGrad(const Tensor& output_grad, } } +template +static void gatherBilinearGrad(const platform::CPUDeviceContext& ctx, + const Tensor& input, const Tensor& output_grad, + Tensor* grid_x, Tensor* grid_y, + Tensor* grid_x_scale, Tensor* grid_y_scale, + Tensor* input_grad, Tensor* grid_grad) { + const int n = grid_x->dims()[0]; + const int out_h = grid_x->dims()[1]; + const int out_w = grid_x->dims()[2]; + const int c = input.dims()[1]; + + Tensor x_w, x_e, y_n, y_s; + Tensor d_w, d_e, d_n, d_s; + Tensor v_wn, v_en, v_ws, v_es; + + allNeigbors(ctx, input, + grid_x, // grid_x + grid_y, // grid_y + &x_w, &x_e, &y_n, &y_s, &d_w, &d_e, &d_n, &d_s, &v_wn, &v_en, + &v_ws, &v_es); + + // gather output grad value to input grad by corner point coords and weight + gatherOutputGradToInputGrad(output_grad, input_grad, x_w, y_n, d_e, d_s); + gatherOutputGradToInputGrad(output_grad, input_grad, x_w, y_s, d_e, d_n); + gatherOutputGradToInputGrad(output_grad, input_grad, x_e, y_n, d_w, d_s); + gatherOutputGradToInputGrad(output_grad, input_grad, x_e, y_s, d_w, d_n); + + auto v_wn_t = EigenTensor::From(v_wn); 
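As a reference for the four-corner weighting that bilinearInter applies per channel, here is a scalar sketch: each output sample mixes the west/east, north/south neighbours with weights equal to the opposite distances (d_w + d_e = 1, d_n + d_s = 1). The helper names are illustrative, and the sketch assumes (x, y) is already clipped so all four neighbours are valid.

// Illustrative sketch, not the operator code.
#include <cmath>
#include <cstdio>

double BilinearSample(const double* img, int h, int w, double x, double y) {
  int xw = static_cast<int>(std::floor(x)), xe = xw + 1;  // west/east columns
  int yn = static_cast<int>(std::floor(y)), ys = yn + 1;  // north/south rows
  double dw = x - xw, de = xe - x;  // horizontal distances (dw + de == 1)
  double dn = y - yn, ds = ys - y;  // vertical distances   (dn + ds == 1)
  auto at = [&](int r, int c) { return img[r * w + c]; };
  return at(yn, xw) * de * ds + at(yn, xe) * dw * ds +
         at(ys, xw) * de * dn + at(ys, xe) * dw * dn;
}

int main() {
  const double img[2 * 2] = {0.0, 1.0, 2.0, 3.0};  // 2x2 image
  std::printf("%.3f\n", BilinearSample(img, 2, 2, 0.5, 0.5));  // prints 1.500
}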
+ auto v_en_t = EigenTensor::From(v_en); + auto v_ws_t = EigenTensor::From(v_ws); + auto v_es_t = EigenTensor::From(v_es); + + auto d_w_t = EigenTensor::From(d_w); + auto d_e_t = EigenTensor::From(d_e); + auto d_n_t = EigenTensor::From(d_n); + auto d_s_t = EigenTensor::From(d_s); + + auto output_grad_t = EigenTensor::From(output_grad); + + Tensor grid_grad_x, grid_grad_y; + grid_grad_x.mutable_data({n, out_h, out_w}, ctx.GetPlace()); + grid_grad_y.mutable_data({n, out_h, out_w}, ctx.GetPlace()); + auto grid_grad_x_t = + EigenTensor::From(grid_grad_x).setConstant(static_cast(0.0)); + auto grid_grad_y_t = + EigenTensor::From(grid_grad_y).setConstant(static_cast(0.0)); + for (int i = 0; i < n; i++) { + for (int j = 0; j < c; j++) { + for (int k = 0; k < out_h; k++) { + for (int l = 0; l < out_w; l++) { + grid_grad_x_t(i, k, l) += + ((v_en_t(i, j, k, l) - v_wn_t(i, j, k, l)) * d_s_t(i, k, l) + + (v_es_t(i, j, k, l) - v_ws_t(i, j, k, l)) * d_n_t(i, k, l)) * + output_grad_t(i, j, k, l); + grid_grad_y_t(i, k, l) += + ((v_ws_t(i, j, k, l) - v_wn_t(i, j, k, l)) * d_e_t(i, k, l) + + (v_es_t(i, j, k, l) - v_en_t(i, j, k, l)) * d_w_t(i, k, l)) * + output_grad_t(i, j, k, l); + } + } + } + } + + // const T x_max = static_cast(in_w - 1); + // const T y_max = static_cast(in_h - 1); + + auto grid_x_scale_t = EigenTensor::From(*grid_x_scale); + auto grid_y_scale_t = EigenTensor::From(*grid_y_scale); + grid_grad_x_t = grid_grad_x_t * grid_x_scale_t; + grid_grad_y_t = grid_grad_y_t * grid_y_scale_t; + + // gather grid_grad [x, y] in 3rd Dim + T* grid_grad_data = grid_grad->data(); + T* grid_grad_x_data = grid_grad_x.data(); + T* grid_grad_y_data = grid_grad_y.data(); + for (int i = 0; i < n * out_h * out_w; i++) { + grid_grad_data[2 * i] = grid_grad_x_data[i]; + grid_grad_data[2 * i + 1] = grid_grad_y_data[i]; + } +} + template class GridSampleOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto& place = *ctx.template device_context().eigen_device(); + auto align_corners = ctx.Attr("align_corners"); + auto padding_mode = ctx.Attr("padding_mode"); + auto mode = ctx.Attr("mode"); + auto* input = ctx.Input("X"); auto* grid = ctx.Input("Grid"); - const int n = input->dims()[0]; + const int n = grid->dims()[0]; + const int out_h = grid->dims()[1]; + const int out_w = grid->dims()[2]; const int c = input->dims()[1]; - const int h = input->dims()[2]; - const int w = input->dims()[3]; - - // calc locations and distances of 4 corner points - Tensor x_w, x_e, y_n, y_s; - Tensor d_w, d_e, d_n, d_s; - CalcGridLocations( - ctx.template device_context(), *grid, &x_w, - &x_e, &y_n, &y_s, &d_w, &d_e, &d_n, &d_s); + const int in_h = input->dims()[2]; + const int in_w = input->dims()[3]; auto* output = ctx.Output("Output"); - output->mutable_data({n, c, h, w}, ctx.GetPlace()); + output->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); math::SetConstant()( ctx.template device_context(), output, static_cast(0)); - // calc 4 corner points value - Tensor v_wn, v_en, v_ws, v_es; - v_wn.mutable_data({n, c, h, w}, ctx.GetPlace()); - v_en.mutable_data({n, c, h, w}, ctx.GetPlace()); - v_ws.mutable_data({n, c, h, w}, ctx.GetPlace()); - v_es.mutable_data({n, c, h, w}, ctx.GetPlace()); - GetGridPointValue(*input, &v_wn, x_w, y_n); - GetGridPointValue(*input, &v_en, x_e, y_n); - GetGridPointValue(*input, &v_ws, x_w, y_s); - GetGridPointValue(*input, &v_es, x_e, y_s); - - auto d_w_t = EigenTensor::From(d_w); - auto d_e_t = EigenTensor::From(d_e); - auto d_n_t = 
EigenTensor::From(d_n); - auto d_s_t = EigenTensor::From(d_s); - auto d_w_scaled_t = - d_w_t.reshape(Array4(n, 1, h, w)).broadcast(Array4(1, c, 1, 1)); - auto d_e_scaled_t = - d_e_t.reshape(Array4(n, 1, h, w)).broadcast(Array4(1, c, 1, 1)); - auto d_n_scaled_t = - d_n_t.reshape(Array4(n, 1, h, w)).broadcast(Array4(1, c, 1, 1)); - auto d_s_scaled_t = - d_s_t.reshape(Array4(n, 1, h, w)).broadcast(Array4(1, c, 1, 1)); - auto v_wn_t = EigenTensor::From(v_wn); - auto v_en_t = EigenTensor::From(v_en); - auto v_ws_t = EigenTensor::From(v_ws); - auto v_es_t = EigenTensor::From(v_es); - auto output_t = EigenTensor::From(*output); - // bilinear interpolaetion by 4 corner points - output_t.device(place) = v_wn_t * d_e_scaled_t * d_s_scaled_t + - v_en_t * d_w_scaled_t * d_s_scaled_t + - v_ws_t * d_e_scaled_t * d_n_scaled_t + - v_es_t * d_w_scaled_t * d_n_scaled_t; + Tensor grid_x, grid_y; + calcGridLocations( + ctx.template device_context(), *grid, in_h, + in_w, align_corners, padding_mode, &grid_x, &grid_y); + if (mode == "bilinear") { + bilinearInter( + ctx.template device_context(), *input, + &grid_x, &grid_y, output); + } else if (mode == "nearest") { + auto grid_x_t = EigenTensor::From(grid_x); + auto grid_y_t = EigenTensor::From(grid_y); + grid_x_t = grid_x_t.round(); + grid_y_t = grid_y_t.round(); + getGridPointValue(*input, output, grid_x, grid_y); + } } }; @@ -231,97 +538,48 @@ template class GridSampleGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { + auto align_corners = ctx.Attr("align_corners"); + auto padding_mode = ctx.Attr("padding_mode"); + auto mode = ctx.Attr("mode"); + auto* input = ctx.Input("X"); auto* grid = ctx.Input("Grid"); auto* output_grad = ctx.Input(framework::GradVarName("Output")); - const int n = input->dims()[0]; + const int n = grid->dims()[0]; + const int out_h = grid->dims()[1]; + const int out_w = grid->dims()[2]; const int c = input->dims()[1]; - const int h = input->dims()[2]; - const int w = input->dims()[3]; + const int in_h = input->dims()[2]; + const int in_w = input->dims()[3]; auto* input_grad = ctx.Output(framework::GradVarName("X")); - input_grad->mutable_data({n, c, h, w}, ctx.GetPlace()); + input_grad->mutable_data({n, c, in_h, in_w}, ctx.GetPlace()); math::SetConstant()( ctx.template device_context(), input_grad, static_cast(0)); auto* grid_grad = ctx.Output(framework::GradVarName("Grid")); - grid_grad->mutable_data({n, h, w, 2}, ctx.GetPlace()); + grid_grad->mutable_data({n, out_h, out_w, 2}, ctx.GetPlace()); math::SetConstant()( ctx.template device_context(), grid_grad, static_cast(0)); - - Tensor x_w, x_e, y_n, y_s; - Tensor d_w, d_e, d_n, d_s; - CalcGridLocations( - ctx.template device_context(), *grid, &x_w, - &x_e, &y_n, &y_s, &d_w, &d_e, &d_n, &d_s); - - // gather output grad value to input grad by corner point coords and weight - GatherOutputGradToInputGrad(*output_grad, input_grad, x_w, y_n, d_e, - d_s); - GatherOutputGradToInputGrad(*output_grad, input_grad, x_w, y_s, d_e, - d_n); - GatherOutputGradToInputGrad(*output_grad, input_grad, x_e, y_n, d_w, - d_s); - GatherOutputGradToInputGrad(*output_grad, input_grad, x_e, y_s, d_w, - d_n); - - // calc 4 corner points value - Tensor v_wn, v_en, v_ws, v_es; - v_wn.mutable_data({n, c, h, w}, ctx.GetPlace()); - v_en.mutable_data({n, c, h, w}, ctx.GetPlace()); - v_ws.mutable_data({n, c, h, w}, ctx.GetPlace()); - v_es.mutable_data({n, c, h, w}, ctx.GetPlace()); - GetGridPointValue(*input, &v_wn, x_w, y_n); - 
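The "nearest" branch above simply rounds the unnormalized coordinates and gathers the pixel if it falls inside the input, leaving the zero-initialized output untouched otherwise. A scalar sketch of that behaviour, with illustrative names:

// Illustrative sketch, not the operator code.
#include <cmath>
#include <cstdio>

double NearestSample(const double* img, int h, int w, double x, double y) {
  int xi = static_cast<int>(std::round(x));
  int yi = static_cast<int>(std::round(y));
  bool in_bound = xi >= 0 && xi < w && yi >= 0 && yi < h;
  return in_bound ? img[yi * w + xi] : 0.0;  // out-of-bound samples stay zero
}

int main() {
  const double img[2 * 2] = {0.0, 1.0, 2.0, 3.0};
  std::printf("%.1f %.1f\n",
              NearestSample(img, 2, 2, 0.4, 0.6),   // rounds to img[1][0]
              NearestSample(img, 2, 2, 2.2, 0.0));  // out of bounds -> 0
}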
GetGridPointValue(*input, &v_en, x_e, y_n); - GetGridPointValue(*input, &v_ws, x_w, y_s); - GetGridPointValue(*input, &v_es, x_e, y_s); - auto v_wn_t = EigenTensor::From(v_wn); - auto v_en_t = EigenTensor::From(v_en); - auto v_ws_t = EigenTensor::From(v_ws); - auto v_es_t = EigenTensor::From(v_es); - - auto d_w_t = EigenTensor::From(d_w); - auto d_e_t = EigenTensor::From(d_e); - auto d_n_t = EigenTensor::From(d_n); - auto d_s_t = EigenTensor::From(d_s); - - auto output_grad_t = EigenTensor::From(*output_grad); - - Tensor grid_grad_x, grid_grad_y; - grid_grad_x.mutable_data({n, h, w}, ctx.GetPlace()); - grid_grad_y.mutable_data({n, h, w}, ctx.GetPlace()); - auto grid_grad_x_t = EigenTensor::From(grid_grad_x).setConstant(0.0); - auto grid_grad_y_t = EigenTensor::From(grid_grad_y).setConstant(0.0); - for (int i = 0; i < n; i++) { - for (int j = 0; j < c; j++) { - for (int k = 0; k < h; k++) { - for (int l = 0; l < w; l++) { - grid_grad_x_t(i, k, l) += - ((v_en_t(i, j, k, l) - v_wn_t(i, j, k, l)) * d_s_t(i, k, l) + - (v_es_t(i, j, k, l) - v_ws_t(i, j, k, l)) * d_n_t(i, k, l)) * - output_grad_t(i, j, k, l); - grid_grad_y_t(i, k, l) += - ((v_ws_t(i, j, k, l) - v_wn_t(i, j, k, l)) * d_e_t(i, k, l) + - (v_es_t(i, j, k, l) - v_en_t(i, j, k, l)) * d_w_t(i, k, l)) * - output_grad_t(i, j, k, l); - } - } - } - } - const T x_max = static_cast(w - 1); - const T y_max = static_cast(h - 1); - grid_grad_x_t = grid_grad_x_t * (x_max / (T)2); - grid_grad_y_t = grid_grad_y_t * (y_max / (T)2); - - // gather grid_grad [x, y] in 3rd Dim - T* grid_grad_data = grid_grad->data(); - T* grid_grad_x_data = grid_grad_x.data(); - T* grid_grad_y_data = grid_grad_y.data(); - for (int i = 0; i < n * h * w; i++) { - grid_grad_data[2 * i] = grid_grad_x_data[i]; - grid_grad_data[2 * i + 1] = grid_grad_y_data[i]; + Tensor grid_x, grid_y; + Tensor grid_x_scale, grid_y_scale; + calcGridLocationsWithGrad( + ctx.template device_context(), *grid, in_h, + in_w, align_corners, padding_mode, &grid_x, &grid_y, &grid_x_scale, + &grid_y_scale); + if (mode == "bilinear") { + gatherBilinearGrad(ctx.template device_context(), + *input, *output_grad, &grid_x, &grid_y, + &grid_x_scale, &grid_y_scale, input_grad, + grid_grad); + } else { + auto grid_x_t = EigenTensor::From(grid_x); + auto grid_y_t = EigenTensor::From(grid_y); + grid_x_t = grid_x_t.round(); + grid_y_t = grid_y_t.round(); + gatherOutputGradToInputGrad(*output_grad, input_grad, grid_x, grid_y); } } }; diff --git a/paddle/fluid/operators/huber_loss_op.cu b/paddle/fluid/operators/huber_loss_op.cu index 09c743c4275169ba8c53ccbd428100b2fc4483d6..4ce6856a7eade1b314d8aef1d039424ad42e07cf 100644 --- a/paddle/fluid/operators/huber_loss_op.cu +++ b/paddle/fluid/operators/huber_loss_op.cu @@ -16,7 +16,9 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( huber_loss, - ops::HuberLossKernel); + ops::HuberLossKernel, + ops::HuberLossKernel); REGISTER_OP_CUDA_KERNEL( huber_loss_grad, - ops::HuberLossGradKernel); + ops::HuberLossGradKernel, + ops::HuberLossGradKernel); diff --git a/paddle/fluid/operators/isfinite_v2_op.cc b/paddle/fluid/operators/isfinite_v2_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..72da43e3bc63c1c585fe19d703892c23ce7b0ec2 --- /dev/null +++ b/paddle/fluid/operators/isfinite_v2_op.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/isfinite_v2_op.h" +#include +#include +#include "paddle/fluid/operators/common_infer_shape_functions.h" +#include "paddle/fluid/platform/float16.h" + +namespace plat = paddle::platform; + +namespace paddle { +namespace operators { + +class OverflowV2Op : public framework::OperatorWithKernel { + public: + OverflowV2Op(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + void InferShape(framework::InferShapeContext *ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "isfinitev2"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "isfinitev2"); + UnaryOpUnchangedInferShape(ctx); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + int dtype = -1; + auto *x_var = ctx.InputVar("X"); + if (x_var->IsType()) { + dtype = x_var->Get().type(); + } else if (x_var->IsType()) { + dtype = x_var->Get().value().type(); + } else { + PADDLE_THROW(plat::errors::InvalidArgument( + "Cannot find the input data type by all input data")); + } + return framework::OpKernelType(framework::proto::VarType::Type(dtype), + ctx.GetPlace()); + } +}; + +class OverflowV2OpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor) The input tensors of overflowv2 operator."); + AddOutput("Out", + "(Tensor) The output tensor of overflowv2 operator. " + "Same size compare to input tensor"); + AddComment(string::Sprintf(R"DOC( +Overflow %s operator. + +$$Out = %s(X)$$ + +Check whether each element of X is Inf or Nan, return the bool result of each +element of X as a tensor. 
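To make the semantics of the comment above concrete: the *_v2 checks produce one boolean per element of X rather than a single reduced flag. A minimal standalone sketch using std::isinf / std::isnan / std::isfinite; it is illustrative only and does not call the operator's functors.

// Illustrative sketch, not the operator code.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<double> x = {1.0, INFINITY, NAN, -2.5};
  for (double v : x) {
    bool is_inf = std::isinf(v);
    bool is_nan = std::isnan(v);
    bool is_finite = std::isfinite(v);  // true iff neither inf nor nan
    std::printf("x=% .2f isinf=%d isnan=%d isfinite=%d\n", v, is_inf, is_nan,
                is_finite);
  }
}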
+ +%s +)DOC", + GetName(), GetComments())); + } + + protected: + virtual std::string GetName() const = 0; + virtual std::string GetComments() const = 0; +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +#define REGISTER_V2OP_MAKER(op_type, comment) \ + namespace paddle { \ + namespace operators { \ + class _##op_type##OverflowV2OpMaker \ + : public ::paddle::operators::OverflowV2OpMaker { \ + protected: \ + std::string GetName() const { return #op_type; } \ + std::string GetComments() const { return comment; } \ + }; \ + } \ + } \ + REGISTER_OPERATOR( \ + op_type, ops::OverflowV2Op, ops::_##op_type##OverflowV2OpMaker, \ + paddle::framework::EmptyGradOpMaker, \ + paddle::framework::EmptyGradOpMaker) + +#define REGISTER_OVERFLOW_CPU_KERNEL(op_type, functor) \ + REGISTER_OP_CPU_KERNEL( \ + op_type, ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel); + +REGISTER_V2OP_MAKER(isinf_v2, "isinfv2(X)"); +REGISTER_V2OP_MAKER(isnan_v2, "isnanv2(X)"); +REGISTER_V2OP_MAKER(isfinite_v2, "isfinitev2(X)"); + +REGISTER_OVERFLOW_CPU_KERNEL(isinf_v2, InfinityV2Functor); +REGISTER_OVERFLOW_CPU_KERNEL(isnan_v2, NANV2Functor); +REGISTER_OVERFLOW_CPU_KERNEL(isfinite_v2, IsfiniteV2Functor); diff --git a/paddle/fluid/operators/isfinite_v2_op.cu b/paddle/fluid/operators/isfinite_v2_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..4a6d818d0501e60dfffc8995075bb7f0369788fd --- /dev/null +++ b/paddle/fluid/operators/isfinite_v2_op.cu @@ -0,0 +1,36 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/isfinite_v2_op.h" +#include "paddle/fluid/platform/float16.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +#define REGISTER_OVERFLOW_CUDA_KERNEL(op_type, functor) \ + REGISTER_OP_CUDA_KERNEL( \ + op_type, ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel); + +REGISTER_OVERFLOW_CUDA_KERNEL(isinf_v2, InfinityV2Functor); +REGISTER_OVERFLOW_CUDA_KERNEL(isnan_v2, NANV2Functor); +REGISTER_OVERFLOW_CUDA_KERNEL(isfinite_v2, IsfiniteV2Functor); diff --git a/paddle/fluid/operators/isfinite_v2_op.h b/paddle/fluid/operators/isfinite_v2_op.h new file mode 100644 index 0000000000000000000000000000000000000000..9f0aa63ce80248ee9f7839890f611b9d5293789e --- /dev/null +++ b/paddle/fluid/operators/isfinite_v2_op.h @@ -0,0 +1,47 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/operators/isfinite_op.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/fluid/platform/transform.h" + +namespace paddle { +namespace operators { + +struct InfinityV2Functor { + void operator()(const framework::Tensor& tensor, framework::Tensor* out) { + framework::TensorContainsInfV2(tensor, out); + } +}; + +struct NANV2Functor { + void operator()(const framework::Tensor& tensor, framework::Tensor* out) { + framework::TensorContainsNANV2(tensor, out); + } +}; + +struct IsfiniteV2Functor { + void operator()(const framework::Tensor& tensor, framework::Tensor* out) { + framework::TensorIsfiniteV2(tensor, out); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/linspace_op.cc b/paddle/fluid/operators/linspace_op.cc index 0a7146be83dcb673573f1fdcb94ed2d2c57bd2c3..2c3172d2a1112e2c79a3c1215ccd0d3f08d59451 100644 --- a/paddle/fluid/operators/linspace_op.cc +++ b/paddle/fluid/operators/linspace_op.cc @@ -53,11 +53,9 @@ class LinspaceOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - framework::LibraryType library_{framework::LibraryType::kPlain}; - framework::DataLayout layout_ = framework::DataLayout::kAnyLayout; return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "Start"), - ctx.device_context(), layout_, library_); + framework::proto::VarType::Type(ctx.Attr("dtype")), + ctx.GetPlace()); } }; @@ -73,6 +71,7 @@ class LinspaceOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Num", "Number of entry in the sequence. It is a tensor of shape [1], " "should be of type int32."); + AddAttr("dtype", "The output data type."); AddOutput("Out", "A sequence of numbers."); AddComment(R"DOC( Return fixed number of evenly spaced values within a given interval. First entry is start, and last entry is stop. In the case when Num is 1, only Start is returned. Like linspace function of numpy. 
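A standalone sketch of the arithmetic the patched linspace kernels (below) use: the step is computed in double and each entry is start + step * i, rather than repeatedly adding a T-typed step, so low-precision output types do not accumulate rounding error. The Linspace helper here is illustrative only.

// Illustrative sketch, not the operator code.
#include <cstdio>
#include <vector>

template <typename T>
std::vector<T> Linspace(T start, T stop, int num) {
  std::vector<T> out(num);
  if (num > 1) {
    double step = static_cast<double>(stop - start) / (num - 1);
    for (int i = 0; i < num; ++i) {
      out[i] = static_cast<T>(start + step * i);  // no cumulative error
    }
  } else if (num == 1) {
    out[0] = static_cast<T>(start);
  }
  return out;
}

int main() {
  for (float x : Linspace<float>(0.f, 1.f, 5)) std::printf("%.2f ", x);
  std::printf("\n");  // 0.00 0.25 0.50 0.75 1.00
}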
@@ -85,4 +84,6 @@ class LinspaceOpMaker : public framework::OpProtoAndCheckerMaker { namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(linspace, ops::LinspaceOp, ops::LinspaceOpMaker); REGISTER_OP_CPU_KERNEL(linspace, ops::CPULinspaceKernel, + ops::CPULinspaceKernel, + ops::CPULinspaceKernel, ops::CPULinspaceKernel); diff --git a/paddle/fluid/operators/linspace_op.cu b/paddle/fluid/operators/linspace_op.cu index 47d4536dcfe2a0ab43b3584196a138214e438e3e..8aca892a81d41b1e0a9f7f9c14169c2817ae9452 100644 --- a/paddle/fluid/operators/linspace_op.cu +++ b/paddle/fluid/operators/linspace_op.cu @@ -20,13 +20,15 @@ namespace paddle { namespace operators { template -__global__ void LinspaceKernel(T start, T step, int64_t size, T* out) { - CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; } +__global__ void LinspaceKernel(T start, double step, int64_t size, T* out) { + CUDA_KERNEL_LOOP(index, size) { + out[index] = static_cast(start + step * index); + } } template __global__ void LinspaceSpecialKernel(T start, T* out) { - out[0] = start; + out[0] = static_cast(start); } template @@ -51,9 +53,9 @@ class CUDALinspaceKernel : public framework::OpKernel { out->Resize(framework::make_ddim({num})); T* out_data = out->mutable_data(context.GetPlace()); - T step = 0; + double step = 0; if (num != 1) { - step = (stop - start) / (num - 1); + step = (static_cast(stop - start)) / (num - 1); } auto stream = context.cuda_device_context().stream(); @@ -68,4 +70,6 @@ class CUDALinspaceKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL(linspace, ops::CUDALinspaceKernel, + ops::CUDALinspaceKernel, + ops::CUDALinspaceKernel, ops::CUDALinspaceKernel); diff --git a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h index b1fcac73b0ad249aa19859bde770a8554cdb7408..9fb4960375ed7be60598d558c65310bd4a4b84bc 100644 --- a/paddle/fluid/operators/linspace_op.h +++ b/paddle/fluid/operators/linspace_op.h @@ -35,14 +35,12 @@ class CPULinspaceKernel : public framework::OpKernel { T* out_data = out->mutable_data(context.GetPlace()); if (num > 1) { - T step = (stop - start) / (num - 1); - T value = start; + double step = (static_cast(stop - start)) / (num - 1); for (int i = 0; i < num; ++i) { - out_data[i] = value; - value += step; + out_data[i] = static_cast(start + step * i); } } else { - out_data[0] = start; + out_data[0] = static_cast(start); } } }; diff --git a/paddle/fluid/operators/log_softmax_op.cc b/paddle/fluid/operators/log_softmax_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..d6e2b3ecff8c83e47a9016cc3d233d1aa03fb52b --- /dev/null +++ b/paddle/fluid/operators/log_softmax_op.cc @@ -0,0 +1,128 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/log_softmax_op.h" +#include +#include +#include "paddle/fluid/operators/common_infer_shape_functions.h" + +namespace paddle { +namespace operators { + +class LogSoftmaxOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + return UnaryOpUnchangedInferShapeCheckAxis(ctx); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.device_context()); + } +}; + +class LogSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "The input tensor of softmax, " + "whose dimension :attr:`axis` is the input_feature_dimensions."); + AddOutput("Out", "The normalized values with the same shape as X."); + AddAttr("axis", + "The dimension index of Input(x) to perform log_softmax," + "default -1 for last dimension") + .SetDefault(-1); + AddComment(R"DOC( +LogSoftmax Operator. + +)DOC"); + } +}; + +class LogSoftmaxOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map& GetInputOutputWithSameType() + const override { + static std::unordered_map m{{"X", /*->*/ "Out"}}; + return m; + } +}; + +class LogSoftmaxGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("Out"), "Input", "Out", "log_softmax_grad"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + "Out@grad", "log_softmax_grad"); + PADDLE_ENFORCE_EQ( + ctx->GetInputDim("Out"), + ctx->GetInputDim(framework::GradVarName("Out")), + platform::errors::InvalidArgument("Input(Out) and its gradients " + "should have the same shape.")); + + ctx->SetOutputDim(framework::GradVarName("X"), + ctx->GetInputDim(framework::GradVarName("Out"))); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")), + ctx.device_context()); + } +}; + +template +class LogSoftmaxGradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("log_softmax_grad"); + op->SetInput("Out", this->Output("Out")); + op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op->SetAttrMap(this->Attrs()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(log_softmax, ops::LogSoftmaxOp, ops::LogSoftmaxOpMaker, + ops::LogSoftmaxOpInferVarType, + ops::LogSoftmaxGradOpMaker, + ops::LogSoftmaxGradOpMaker); +REGISTER_OPERATOR(log_softmax_grad, ops::LogSoftmaxGradOp); + +REGISTER_OP_CPU_KERNEL( + log_softmax, + ops::LogSoftmaxKernel, + ops::LogSoftmaxKernel); +REGISTER_OP_CPU_KERNEL( + log_softmax_grad, + ops::LogSoftmaxGradKernel, + ops::LogSoftmaxGradKernel); diff --git a/paddle/fluid/operators/log_softmax_op.cu b/paddle/fluid/operators/log_softmax_op.cu new file mode 100644 index 
0000000000000000000000000000000000000000..02fca246d241d476b5540a6af8f49b16d4dae416 --- /dev/null +++ b/paddle/fluid/operators/log_softmax_op.cu @@ -0,0 +1,26 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/log_softmax_op.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; +REGISTER_OP_CUDA_KERNEL( + log_softmax, ops::LogSoftmaxKernel, + ops::LogSoftmaxKernel, + ops::LogSoftmaxKernel); +REGISTER_OP_CUDA_KERNEL( + log_softmax_grad, ops::LogSoftmaxGradKernel, + ops::LogSoftmaxGradKernel, + ops::LogSoftmaxGradKernel); diff --git a/paddle/fluid/operators/log_softmax_op.h b/paddle/fluid/operators/log_softmax_op.h new file mode 100644 index 0000000000000000000000000000000000000000..b983ac54157d9d0679ac237ca94e742b38833864 --- /dev/null +++ b/paddle/fluid/operators/log_softmax_op.h @@ -0,0 +1,192 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +using EigenMatrix = framework::EigenMatrix; + +static inline int CanonicalAxis(const int axis, const int rank) { + if (axis < 0) { + return axis + rank; + } + return axis; +} + +static inline int SizeToAxis(const int axis, const framework::DDim dims) { + int size = 1; + for (int i = 0; i < axis; i++) { + size *= dims[i]; + } + return size; +} + +static inline int SizeFromAxis(const int axis, const framework::DDim dims) { + int size = 1; + for (int i = axis; i < dims.size(); i++) { + size *= dims[i]; + } + return size; +} + +template +struct ValueClip { + HOSTDEVICE T operator()(const T& x) const { + const T kThreshold = static_cast(-64.); + return x < kThreshold ? 
kThreshold : x; + } +}; + +template +struct LogSoftmaxFunctor { + void operator()(const DeviceContext& context, const framework::Tensor* X, + framework::Tensor* Y, const int axis) { + constexpr int kBatchDim = 0; + constexpr int kClassDim = 1; + constexpr int kAxisDim = 1; + + int axis_dim = X->dims()[axis]; + const int n = SizeToAxis(axis, X->dims()); + const int d = SizeFromAxis(axis, X->dims()); + framework::DDim dim_2d{n, d}; + + auto logits = EigenMatrix::From(*X, dim_2d); + auto log_softmax = EigenMatrix::From(*Y, dim_2d); + + const int batch_size = logits.dimension(kBatchDim); + const int num_classes = logits.dimension(kClassDim); + const int num_remain = num_classes / axis_dim; + + Eigen::DSizes along_axis(kAxisDim); + Eigen::DSizes batch_classes(batch_size, num_classes); + Eigen::DSizes batch_by_one(batch_size, 1); + Eigen::DSizes one_by_class(1, num_classes); + Eigen::DSizes batch_one_remain(batch_size, 1, num_remain); + Eigen::DSizes one_axis_one(1, axis_dim, 1); + Eigen::DSizes one_axis(1, axis_dim); + Eigen::DSizes batch_axis_remain(batch_size, axis_dim, num_remain); + + // For numerical stability, logits should be shifted by maximum number along + // axis, calculate shifted_logits into log_softmax tensor for memory reuse. + if (num_remain == 1) { + // axis == -1, axis and class in same dimension, calculate along + // class dimension directly for higher performance + log_softmax.device(*context.eigen_device()) = + (logits - + logits.maximum(along_axis) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)) + .unaryExpr(ValueClip()); + } else { + // axis != -1, class dimension split into (axis, remain), max and sum + // should be calculated along axis dimension + log_softmax.device(*context.eigen_device()) = + (logits.reshape(batch_axis_remain) - + logits.reshape(batch_axis_remain) + .maximum(along_axis) + .eval() + .reshape(batch_one_remain) + .broadcast(one_axis_one) + .reshape(batch_classes)) + .unaryExpr(ValueClip()); + } + + log_softmax.device(*context.eigen_device()) = + log_softmax - + log_softmax.exp() + .eval() + .reshape(batch_axis_remain) + .sum(along_axis) + .log() + .broadcast(one_axis); + } +}; + +template +class LogSoftmaxKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* X = context.Input("X"); + auto* Out = context.Output("Out"); + const int rank = X->dims().size(); + const int axis = CanonicalAxis(context.Attr("axis"), rank); + + // allocate memory on device. 
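For reference, a scalar sketch of the numerically stable form LogSoftmaxFunctor computes along the chosen axis: shift by the row maximum first, then subtract log(sum(exp(shifted))). A direct log(exp(x)/sum(exp(x))) would overflow for large inputs. The LogSoftmax helper here is illustrative only.

// Illustrative sketch, not the operator code.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

std::vector<double> LogSoftmax(const std::vector<double>& x) {
  double max_x = *std::max_element(x.begin(), x.end());
  double sum_exp = 0.0;
  for (double v : x) sum_exp += std::exp(v - max_x);  // safe: v - max_x <= 0
  double log_sum = std::log(sum_exp);
  std::vector<double> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) y[i] = (x[i] - max_x) - log_sum;
  return y;
}

int main() {
  // exp(1002) alone would overflow a double, yet the result stays finite.
  for (double v : LogSoftmax({1000.0, 1001.0, 1002.0})) std::printf("%.4f ", v);
  std::printf("\n");
}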
+ Out->mutable_data(context.GetPlace()); + + LogSoftmaxFunctor()( + context.template device_context(), X, Out, axis); + } +}; + +template +struct LogSoftmaxGradFunctor { + void operator()(const DeviceContext& context, const framework::Tensor* Y, + const framework::Tensor* dY, framework::Tensor* dX, + const int axis) { + constexpr int kBatchDim = 0; + constexpr int kClassDim = 1; + + const int n = SizeToAxis(axis, Y->dims()); + const int d = SizeFromAxis(axis, Y->dims()); + framework::DDim dim_2d{n, d}; + + auto y = EigenMatrix::From(*Y, dim_2d); + auto dy = EigenMatrix::From(*dY, dim_2d); + auto dx = EigenMatrix::From(*dX, dim_2d); + + const int axis_dim = Y->dims()[axis]; + const int batch_size = y.dimension(kBatchDim); + const int num_classes = y.dimension(kClassDim); + const int num_remain = num_classes / axis_dim; + + Eigen::DSizes along_class(kClassDim); + Eigen::DSizes batch_axis_remain(batch_size, axis_dim, num_remain); + Eigen::DSizes one_axis(1, axis_dim); + + dx.device(*context.eigen_device()) = + dy - + (y.exp()) * (dy.reshape(batch_axis_remain) + .sum(along_class) + .broadcast(one_axis)); + } +}; + +template +class LogSoftmaxGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* Out = context.Input("Out"); + auto* dOut = + context.Input(framework::GradVarName("Out")); + auto* dX = context.Output(framework::GradVarName("X")); + const int rank = Out->dims().size(); + const int axis = CanonicalAxis(context.Attr("axis"), rank); + + // allocate memory on device. + dX->mutable_data(context.GetPlace()); + + LogSoftmaxGradFunctor()( + context.template device_context(), Out, dOut, dX, axis); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/masked_select_op.cc b/paddle/fluid/operators/masked_select_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..3b44c02757fae9648a7e660a06c03af45d621e02 --- /dev/null +++ b/paddle/fluid/operators/masked_select_op.cc @@ -0,0 +1,120 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
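A scalar sketch of the backward rule LogSoftmaxGradFunctor implements: with y = log_softmax(x), dx_i = dy_i - exp(y_i) * sum_j dy_j, since exp(y) recovers softmax(x). The helper name is illustrative only.

// Illustrative sketch, not the operator code.
#include <cmath>
#include <cstdio>
#include <vector>

std::vector<double> LogSoftmaxGrad(const std::vector<double>& y,
                                   const std::vector<double>& dy) {
  double dy_sum = 0.0;
  for (double v : dy) dy_sum += v;
  std::vector<double> dx(y.size());
  for (size_t i = 0; i < y.size(); ++i) {
    dx[i] = dy[i] - std::exp(y[i]) * dy_sum;  // exp(y_i) == softmax_i
  }
  return dx;
}

int main() {
  std::vector<double> y = {-2.4076, -1.4076, -0.4076};  // a log-softmax row
  std::vector<double> dy = {1.0, 0.0, 0.0};
  for (double g : LogSoftmaxGrad(y, dy)) std::printf("% .4f ", g);
  std::printf("\n");  // the gradients sum to ~0, as expected for log_softmax
}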
*/ + +#include "paddle/fluid/operators/masked_select_op.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +class MaskedSelectOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "Input", "MaskedSelect"); + OP_INOUT_CHECK(ctx->HasInput("Mask"), "Input", "Mask", "MaskedSelect"); + OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Out", "MaskedSelect"); + framework::DDim output_dims(ctx->GetInputDim("X")); + ctx->SetOutputDim("Y", output_dims); + ctx->ShareLoD("X", /*->*/ "Y"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); + return framework::OpKernelType(data_type, ctx.device_context()); + } +}; + +class MaskedSelectOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "The input tensor."); + AddInput("Mask", + "The mask of Input Tensor to be selected which is a bool Tensor."); + AddOutput( + "Y", + "The returned tensor, the data type " + "is same as input, will be on the same device with the input Tensor."); + AddComment(R"DOC( +Size Operator. + +Return a new 0-D tensor which indexes the indexed tensor according +the mask which is a tensor withe data type bool. +)DOC"); + } +}; + +class MaskedSelectOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Input", + "Input", "MaskedSelect"); + OP_INOUT_CHECK(ctx->HasInput("Mask"), "Input", "Mask", "MaskedSelect"); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + ctx->ShareLoD("X", /*-->*/ framework::GradVarName("X")); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Y")), + ctx.device_context()); + } +}; + +template +class MaskedSelectGradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("masked_select_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput("Mask", this->Input("Mask")); + op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + } +}; + +DECLARE_NO_NEED_BUFFER_VARS_INFERER(MaskedSelectedGradNoNeedBufferVarsInferer, + "X"); +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(masked_select, ops::MaskedSelectOp, ops::MaskedSelectOpMaker, + ops::MaskedSelectGradOpMaker, + ops::MaskedSelectGradOpMaker); +REGISTER_OPERATOR(masked_select_grad, ops::MaskedSelectOpGrad, + ops::MaskedSelectedGradNoNeedBufferVarsInferer); + +REGISTER_OP_CPU_KERNEL( + masked_select, + ops::MaskedSelectKernel, + ops::MaskedSelectKernel, + ops::MaskedSelectKernel, + ops::MaskedSelectKernel); +REGISTER_OP_CPU_KERNEL( + masked_select_grad, + ops::MaskedSelectGradKernel, + ops::MaskedSelectGradKernel, + ops::MaskedSelectGradKernel, + ops::MaskedSelectGradKernel); diff --git 
a/paddle/fluid/operators/masked_select_op.cu b/paddle/fluid/operators/masked_select_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..7dc0516800c483d1d82a2390a64130e77b1efb01 --- /dev/null +++ b/paddle/fluid/operators/masked_select_op.cu @@ -0,0 +1,179 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include +#include +#include +#include +#include "paddle/fluid/operators/masked_select_op.h" +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using DDim = framework::DDim; + +__global__ void SetMaskArray(const bool* mask, int32_t* mask_array, int size) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + for (; idx < size; idx += blockDim.x * gridDim.x) { + if (mask[idx]) + mask_array[idx] = 1; + else + mask_array[idx] = 0; + } +} + +template +__global__ void SelectWithPrefixMask(const int32_t* mask_prefix_sum, + const bool* mask, const T* input, T* out, + int size) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + for (; idx < size; idx += blockDim.x * gridDim.x) { + if (mask[idx]) { + int index = mask_prefix_sum[idx]; + out[index] = input[idx]; + } + } +} + +template +__global__ void SelectGradWithPrefixMask(const int32_t* mask_prefix_sum, + const bool* mask, const T* input, + T* out, int size) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + for (; idx < size; idx += blockDim.x * gridDim.x) { + if (mask[idx]) { + int index = mask_prefix_sum[idx]; + out[idx] = input[index]; + } else { + out[idx] = 0; + } + } +} + +template +class MaskedSelectCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto input = ctx.Input("X"); + auto mask = ctx.Input("Mask"); + auto out = ctx.Output("Y"); + auto* mask_data = mask->data(); + auto input_data = input->data(); + + auto mask_size = mask->numel(); + auto input_dim = input->dims(); + auto mask_dim = mask->dims(); + PADDLE_ENFORCE_EQ( + input_dim, mask_dim, + platform::errors::InvalidArgument( + "The dim size of input and mask in OP(masked_selected) " + "must be equal, but got input dim:(%ld), mask dim: " + "(%ld). 
Please check input " + "value.", + input_dim, mask_dim)); + + thrust::device_ptr mask_dev_ptr = + thrust::device_pointer_cast(mask_data); + thrust::device_vector mask_vec(mask_dev_ptr, mask_dev_ptr + mask_size); + auto out_size = thrust::count(mask_vec.begin(), mask_vec.end(), true); + + framework::DDim out_dim{out_size}; + out->Resize(out_dim); + auto out_data = out->mutable_data(ctx.GetPlace()); + + Tensor mask_array; + Tensor mask_prefix_sum; + mask_array.Resize(mask_dim); + mask_prefix_sum.Resize(mask_dim); + + int32_t* mask_array_data = mask_array.mutable_data(ctx.GetPlace()); + int32_t* mask_prefix_sum_data = + mask_prefix_sum.mutable_data(ctx.GetPlace()); + int threads = 512; + int grid = (mask_size + threads - 1) / threads; + auto stream = ctx.cuda_device_context().stream(); + SetMaskArray<<>>(mask_data, mask_array_data, + mask_size); + + thrust::device_ptr mask_array_dev_ptr = + thrust::device_pointer_cast(mask_array_data); + thrust::device_vector mask_array_vec( + mask_array_dev_ptr, mask_array_dev_ptr + mask_size); + thrust::exclusive_scan(thrust::device, mask_array_vec.begin(), + mask_array_vec.end(), mask_prefix_sum_data); + + SelectWithPrefixMask<<>>( + mask_prefix_sum_data, mask_data, input_data, out_data, mask_size); + } +}; + +template +class MaskedSelectGradCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto input = ctx.Input(framework::GradVarName("Y")); + auto mask = ctx.Input("Mask"); + auto out = ctx.Output(framework::GradVarName("X")); + auto* mask_data = mask->data(); + auto* input_data = input->data(); + auto* out_data = out->mutable_data(ctx.GetPlace()); + + auto input_size = input->numel(); + auto mask_size = mask->numel(); + auto mask_dim = mask->dims(); + + auto out_size = mask_size; + + Tensor mask_array; + Tensor mask_prefix_sum; + mask_array.Resize(mask_dim); + mask_prefix_sum.Resize(mask_dim); + + int32_t* mask_array_data = mask_array.mutable_data(ctx.GetPlace()); + int32_t* mask_prefix_sum_data = + mask_prefix_sum.mutable_data(ctx.GetPlace()); + int threads = 512; + int grid = (mask_size + threads - 1) / threads; + auto stream = ctx.cuda_device_context().stream(); + SetMaskArray<<>>(mask_data, mask_array_data, + mask_size); + + thrust::device_ptr mask_array_dev_ptr = + thrust::device_pointer_cast(mask_array_data); + thrust::device_vector mask_array_vec( + mask_array_dev_ptr, mask_array_dev_ptr + mask_size); + thrust::exclusive_scan(thrust::device, mask_array_vec.begin(), + mask_array_vec.end(), mask_prefix_sum_data); + + SelectGradWithPrefixMask<<>>( + mask_prefix_sum_data, mask_data, input_data, out_data, mask_size); + } +}; +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + masked_select, + ops::MaskedSelectCUDAKernel, + ops::MaskedSelectCUDAKernel, + ops::MaskedSelectCUDAKernel, + ops::MaskedSelectCUDAKernel); +REGISTER_OP_CUDA_KERNEL( + masked_select_grad, + ops::MaskedSelectGradCUDAKernel, + ops::MaskedSelectGradCUDAKernel, + ops::MaskedSelectGradCUDAKernel, + ops::MaskedSelectGradCUDAKernel); diff --git a/paddle/fluid/operators/masked_select_op.h b/paddle/fluid/operators/masked_select_op.h new file mode 100644 index 0000000000000000000000000000000000000000..ce8371556c82fe105b6719e845d4fd6232f3a95e --- /dev/null +++ b/paddle/fluid/operators/masked_select_op.h @@ -0,0 +1,94 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
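A CPU sketch of the index computation the CUDA kernels above perform with thrust::exclusive_scan: an exclusive prefix sum over the 0/1 mask gives, for every true position, its slot in the compacted output, so the scatter can run fully in parallel. Variable names are illustrative only.

// Illustrative sketch, not the operator code.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> x = {10, 20, 30, 40, 50};
  std::vector<bool> mask = {true, false, true, true, false};

  // Exclusive prefix sum of the mask: prefix[i] = number of trues before i.
  std::vector<int> prefix(mask.size(), 0);
  for (size_t i = 1; i < mask.size(); ++i)
    prefix[i] = prefix[i - 1] + (mask[i - 1] ? 1 : 0);

  int out_size = prefix.back() + (mask.back() ? 1 : 0);
  std::vector<int> out(out_size);
  for (size_t i = 0; i < mask.size(); ++i)
    if (mask[i]) out[prefix[i]] = x[i];  // scatter to the compacted slot

  for (int v : out) std::printf("%d ", v);  // 10 30 40
  std::printf("\n");
}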
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using DDim = framework::DDim; + +template +class MaskedSelectKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto input = context.Input("X"); + auto mask = context.Input("Mask"); + auto out = context.Output("Y"); + auto* mask_data = mask->data(); + auto input_data = input->data(); + + auto mask_size = mask->numel(); + + auto input_dim = input->dims(); + auto mask_dim = mask->dims(); + PADDLE_ENFORCE_EQ( + input_dim, mask_dim, + platform::errors::InvalidArgument( + "The dim size of input and mask in OP(masked_selected) " + "must be equal, but got input dim:(%ld), mask dim: " + "(%ld). Please check input " + "value.", + input_dim, mask_dim)); + + int out_size = 0; + for (int i = 0; i < mask_size; i++) { + if (mask_data[i]) out_size++; + } + + framework::DDim out_dim{out_size}; + out->Resize(out_dim); + auto out_data = out->mutable_data(context.GetPlace()); + + int index = 0; + for (int i = 0; i < mask_size; i++) { + if (mask_data[i]) { + out_data[index] = input_data[i]; + index++; + } + } + } +}; + +template +class MaskedSelectGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto out = context.Output(framework::GradVarName("X")); + auto mask = context.Input("Mask"); + auto input = context.Input(framework::GradVarName("Y")); + + auto* mask_data = mask->data(); + auto* input_data = input->data(); + auto* out_data = out->mutable_data(context.GetPlace()); + int mask_size = mask->numel(); + + int index = 0; + for (int i = 0; i < mask_size; i++) { + if (mask_data[i]) { + out_data[i] = input_data[index]; + index++; + } else { + out_data[i] = 0; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/blas.h b/paddle/fluid/operators/math/blas.h index f8c971954fc4c0b367cc6e62df8f7a596b651b94..42a60e9220cf848ba766a19cb7b4d13edc460c11 100644 --- a/paddle/fluid/operators/math/blas.h +++ b/paddle/fluid/operators/math/blas.h @@ -198,6 +198,11 @@ class Blas { int K, T alpha, const T* A, const T* B, T beta, T* C, int batchCount, int64_t strideA, int64_t strideB) const; + template + void BatchedGEMM(CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, int M, int N, + int K, T alpha, const T** A, const T** B, T beta, T** C, + int batchCount) const; + #if defined(PADDLE_WITH_MKLML) && !defined(PADDLE_WITH_CUDA) template void BatchedGEMMWithHead(CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, diff --git a/paddle/fluid/operators/math/blas_impl.cu.h b/paddle/fluid/operators/math/blas_impl.cu.h index 64b35cfeaecd1f88395db97d0374d919356651eb..d0c5f74d4efb8248b41d8b2af285e8dd7ec4d479 100644 --- a/paddle/fluid/operators/math/blas_impl.cu.h +++ 
b/paddle/fluid/operators/math/blas_impl.cu.h @@ -458,6 +458,17 @@ void Blas::BatchedGEMM( #endif // CUDA_VERSION >= 9010 } +template <> +template +void Blas::BatchedGEMM( + CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, int M, int N, int K, + T alpha, const T **A, const T **B, T beta, T **C, int batchCount) const { + for (int k = 0; k < batchCount; ++k) { + this->template GEMM(transA, transB, M, N, K, alpha, A[k], B[k], beta, + C[k]); + } +} + template <> template void Blas::TRSM(CBLAS_SIDE side, CBLAS_UPLO uplo, diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index cdaf53fea30085b34f07c37d50455c9b02dc5c44..892bf15738141bfbb7e75fa6b37c0cda53a8e098 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once +#include #include #include #include @@ -655,6 +656,26 @@ void Blas::BatchedGEMM( #endif } +template <> +template +void Blas::BatchedGEMM( + CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, int M, int N, int K, + T alpha, const T **A, const T **B, T beta, T **C, int batchCount) const { +#ifdef PADDLE_WITH_MKLML + const int lda = std::max((transA == CblasNoTrans) ? K : M, 1); + const int ldb = std::max((transB == CblasNoTrans) ? N : K, 1); + const int ldc = std::max(N, 1); + CBlas::GEMM_BATCH(CblasRowMajor, &transA, &transB, &M, &N, &K, &alpha, A, + &lda, B, &ldb, &beta, C, &ldc, 1 /* group_count */, + &batchCount); +#else + for (int k = 0; k < batchCount; ++k) { + this->template GEMM(transA, transB, M, N, K, alpha, A[k], B[k], beta, + C[k]); + } +#endif +} + #if defined(PADDLE_WITH_MKLML) && !defined(PADDLE_WITH_CUDA) template <> template diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc index 44b04104419e790b0ca8619b85ec0a1b4d701021..6748d0ab43f70f997b3008f34f4be743b81e8946 100644 --- a/paddle/fluid/operators/math/math_function.cc +++ b/paddle/fluid/operators/math/math_function.cc @@ -73,6 +73,13 @@ struct TensorSetConstantCPU { float value_; }; +template <> +void set_constant_with_place( + const platform::DeviceContext& context, framework::Tensor* tensor, + float value) { + PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported")); +} + template <> void set_constant_with_place( const platform::DeviceContext& context, framework::Tensor* tensor, diff --git a/paddle/fluid/operators/math/sampler.cc b/paddle/fluid/operators/math/sampler.cc index 238d9f2905058d267ffbee0669594920d7a9e031..86feaa72d5fa69cd5d76e56182c27b8d048e4c74 100644 --- a/paddle/fluid/operators/math/sampler.cc +++ b/paddle/fluid/operators/math/sampler.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include +#include "paddle/fluid/framework/generator.h" namespace paddle { namespace operators { @@ -31,7 +32,12 @@ UniformSampler::UniformSampler(int64_t range, unsigned int seed) dist_ = std::make_shared>(0, range); } -int64_t UniformSampler::Sample() const { return (*dist_)(*random_engine_); } +int64_t UniformSampler::Sample() const { + return framework::Generator::GetInstance()->is_init_py + ? 
(*dist_)(framework::Generator::GetInstance()->GetCPUEngine()) + : (*dist_)(*random_engine_); + // return (*dist_)(*random_engine_); +} float UniformSampler::Probability(int64_t value) const { return inv_range_; } @@ -46,8 +52,11 @@ int64_t LogUniformSampler::Sample() const { // inverse_transform_sampling method // More details: // https://wanghaoshuang.github.io/2017/11/Log-uniform-distribution-sampler/ - const int64_t value = - static_cast(exp((*dist_)(*random_engine_) * log_range_)) - 1; + auto cur_random = + framework::Generator::GetInstance()->is_init_py + ? (*dist_)(framework::Generator::GetInstance()->GetCPUEngine()) + : (*dist_)(*random_engine_); + const int64_t value = static_cast(exp(cur_random * log_range_)) - 1; // Mathematically, value should be <= range_, but might not be due to some // floating point roundoff, so we mod by range_. return value % range_; @@ -75,8 +84,14 @@ CustomSampler::CustomSampler(int64_t range, const float *probabilities, } int64_t CustomSampler::Sample() const { - auto index = (*int_dist_)(*random_engine_); - auto p = (*real_dist_)(*random_engine_); + auto index = + framework::Generator::GetInstance()->is_init_py + ? (*int_dist_)(framework::Generator::GetInstance()->GetCPUEngine()) + : (*int_dist_)(*random_engine_); + auto p = + framework::Generator::GetInstance()->is_init_py + ? (*real_dist_)(framework::Generator::GetInstance()->GetCPUEngine()) + : (*real_dist_)(*random_engine_); if (p > alias_probs_[index]) { int alias = alias_[index]; diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..0254ad0a563d91282e76cd7bf43343e4d9139842 --- /dev/null +++ b/paddle/fluid/operators/matmul_v2_op.cc @@ -0,0 +1,176 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
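Before the operator definition, a note on the pointer-array BatchedGEMM overload added above: unlike the strided variant, it takes one pointer per batch entry, which is what the matmul_v2 kernels below rely on when broadcast batch dimensions rule out a single strided GEMM. The following is a minimal sketch of the fallback semantics only (one independent GEMM per batch), assuming row-major, non-transposed matrices; the helper names naive_gemm and batched_gemm_ptr are illustrative and not part of the patch.

#include <cstdio>
#include <vector>

// Reference semantics of the pointer-array BatchedGEMM fallback:
// C[k] = alpha * A[k] * B[k] + beta * C[k] for k in [0, batch_count),
// with each A[k] (MxK), B[k] (KxN), C[k] (MxN) stored row-major.
static void naive_gemm(int M, int N, int K, float alpha, const float* A,
                       const float* B, float beta, float* C) {
  for (int i = 0; i < M; ++i) {
    for (int j = 0; j < N; ++j) {
      float acc = 0.f;
      for (int k = 0; k < K; ++k) acc += A[i * K + k] * B[k * N + j];
      C[i * N + j] = alpha * acc + beta * C[i * N + j];
    }
  }
}

static void batched_gemm_ptr(int M, int N, int K, float alpha,
                             const float** A, const float** B, float beta,
                             float** C, int batch_count) {
  // Equivalent to the non-MKL branch: one independent GEMM per batch entry.
  for (int b = 0; b < batch_count; ++b) {
    naive_gemm(M, N, K, alpha, A[b], B[b], beta, C[b]);
  }
}

int main() {
  // Two 2x2 batches with distinct pointers, as built for broadcast matmul.
  std::vector<float> a0{1, 2, 3, 4}, a1{1, 0, 0, 1};
  std::vector<float> b0{5, 6, 7, 8}, b1{2, 0, 0, 2};
  std::vector<float> c0(4, 0.f), c1(4, 0.f);
  const float* A[] = {a0.data(), a1.data()};
  const float* B[] = {b0.data(), b1.data()};
  float* C[] = {c0.data(), c1.data()};
  batched_gemm_ptr(2, 2, 2, 1.0f, A, B, 0.0f, C, 2);
  std::printf("C0 = [%g %g; %g %g]\n", c0[0], c0[1], c0[2], c0[3]);  // 19 22; 43 50
  std::printf("C1 = [%g %g; %g %g]\n", c1[0], c1[1], c1[2], c1[3]);  // 2 0; 0 2
  return 0;
}

The MKL build uses the cblas group/batch API instead, but the per-batch result is the same.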
+ +#include "paddle/fluid/operators/matmul_v2_op.h" +#include +#include + +namespace paddle { +namespace operators { + +class MatMulV2Op : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "matmul_v2"); + OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "matmul_v2"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "matmul_v2"); + bool trans_x = ctx->Attrs().Get("trans_x"); + bool trans_y = ctx->Attrs().Get("trans_y"); + + std::vector dims_x = + paddle::framework::vectorize(ctx->GetInputDim("X")); + std::vector dims_y = + paddle::framework::vectorize(ctx->GetInputDim("Y")); + auto ndims_x = dims_x.size(); + auto ndims_y = dims_y.size(); + + bool x_broadcasted = false, y_broadcasted = false; + if (ndims_x == 1) { + dims_x.insert(dims_x.begin(), 1); + ndims_x = 2; + x_broadcasted = true; + } + + if (ndims_y == 1) { + dims_y.push_back(1); + ndims_y = 2; + y_broadcasted = true; + } + + size_t M, N; + if (trans_x) { + M = dims_x[ndims_x - 1]; + } else { + M = dims_x[ndims_x - 2]; + } + if (trans_y) { + N = dims_y[ndims_y - 2]; + } else { + N = dims_y[ndims_y - 1]; + } + + std::vector new_dims; + if (ndims_x >= ndims_y) { + new_dims.assign(dims_x.begin(), dims_x.end() - 2); + } else { + new_dims.assign(dims_y.begin(), dims_y.end() - 2); + } + if (!x_broadcasted) { + new_dims.push_back(M); + } + if (!y_broadcasted) { + new_dims.push_back(N); + } + if (x_broadcasted && y_broadcasted) { + new_dims.push_back(1); + } + + auto out_dims = framework::make_ddim(new_dims); + ctx->SetOutputDim("Out", out_dims); + ctx->ShareLoD("X", /* --> */ "Out"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.device_context()); + } +}; + +class MatMulV2OpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "tensor of shape (d0, d1 ... M, K)"); + AddInput("Y", "tensor of shape (d0, d1 ... K, N)"); + AddOutput("Out", "tensor of shape (d0, d1 ... M, N)"); + AddAttr("trans_x", + "Set true to transpose the last two dimensions of X before " + "doing multiplication") + .SetDefault(false); + AddAttr("trans_y", + "Set true to transpose the last two dimensions of Y before " + "doing multiplication") + .SetDefault(false); + AddComment( + R"DOC(Matrix multiplication Out = X * Y. A has shape (d0, d1 ... M, K), + B has shape (d0, d1 ... K, N), Out has shape ((d0, d1 ... M, N)). + In addition, it also follows the broadcast rule which is similar as + numpy.matmul. 
+)DOC"); + } +}; + +class MatMulV2OpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* context) const override { + OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "matmul_v2"); + OP_INOUT_CHECK(context->HasInput("Y"), "Input", "Y", "matmul_v2"); + OP_INOUT_CHECK(context->HasInput(framework::GradVarName("Out")), "Input", + "Out@GRAD", "matmul_v2"); + auto x_dims = context->GetInputDim("X"); + auto y_dims = context->GetInputDim("Y"); + + auto x_grad_name = framework::GradVarName("X"); + auto y_grad_name = framework::GradVarName("Y"); + + if (context->HasOutput(x_grad_name)) { + context->SetOutputDim(x_grad_name, x_dims); + } + if (context->HasOutput(y_grad_name)) { + context->SetOutputDim(y_grad_name, y_dims); + } + } +}; + +template +class MatMulV2GradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("matmul_v2_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput("Y", this->Input("Y")); + op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op->SetOutput(framework::GradVarName("Y"), this->InputGrad("Y")); + op->SetAttrMap(this->Attrs()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(matmul_v2, ops::MatMulV2Op, ops::MatMulV2OpMaker, + ops::MatMulV2GradOpMaker, + ops::MatMulV2GradOpMaker); + +REGISTER_OPERATOR(matmul_v2_grad, ops::MatMulV2OpGrad); + +REGISTER_OP_CPU_KERNEL( + matmul_v2, ops::MatMulV2Kernel, + ops::MatMulV2Kernel); + +REGISTER_OP_CPU_KERNEL( + matmul_v2_grad, + ops::MatMulV2GradKernel, + ops::MatMulV2GradKernel); diff --git a/paddle/fluid/operators/matmul_v2_op.cu b/paddle/fluid/operators/matmul_v2_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..64ec65a23419725c7cc481beadb9383402a426bd --- /dev/null +++ b/paddle/fluid/operators/matmul_v2_op.cu @@ -0,0 +1,26 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/matmul_v2_op.h" + +namespace ops = paddle::operators; +namespace plf = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(matmul_v2, + ops::MatMulV2Kernel, + ops::MatMulV2Kernel); + +REGISTER_OP_CUDA_KERNEL( + matmul_v2_grad, ops::MatMulV2GradKernel, + ops::MatMulV2GradKernel); diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h new file mode 100644 index 0000000000000000000000000000000000000000..dc83e4d964815ec46452bb0086cf17437b3846a4 --- /dev/null +++ b/paddle/fluid/operators/matmul_v2_op.h @@ -0,0 +1,481 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/dot_op.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h" + +#ifdef __NVCC__ +#include "paddle/fluid/operators/reduce_ops/cub_reduce.h" +#endif + +namespace paddle { +namespace operators { + +using framework::Tensor; + +template +struct IdentityFunctor { + HOSTDEVICE explicit inline IdentityFunctor() {} + + HOSTDEVICE inline T operator()(const T& x) const { return x; } +}; + +template +void ReduceSumForMatmulGrad(const Tensor* input, Tensor* output, + const std::vector& reduce_dims, + const paddle::framework::ExecutionContext& ctx) { + if (reduce_dims.empty()) { + // FIXME maybe reduce this copy operation + framework::TensorCopySync(*input, ctx.GetPlace(), output); + return; + } +#ifdef __NVCC__ + auto stream = ctx.cuda_device_context().stream(); + TensorReduce>( + *input, output, reduce_dims, static_cast(0), cub::Sum(), + IdentityFunctor(), stream); +#else + ReduceKernelFunctor( + input, output, reduce_dims, true, false, ctx) + .template apply(); +#endif +} + +static void GetBroadcastFromDims(const int x_ndim, const std::int64_t* x_dims, + const int y_ndim, const std::int64_t* y_dims, + std::int64_t* x_bd_dims, + std::int64_t* y_bd_dims, + std::int64_t* out_bd_dims) { + const int ndim = std::max(x_ndim, y_ndim); + std::fill(x_bd_dims, x_bd_dims + ndim - x_ndim, 1); + std::fill(y_bd_dims, y_bd_dims + ndim - y_ndim, 1); + std::copy(x_dims, x_dims + x_ndim, x_bd_dims + ndim - x_ndim); + std::copy(y_dims, y_dims + y_ndim, y_bd_dims + ndim - y_ndim); + + for (int i = 0; i < ndim; ++i) { + PADDLE_ENFORCE_EQ( + x_bd_dims[i] == y_bd_dims[i] || x_bd_dims[i] <= 1 || y_bd_dims[i] <= 1, + true, platform::errors::InvalidArgument( + "Input(X) and Input(Y) has error dim.")); + if (x_bd_dims[i] == 0 || y_bd_dims[i] == 0) { + out_bd_dims[i] = 0; + } else { + out_bd_dims[i] = std::max(x_bd_dims[i], y_bd_dims[i]); + } + } +} + +static int64_t GetIndexMessage(const int n, const int64_t* dims, + const int64_t* index) { + int64_t sum = 0; + for (int i = 0; i < n; ++i) { + if (dims[i] > 1) { + sum = sum * dims[i] + index[i]; + } + } + return sum; +} + +static void IndexIncreaseFromDims(const int ndim, const int64_t* dims, + int64_t* index) { + for (int i = ndim - 1; i >= 0; --i) { + ++index[i]; + if (index[i] >= dims[i]) { + index[i] -= dims[i]; + } else { + break; + } + } +} + +template +void MatMulFunction(const Tensor* X, const Tensor* Y, + const std::vector& x_dims, + const std::vector& y_dims, Tensor* Out, + bool trans_x, bool trans_y, + const paddle::framework::ExecutionContext& ctx) { + const int x_ndim = x_dims.size(); + const int y_ndim = y_dims.size(); + + // get data ptr + const T* x_data = X->data(); + const T* y_data = Y->data(); + + if (x_ndim == 1 && y_ndim == 1) { + PADDLE_ENFORCE_EQ(X->numel(), Y->numel(), + platform::errors::InvalidArgument( + "X's numbers is not equal to Y's numbers," + "when X/Y's dims =1")); + VLOG(3) << "MatMul's case 1"; + Out->Resize({1}); 
+ Out->mutable_data(ctx.GetPlace()); + auto out_eigen = framework::EigenScalar::From(*Out); + auto x_eigen = framework::EigenVector::Flatten(*X); + auto y_eigen = framework::EigenVector::Flatten(*Y); + + auto& dev = *ctx.template device_context().eigen_device(); + out_eigen.device(dev) = (x_eigen * y_eigen).sum(); + return; + } + + auto& dev_ctx = ctx.template device_context(); + auto blas = math::GetBlas(dev_ctx); + + if (x_ndim == 1) { + const int N = X->numel(); + if (trans_y) { + PADDLE_ENFORCE_EQ( + y_dims[y_ndim - 1], N, + platform::errors::InvalidArgument("Input(Y) has error dim.")); + } else { + PADDLE_ENFORCE_EQ( + y_dims[y_ndim - 2], N, + platform::errors::InvalidArgument("Input(Y) has error dim.")); + } + std::vector out_dims(y_ndim - 1); + if (trans_y) { + std::copy_n(y_dims.cbegin(), y_ndim - 1, out_dims.begin()); + } else { + std::copy_n(y_dims.cbegin(), y_ndim - 2, out_dims.begin()); + out_dims.back() = y_dims.back(); + } + Out->Resize(framework::make_ddim(out_dims)); + Out->mutable_data(ctx.GetPlace()); + if (trans_y) { + const int M = Y->numel() / N; + VLOG(3) << "MatMul's case 2"; + blas.GEMV(false, M, N, 1., y_data, x_data, 0., Out->data()); + } else { + const int M = y_dims[y_ndim - 1]; + const int batch_size = Y->numel() / (M * N); + if (batch_size == 1) { + VLOG(3) << "MatMul's case 3"; + blas.GEMV(true, N, M, 1., y_data, x_data, 0., Out->data()); + } else { + VLOG(3) << "MatMul's case 4"; + blas.BatchedGEMM(CblasTrans, CblasNoTrans, M, 1, N, 1.0f, y_data, + x_data, 0, Out->data(), batch_size, M * N, 0); + } + } + return; + } + + if (y_ndim == 1) { + const int N = Y->numel(); + if (trans_x) { + PADDLE_ENFORCE_EQ( + x_dims[x_ndim - 2], N, + platform::errors::InvalidArgument("Input(X) has error dim.")); + } else { + PADDLE_ENFORCE_EQ( + x_dims[x_ndim - 1], N, + platform::errors::InvalidArgument("Input(X) has error dim.")); + } + std::vector out_dims(x_ndim - 1); + if (trans_x) { + std::copy_n(x_dims.cbegin(), x_ndim - 2, out_dims.begin()); + out_dims.back() = x_dims.back(); + } else { + std::copy_n(x_dims.cbegin(), x_ndim - 1, out_dims.begin()); + } + Out->Resize(framework::make_ddim(out_dims)); + Out->mutable_data(ctx.GetPlace()); + + if (trans_x) { + const int M = x_dims[x_ndim - 1]; + const int batch_size = X->numel() / (M * N); + if (batch_size == 1) { + VLOG(3) << "MatMul's case 5"; + blas.GEMV(true, N, M, 1.0f, x_data, y_data, 0.0f, Out->data()); + } else { + VLOG(3) << "MatMul's case 6"; + blas.BatchedGEMM(CblasTrans, CblasNoTrans, M, 1, N, 1.0f, x_data, + y_data, 0, Out->data(), batch_size, M * N, 0); + } + } else { + const int M = X->numel() / N; + VLOG(3) << "MatMul's case 7"; + blas.GEMV(false, M, N, 1.0f, x_data, y_data, 0.0f, Out->data()); + } + return; + } + + const int M = trans_x ? x_dims[x_ndim - 1] : x_dims[x_ndim - 2]; + const int K = trans_x ? x_dims[x_ndim - 2] : x_dims[x_ndim - 1]; + if (trans_y) { + PADDLE_ENFORCE_EQ(y_dims[y_ndim - 1], K, platform::errors::InvalidArgument( + "Input(X) has error dim.")); + } else { + PADDLE_ENFORCE_EQ(y_dims[y_ndim - 2], K, platform::errors::InvalidArgument( + "Input(X) has error dim.")); + } + const int N = trans_y ? 
y_dims[y_ndim - 2] : y_dims[y_ndim - 1]; + const int ndim = std::max(x_ndim, y_ndim); + std::vector x_broadcast_dims(ndim); + std::vector y_broadcast_dims(ndim); + std::vector out_broadcast_dims(ndim); + + GetBroadcastFromDims(x_ndim - 2, x_dims.data(), y_ndim - 2, y_dims.data(), + x_broadcast_dims.data(), y_broadcast_dims.data(), + out_broadcast_dims.data()); + + out_broadcast_dims[ndim - 2] = M; + out_broadcast_dims[ndim - 1] = N; + + Out->Resize(framework::make_ddim(out_broadcast_dims)); + Out->mutable_data(ctx.GetPlace()); + + const int batch_dim = ndim - 2; + // broadcast message + const bool is_broadcast_dims = !std::equal( + x_broadcast_dims.cbegin(), x_broadcast_dims.cbegin() + batch_dim, + y_broadcast_dims.cbegin()); + + const std::int64_t x_batch_size = std::accumulate( + x_broadcast_dims.cbegin(), x_broadcast_dims.cbegin() + batch_dim, 1LL, + std::multiplies()); + const std::int64_t y_batch_size = std::accumulate( + y_broadcast_dims.cbegin(), y_broadcast_dims.cbegin() + batch_dim, 1LL, + std::multiplies()); + const std::int64_t out_batch_size = std::accumulate( + out_broadcast_dims.cbegin(), out_broadcast_dims.cbegin() + batch_dim, 1LL, + std::multiplies()); + if (out_batch_size == 0) return; + if (x_batch_size == 1 && y_batch_size == 1) { + VLOG(3) << "MatMul's case 8"; + blas.GEMM(trans_x ? CblasTrans : CblasNoTrans, + trans_y ? CblasTrans : CblasNoTrans, M, N, K, 1.0f, x_data, + y_data, 0.0f, Out->data()); + } else if (x_batch_size == 1) { + if (M == 1 && trans_y) { + VLOG(3) << "MatMul's case 9"; + blas.GEMV(false, y_batch_size * N, K, 1.0f, y_data, x_data, 0.0f, + Out->data()); + } else { + VLOG(3) << "MatMul's case 10"; + blas.BatchedGEMM(trans_x ? CblasTrans : CblasNoTrans, + trans_y ? CblasTrans : CblasNoTrans, M, N, K, 1.0f, + x_data, y_data, 0, Out->data(), out_batch_size, 0, + K * N); + } + } else if (y_batch_size == 1) { + if (!trans_x) { + VLOG(3) << "MatMul's case 11"; + blas.GEMM(CblasNoTrans, trans_y ? CblasTrans : CblasNoTrans, + x_batch_size * M, N, K, 1.0f, x_data, y_data, 0.0f, + Out->data()); + } else { + VLOG(3) << "MatMul's case 12"; + blas.BatchedGEMM(CblasTrans, trans_y ? CblasTrans : CblasNoTrans, M, N, K, + 1.0f, x_data, y_data, 0, Out->data(), out_batch_size, + M * K, 0); + } + } else if (!is_broadcast_dims) { + VLOG(3) << "MatMul's case 13"; + blas.BatchedGEMM(trans_x ? CblasTrans : CblasNoTrans, + trans_y ? CblasTrans : CblasNoTrans, M, N, K, 1.0f, x_data, + y_data, 0, Out->data(), out_batch_size, M * K, K * N); + } else { + // in the case, can't use stridedgemm + std::vector x_ptr(out_batch_size); + std::vector y_ptr(out_batch_size); + std::vector out_ptr(out_batch_size); + std::vector index(batch_dim, 0); + for (std::int64_t i = 0; i < out_batch_size; ++i) { + // using the index to get offset + const std::int64_t x_index = + GetIndexMessage(batch_dim, x_broadcast_dims.data(), index.data()); + const std::int64_t y_index = + GetIndexMessage(batch_dim, y_broadcast_dims.data(), index.data()); + + x_ptr[i] = x_data + x_index * M * K; + y_ptr[i] = y_data + y_index * K * N; + out_ptr[i] = Out->data() + i * M * N; + IndexIncreaseFromDims(batch_dim, out_broadcast_dims.data(), index.data()); + } + VLOG(3) << "MatMul's case 14"; + blas.BatchedGEMM(trans_x ? CblasTrans : CblasNoTrans, + trans_y ? 
CblasTrans : CblasNoTrans, M, N, K, 1.0f, + x_ptr.data(), y_ptr.data(), 0.0f, out_ptr.data(), + out_batch_size); + } +} + +template +void MatMulFunction(const Tensor* X, const Tensor* Y, Tensor* Out, bool trans_x, + bool trans_y, + const paddle::framework::ExecutionContext& ctx) { + const std::vector x_dims = vectorize(X->dims()); + const std::vector y_dims = vectorize(Y->dims()); + MatMulFunction(X, Y, x_dims, y_dims, Out, trans_x, trans_y, + ctx); +} + +template +class MatMulV2Kernel : public framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const override { + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* Out = ctx.Output("Out"); + bool trans_x = ctx.Attr("trans_x"); + bool trans_y = ctx.Attr("trans_y"); + MatMulFunction(X, Y, Out, trans_x, trans_y, ctx); + } +}; + +template +class MatMulV2GradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* dOut = ctx.Input(framework::GradVarName("Out")); + bool trans_x = ctx.Attr("trans_x"); + bool trans_y = ctx.Attr("trans_y"); + + // get dims + std::vector x_dims = vectorize(X->dims()); + std::vector y_dims = vectorize(Y->dims()); + std::vector dout_dims = vectorize(dOut->dims()); + + int x_ndim = x_dims.size(); + int y_ndim = y_dims.size(); + int ndim = dout_dims.size(); + + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); + + // x's or y's dim = 1 + if (x_ndim == 1 && y_ndim == 1) { + if (dx) dx->mutable_data(ctx.GetPlace()); + if (dy) dy->mutable_data(ctx.GetPlace()); + if (dOut->numel() == 1) { + DotGradFunction(X, Y, dOut, dx, dy, ctx); + return; + } + } + // It is very tricky. For this broadcast, currently using the reduce sum to + // get gradient. 
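The gradient code that follows first computes dx_help and dy_help with the batch dimensions of dOut, and then reduce-sums away the axes along which the corresponding input was broadcast. Below is a tiny numeric sketch of why that reduce-sum is needed, using toy shapes and illustrative variable names that are not part of the patch.

#include <cstdio>
#include <vector>

int main() {
  // X is (2, 1, 2); Y is (2, 1) and is broadcast over the batch dimension,
  // so Out = matmul(X, Y) is (2, 1, 1).  The per-batch gradient
  // dy_help[b] = X[b]^T * dOut[b] still carries the broadcast batch axis,
  // and dY is obtained by summing dy_help over that axis (ReduceSum).
  const int B = 2, K = 2;
  double X[B][K] = {{1.0, 2.0}, {3.0, 4.0}};   // each X[b] is 1xK
  double dOut[B] = {1.0, 0.5};                 // each dOut[b] is 1x1
  std::vector<double> dy_help(B * K), dY(K, 0.0);
  for (int b = 0; b < B; ++b)
    for (int k = 0; k < K; ++k) dy_help[b * K + k] = X[b][k] * dOut[b];
  for (int b = 0; b < B; ++b)                  // reduce over the broadcast axis
    for (int k = 0; k < K; ++k) dY[k] += dy_help[b * K + k];
  std::printf("dY = [%g, %g]\n", dY[0], dY[1]);  // expected [2.5, 4]
  return 0;
}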
+ if (x_ndim == 1) { + x_dims.insert(x_dims.begin() + 0, 1); + x_ndim += 1; + if (trans_x) + dout_dims.push_back(1); + else + dout_dims.insert(dout_dims.begin() + ndim - 1, 1); + ndim += 1; + } + + if (y_ndim == 1) { + y_dims.push_back(1); + y_ndim += 1; + if (trans_y) + dout_dims.insert(dout_dims.begin() + ndim - 1, 1); + else + dout_dims.push_back(1); + ndim += 1; + } + + // the normal case + Tensor dx_help, dy_help; + if (trans_x) { + if (trans_y) { + // X'Y': dA = Y'G', dB = G'X' + if (dx) + MatMulFunction(Y, dOut, y_dims, dout_dims, &dx_help, + true, true, ctx); + if (dy) + MatMulFunction(dOut, X, dout_dims, x_dims, &dy_help, + true, true, ctx); + } else { + // X'Y: dX = YG', dY = XG + if (dx) + MatMulFunction(Y, dOut, y_dims, dout_dims, &dx_help, + false, true, ctx); + if (dy) + MatMulFunction(X, dOut, x_dims, dout_dims, &dy_help, + false, false, ctx); + } + } else { + if (trans_y) { + // XY': dX = GY, dY = G'X + if (dx) + MatMulFunction(dOut, Y, dout_dims, y_dims, &dx_help, + false, false, ctx); + if (dy) + MatMulFunction(dOut, X, dout_dims, x_dims, &dy_help, + true, false, ctx); + } else { + // XY: dX = GY', dY = X'G + if (dx) + MatMulFunction(dOut, Y, dout_dims, y_dims, &dx_help, + false, true, ctx); + if (dy) + MatMulFunction(X, dOut, x_dims, dout_dims, &dy_help, + true, false, ctx); + } + } + // get help dims + const std::vector dx_help_dims = vectorize(dx_help.dims()); + const std::vector dy_help_dims = vectorize(dy_help.dims()); + + std::vector dx_broadcast_dims(ndim); + std::vector dy_broadcast_dims(ndim); + + std::fill(dx_broadcast_dims.data(), + dx_broadcast_dims.data() + ndim - x_ndim, 1); + std::fill(dy_broadcast_dims.data(), + dy_broadcast_dims.data() + ndim - y_ndim, 1); + std::copy(x_dims.data(), x_dims.data() + x_ndim, + dx_broadcast_dims.data() + ndim - x_ndim); + std::copy(y_dims.data(), y_dims.data() + y_ndim, + dy_broadcast_dims.data() + ndim - y_ndim); + + std::vector dx_reduce_dims; + std::vector dy_reduce_dims; + for (int idx = 0; idx <= ndim - 3; idx++) { + if (dx_help_dims[idx] != 1 && dx_broadcast_dims[idx] == 1) { + dx_reduce_dims.push_back(idx); + } + if (dy_help_dims[idx] != 1 && dy_broadcast_dims[idx] == 1) { + dy_reduce_dims.push_back(idx); + } + } + // reduce sum to get grad by ReduceSum + if (dx) { + dx->Resize(dx_help.dims()); + ReduceSumForMatmulGrad(&dx_help, dx, dx_reduce_dims, + ctx); + dx->Resize(X->dims()); + } + if (dy) { + dy->Resize(dy_help.dims()); + ReduceSumForMatmulGrad(&dy_help, dy, dy_reduce_dims, + ctx); + dy->Resize(Y->dims()); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index 86c1c3232644a1fed236563a65a16bc2f6466d49..540642c7140e707441ad9c4d71ae9b777863a7bd 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -51,11 +51,11 @@ class DeQuantOpKernel : public framework::OpKernel { mkldnn::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(input->type()); MKLDNNMemoryFormat src_fmt = input->format(); - std::string key = - platform::CreateKey(src_dt, src_tz, ctx.OutputName("Output")); - const std::string key_prim = key + "@reorder_p"; - const std::string key_src_mem = key + "@src_mem"; - const std::string key_dst_mem = key + "@dst_mem"; + std::string key = platform::CreateKey(platform::ThreadIDasStr(), src_dt, + src_tz, ctx.OutputName("Output")); + const std::string key_prim = key + "@r"; + const std::string 
key_src_mem = key + "@s"; + const std::string key_dst_mem = key + "@d"; std::shared_ptr src_memory; std::shared_ptr dst_memory; diff --git a/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc index 37b6e3bb803a2b68cec54059b266bd7585ff9958..d0ecca78ae8b27451bc51a3c1561609fc470a9f8 100644 --- a/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/operators/fill_constant_op.h" #include "paddle/fluid/operators/mean_op.h" @@ -28,21 +29,29 @@ class GaussianMKLDNNKernel : public paddle::framework::OpKernel { float std = context.Attr("std"); auto* tensor = context.Output("Out"); - unsigned int seed = static_cast(context.Attr("seed")); - std::minstd_rand engine; - if (seed == 0) { - seed = std::random_device()(); - } - engine.seed(seed); - std::normal_distribution dist(mean, std); - const std::string op_type = "gaussian_random"; auto shape = GetShape(context, op_type); tensor->Resize(shape); T* data = tensor->mutable_data(context.GetPlace()); int64_t size = tensor->numel(); - for (int64_t i = 0; i < size; ++i) { - data[i] = dist(engine); + std::normal_distribution dist(mean, std); + + if (framework::Generator::GetInstance()->is_init_py) { + std::mt19937_64& gen_engine = + framework::Generator::GetInstance()->GetCPUEngine(); + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(gen_engine); + } + } else { + unsigned int seed = static_cast(context.Attr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(engine); + } } tensor->set_layout(DataLayout::kMKLDNN); diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index 55bd683f8f4283287e1bd67810170bd4082379a6..29a86a35d7b26f41745907fb6bacf30506c027a0 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -48,11 +48,12 @@ class QuantOpKernel : public framework::OpKernel { const T* input_data = input->data(); bool is_negative = ctx.Attr("is_negative_input"); - std::string key = platform::CreateKey(src_tz, scale_data, is_negative, - ctx.OutputName("Output")); - const std::string key_prim = key + "@reorder_p"; - const std::string key_src_mem = key + "@src_mem"; - const std::string key_dst_mem = key + "@dst_mem"; + std::string key = + platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_data, + is_negative, ctx.OutputName("Output")); + const std::string key_prim = key + "@r"; + const std::string key_src_mem = key + "@s"; + const std::string key_dst_mem = key + "@d"; std::shared_ptr src_memory; std::shared_ptr dst_memory; diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index 92e7744e3c0a459f3267f4210d42752b5ec0bcc0..5ad5ad9450503111882a9b3bc2cd9161f74d500e 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -40,11 +40,12 @@ class ReQuantOpKernel : public framework::OpKernel { auto src_tz = paddle::framework::vectorize(input->dims()); - std::string key = platform::CreateKey(src_tz, scale_in, scale_out, - 
ctx.OutputName("Output")); - const std::string key_prim = key + "@reorder_p"; - const std::string key_src_mem = key + "@src_mem"; - const std::string key_dst_mem = key + "@dst_mem"; + std::string key = + platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_in, + scale_out, ctx.OutputName("Output")); + const std::string key_prim = key + "@r"; + const std::string key_src_mem = key + "@s"; + const std::string key_dst_mem = key + "@d"; std::shared_ptr src_memory; std::shared_ptr dst_memory; diff --git a/paddle/fluid/operators/nll_loss_op.cc b/paddle/fluid/operators/nll_loss_op.cc index e99ccd31714787306358d9b19b31a62ff21d5dab..f0b5f4a466a0049c53d51d8610cf115d8bfe0295 100644 --- a/paddle/fluid/operators/nll_loss_op.cc +++ b/paddle/fluid/operators/nll_loss_op.cc @@ -55,8 +55,8 @@ class NLLLossOp : public framework::OperatorWithKernel { "Input(Weight) should be a 1D tensor.")); PADDLE_ENFORCE_EQ(x_dims[1], w_dims[0], platform::errors::InvalidArgument( - "Input(Weight) Tensor's size should match" - "to the class numer.")); + "Input(Weight) Tensor's size should match " + "to the the total number of classes.")); } } if (x_dims.size() == 2) { diff --git a/paddle/fluid/operators/nll_loss_op.cu b/paddle/fluid/operators/nll_loss_op.cu index 3d618805f02aa9b6d5310bfc8a79857f522f8ac5..531c175e03e5eee3eba609c322944b1398253726 100644 --- a/paddle/fluid/operators/nll_loss_op.cu +++ b/paddle/fluid/operators/nll_loss_op.cu @@ -44,6 +44,8 @@ __global__ void GPUNLLLossForward1D_no_reduce(T* out_data, const T* x_data, out_data[i] = 0; continue; } + PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes, + "label should not be out of bounds."); const T cur_weight = weight_data ? weight_data[cur_label] : (T)1; out_data[i] = -x_data[i * n_classes + cur_label] * cur_weight; } @@ -62,6 +64,8 @@ __global__ void GPUNLLLossForward1D_with_reduce( for (i = threadIdx.x; i < batch_size; i += NTHREADS) { const auto cur_label = label_data[i]; if (cur_label != ignore_index) { + PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes, + "label should not be out of bounds."); const auto cur_weight = weight_data ? weight_data[cur_label] : (T)1; sharedInputs[threadIdx.x] -= x_data[i * n_classes + cur_label] * cur_weight; @@ -198,6 +202,8 @@ __global__ void GPUNLLLossForward2D_no_reduce( out_data[index] = 0; continue; } + PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes, + "label should not be out of bounds."); const T cur_weight = weight_data ? weight_data[cur_label] : (T)1; out_data[index] = -x_data[b * sample_size + cur_label * map_size + h * in_dim3 + w] * @@ -226,6 +232,8 @@ __global__ void GPUNLLLossForward2D_with_reduce( i < map_nelem; i += step) { const int64_t cur_label = label_data[toffset + i]; if (cur_label != ignore_index) { + PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes, + "label should not be out of bounds."); const T cur_weight = weight_data ? 
weight_data[cur_label] : (T)1; input_sum -= x_data[ioffset + i + map_nelem * cur_label] * cur_weight; acc_weight += cur_weight; diff --git a/paddle/fluid/operators/nll_loss_op.h b/paddle/fluid/operators/nll_loss_op.h index 92f3d169f3f6a3be1009d84ebd87c82691eb9f0c..e93d5792205900635093e5f18d715e4607f73cda 100644 --- a/paddle/fluid/operators/nll_loss_op.h +++ b/paddle/fluid/operators/nll_loss_op.h @@ -91,7 +91,7 @@ static void nll_loss_2D(T* out_data, T* total_weight_data, const T* x_data, } PADDLE_ENFORCE_EQ(cur_label >= 0 && cur_label < n_classes, true, platform::errors::InvalidArgument( - "label should nor be out of bounds.")); + "label should not be out of bounds.")); const auto cur_weight = weight_data ? weight_data[cur_label] : static_cast(1); out_data[index] = -x_data[i * sample_size + cur_label * map_size + @@ -117,7 +117,7 @@ static void nll_loss_2D(T* out_data, T* total_weight_data, const T* x_data, } PADDLE_ENFORCE_EQ(cur_label >= 0 && cur_label < n_classes, true, platform::errors::InvalidArgument( - "label should nor be out of bounds.")); + "label should not be out of bounds.")); const auto cur_weight = weight_data ? weight_data[cur_label] : static_cast(1); total_weight_val += cur_weight; diff --git a/paddle/fluid/operators/p_norm_op.cc b/paddle/fluid/operators/p_norm_op.cc index 057a7a38e3f40fdeb400418740dab825f532054c..aa39821051eed11c3aa02c4baabfef539d7d7692 100644 --- a/paddle/fluid/operators/p_norm_op.cc +++ b/paddle/fluid/operators/p_norm_op.cc @@ -25,34 +25,49 @@ class PnormOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "(Tensor) A tensor of rank >= axis."); AddAttr("porder", - "The porder is the p order vector norm to calculate.") + "(float, default 2) The porder is the p order vector norm " + "to calculate. Available for porder=0, inf, -inf and any " + "real number.") .SetDefault(2.0f); AddAttr("axis", - "The axis on which to apply normalization. If axis < 0, " + "The axis on which to apply norm operation. If axis < 0, " "the dimension to pnorm is rank(X) + axis. -1 is " "the last dimension.") .SetDefault(-1); AddAttr("epsilon", - "(float, default 1e-10) The epsilon value is used " + "(float, default 1e-12) The epsilon value is used " "to avoid division by zero.") .SetDefault(1.0e-12f); AddAttr( "keepdim", - "(bool, default false) Whether to keep the dimensions as the input") + "(bool, default false) Whether to keep the dimensions as the input.") .SetDefault(false); - AddOutput( - "Out", - "(Tensor) Output tensor for the `(sum(x.pow(p)) + epsion).pow(1/p)`"); + AddOutput("Out", "(Tensor) Output result tensor of p-norm"); AddComment(R"DOC( +Pnorm Operator. +Given a tensor X, compute Lp-norm of X. -Given a tensor, apply 2-normalization along the provided axis. +When p = 0, defining $0^0 = 0$, the zero-norm of X is simply the number of non-zero elements of X. +$$ +||X||_{0} = \lim_{p \rightarrow 0} \sum_i |x_i|^p +$$ + +When p = inf, the inf-norm of X is the maximum element of X. +$$ +||X||_\infty = \max_i |x_i| +$$ + +When p = -inf, the negative-inf-norm of X is the minimum element of X. +$$ +||X||_{-\infty} = \min_i |x_i| +$$ +Otherwise, the p-norm of X follows the formula, $$ -pnorm = \(\sum_i {abs\(x_i\)^p} \)^{1/p} +||X||_{p} = (\sum_i |x_i|^p)^{1/p} $$ +where, $\sum_i $ is calculated along the `axis` dimension. -where, $\sum_i{x_i^p}$ is calculated along the `axis` dimension. 
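The revised p_norm documentation and kernels treat porder = 0, +inf and -inf as special cases (count of non-zero elements, max |x|, min |x|) instead of rejecting them. A standalone sketch of the reference computation over a plain vector is given below; the helper name p_norm is illustrative and not part of the patch.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <limits>
#include <vector>

// Reference p-norm matching the special cases added to the p_norm kernels:
// p == 0    -> number of non-zero elements
// p == inf  -> max |x_i|,  p == -inf -> min |x_i|
// otherwise -> (sum_i |x_i|^p)^(1/p)
static double p_norm(const std::vector<double>& x, double p) {
  const double inf = std::numeric_limits<double>::infinity();
  if (p == 0.0) {
    return static_cast<double>(
        std::count_if(x.begin(), x.end(), [](double v) { return v != 0.0; }));
  }
  if (p == inf) {
    double m = 0.0;
    for (double v : x) m = std::max(m, std::fabs(v));
    return m;
  }
  if (p == -inf) {
    double m = std::numeric_limits<double>::max();
    for (double v : x) m = std::min(m, std::fabs(v));
    return m;
  }
  double s = 0.0;
  for (double v : x) s += std::pow(std::fabs(v), p);
  return std::pow(s, 1.0 / p);
}

int main() {
  std::vector<double> x{3.0, -4.0, 0.0};
  const double inf = std::numeric_limits<double>::infinity();
  std::printf("p=2: %g, p=0: %g, p=inf: %g, p=-inf: %g\n",
              p_norm(x, 2), p_norm(x, 0), p_norm(x, inf), p_norm(x, -inf));
  // expected: 5, 2, 4, 0
  return 0;
}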
- )DOC"); } }; @@ -63,31 +78,33 @@ class PnormOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "p_norm"); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "p_norm"); - auto porder = ctx->Attrs().Get("porder"); - PADDLE_ENFORCE_NE(porder, INFINITY, - platform::errors::Unimplemented( - "The input porder of p_norm is not support for " - "porder == 0, INFINITY, -INFINITY now.")); - PADDLE_ENFORCE_NE(porder, -INFINITY, - platform::errors::Unimplemented( - "The input porder of p_norm is not support for " - "porder == 0, INFINITY, -INFINITY now.")); - PADDLE_ENFORCE_GT(porder, 0.0f, - platform::errors::InvalidArgument( - "The input porder of p_norm is not support for " - "porder <= 0, But received porder=%f.", - porder)); - auto xdim = ctx->GetInputDim("X"); + auto x_dim = ctx->GetInputDim("X"); + auto x_rank = x_dim.size(); int axis = ctx->Attrs().Get("axis"); bool keepdim = ctx->Attrs().Get("keepdim"); - if (axis < 0) axis = xdim.size() + axis; + + PADDLE_ENFORCE_GE(axis, -x_rank, + platform::errors::InvalidArgument( + "Attr(axis) value should be in range [-R, R-1], R is " + "the rank of Input(X). But received axis: %d, R: %d. " + "Current Input(X)'s shape is=[%s].", + axis, x_rank, x_dim)); + PADDLE_ENFORCE_LT(axis, x_rank, + platform::errors::InvalidArgument( + "Attr(axis) value should be in range [-R, R-1], R is " + "the rank of Input(X). But received axis: %d, R: %d. " + "Current Input(X)'s shape is=[%s].", + axis, x_rank, x_dim)); + + if (axis < 0) axis = x_dim.size() + axis; std::vector reduce_dims; - for (int i = 0; i < xdim.size(); ++i) { - if (i != axis) reduce_dims.emplace_back(xdim[i]); + for (int i = 0; i < x_dim.size(); ++i) { + if (i != axis) reduce_dims.emplace_back(x_dim[i]); } - xdim[axis] = 1; + x_dim[axis] = 1; + if (keepdim) { - ctx->SetOutputDim("Out", xdim); + ctx->SetOutputDim("Out", x_dim); } else { ctx->SetOutputDim("Out", framework::make_ddim(reduce_dims)); } diff --git a/paddle/fluid/operators/p_norm_op.cu b/paddle/fluid/operators/p_norm_op.cu index d9ac98ff880bcf42e0af5bb75b080464c5211671..63f2a1c56c12522bc8a029e392ff02f5a28b45df 100644 --- a/paddle/fluid/operators/p_norm_op.cu +++ b/paddle/fluid/operators/p_norm_op.cu @@ -49,20 +49,70 @@ __global__ void Pnorm(const T* x, const int pre, for (int i = blockIdx.x; i < num; i += gridDim.x) { int base = (i / post) * post * axis_n + (i % post); - T sum = 0.0; - __shared__ T norm; for (int j = threadIdx.x; j < axis_n; j += blockDim.x) { const T x_ij = x[base + j * post]; sum += inline_pow(inline_abs(x_ij), porder_t); } T reduce_result = BlockReduce(temp_storage).Sum(sum); + if (threadIdx.x == 0) out_norm[i] = inline_pow(reduce_result, porder_inv); + } +} - if (threadIdx.x == 0) { - norm = inline_pow(reduce_result, porder_inv); - out_norm[i] = norm; +template +__global__ void ZeorNorm(const T* x, const int pre, + const int axis_n, // dim in axis + const int post, T* out_norm) { + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int num = pre * post; + for (int i = blockIdx.x; i < num; i += gridDim.x) { + int base = (i / post) * post * axis_n + (i % post); + T sum = 0.0; + for (int j = threadIdx.x; j < axis_n; j += blockDim.x) { + const T x_ij = x[base + j * post]; + sum += static_cast(x_ij != 0); } - __syncthreads(); + T reduce_result = BlockReduce(temp_storage).Sum(sum); + if (threadIdx.x == 0) out_norm[i] = reduce_result; + } +} + +template +__global__ 
void InfNorm(const T* x, const int pre, + const int axis_n, // dim in axis + const int post, T* out_norm) { + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int num = pre * post; + for (int i = blockIdx.x; i < num; i += gridDim.x) { + int base = (i / post) * post * axis_n + (i % post); + T cur_max = inline_abs(x[base]); + for (int j = threadIdx.x; j < axis_n; j += blockDim.x) { + T x_ij_abs = inline_abs(x[base + j * post]); + if (cur_max < x_ij_abs) cur_max = x_ij_abs; + } + T reduce_result = BlockReduce(temp_storage).Reduce(cur_max, cub::Max()); + if (threadIdx.x == 0) out_norm[i] = reduce_result; + } +} + +template +__global__ void NegInfNorm(const T* x, const int pre, + const int axis_n, // dim in axis + const int post, T* out_norm) { + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int num = pre * post; + for (int i = blockIdx.x; i < num; i += gridDim.x) { + int base = (i / post) * post * axis_n + (i % post); + T cur_min = inline_abs(x[base]); + for (int j = threadIdx.x; j < axis_n; j += blockDim.x) { + T x_ij_abs = inline_abs(x[base + j * post]); + if (cur_min > x_ij_abs) cur_min = x_ij_abs; + } + T reduce_result = BlockReduce(temp_storage).Reduce(cur_min, cub::Min()); + if (threadIdx.x == 0) out_norm[i] = reduce_result; } } @@ -89,8 +139,19 @@ class PnormCUDAKernel : public framework::OpKernel { int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); const int max_blocks = std::max(max_threads / block, 1); int grid = std::min(max_blocks, pre * post); - Pnorm<<>>(x, pre, n, post, - porder, norm); + if (porder == 0) { + ZeorNorm<<>>(x, pre, n, post, + norm); + } else if (porder == INFINITY) { + InfNorm<<>>(x, pre, n, post, + norm); + } else if (porder == -INFINITY) { + NegInfNorm<<>>(x, pre, n, + post, norm); + } else { + Pnorm<<>>(x, pre, n, post, + porder, norm); + } } }; @@ -112,7 +173,6 @@ __global__ void PnormGradient(const T* x, const T* x_norm, const T* y_grad, pnorm_i = x_norm[i]; yout_i = y_grad[i]; } - __syncthreads(); for (int j = threadIdx.x; j < axis_n; j += blockDim.x) { @@ -125,6 +185,33 @@ __global__ void PnormGradient(const T* x, const T* x_norm, const T* y_grad, } } +template +__global__ void InfNormGradient(const T* x, const T* x_norm, const T* y_grad, + const int pre, const int axis_n, const int post, + T* x_grad) { + int num = pre * post; + for (int i = blockIdx.x; i < num; i += gridDim.x) { + __shared__ T pnorm_i; + __shared__ T yout_i; + auto base = (i / post) * post * axis_n + (i % post); + if (threadIdx.x == 0) { + pnorm_i = x_norm[i]; + yout_i = y_grad[i]; + } + __syncthreads(); + + for (int j = threadIdx.x; j < axis_n; j += blockDim.x) { + int index = base + j * post; + const T x_ij = inline_abs(x[index]); + if (x_ij == pnorm_i) { + x_grad[index] = inline_sign(x[index]) * yout_i; + } else { + x_grad[index] = static_cast(0); + } + } + } +} + template class PnormGradCUDAKernel : public framework::OpKernel { public: @@ -153,8 +240,17 @@ class PnormGradCUDAKernel : public framework::OpKernel { int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); const int max_blocks = std::max(max_threads / block, 1); int grid = std::min(max_blocks, pre * post); - PnormGradient<<>>( - x, x_norm, norm_dy, porder, pre, n, post, eps, dx); + if (porder == 0) { + math::SetConstant set_zero; + auto& dev_ctx = ctx.template device_context(); + set_zero(dev_ctx, out_dx, static_cast(0)); + } else if (porder == INFINITY || porder == -INFINITY) { + InfNormGradient<<>>( + x, x_norm, 
norm_dy, pre, n, post, dx); + } else { + PnormGradient<<>>( + x, x_norm, norm_dy, porder, pre, n, post, eps, dx); + } } }; diff --git a/paddle/fluid/operators/p_norm_op.h b/paddle/fluid/operators/p_norm_op.h index c5bdfe352723b55f80376d6644922af5de099e90..7620d1421e897f1a62ddf3a6c6e725e5a0f38bf0 100644 --- a/paddle/fluid/operators/p_norm_op.h +++ b/paddle/fluid/operators/p_norm_op.h @@ -58,10 +58,20 @@ class PnormKernel : public framework::OpKernel { auto x = x_e.reshape(shape); auto norm = norm_e.reshape(norm_shape); + // p=0 means number of non-zero elements of (x) + // p=inf means the maximum of |x| + // p=-inf means the minimum of |x| + // otherwise, Lp-norm = pow(sum(pow(|x|, p)), 1/p) Eigen::DSizes rdim(1); - auto xp = (x.abs()).pow(porder); - auto sum = xp.sum(rdim); - norm.device(*place) = sum.pow(1.0f / porder); + if (porder == 0) { + norm.device(*place) = (x != x.constant(0)).template cast().sum(rdim); + } else if (porder == INFINITY) { + norm.device(*place) = x.abs().maximum(rdim); + } else if (porder == -INFINITY) { + norm.device(*place) = x.abs().minimum(rdim); + } else { + norm.device(*place) = x.abs().pow(porder).sum(rdim).pow(1.0f / porder); + } } }; @@ -102,10 +112,20 @@ class PnormGradKernel : public framework::OpKernel { Eigen::DSizes rdim(1); Eigen::DSizes bcast(1, n, 1); - dx.device(*place) = (x.abs()).pow(porder - 1.0f); - dx.device(*place) = - dx / ((norm.broadcast(bcast)).pow(porder - 1.0f) + x.constant(eps)); - dx.device(*place) = dx * norm_dy.broadcast(bcast) * x.sign(); + if (porder == 0) { + math::SetConstant set_zero; + auto& dev_ctx = ctx.template device_context(); + set_zero(dev_ctx, out_dx, static_cast(0)); + } else if (porder == INFINITY || porder == -INFINITY) { + dx.device(*place) = + (x.abs() == norm.broadcast(bcast)).template cast() * x.sign() * + norm_dy.broadcast(bcast); + } else { + dx.device(*place) = + (x.abs()).pow(porder - 1.0f) / + ((norm.broadcast(bcast)).pow(porder - 1.0f) + x.constant(eps)); + dx.device(*place) = dx * norm_dy.broadcast(bcast) * x.sign(); + } } }; } // namespace operators diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..1d41b823b6551647803ae5641f72955dbbc1eb62 --- /dev/null +++ b/paddle/fluid/operators/pad3d_op.cc @@ -0,0 +1,912 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
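The pad3d padding functors defined below differ only in how an output coordinate is mapped back to an input coordinate along each spatial dimension. A one-dimensional sketch of the four mappings follows (constant mode returns -1 to mean "write the fill value"); the function names are illustrative and not part of the patch.

#include <algorithm>
#include <cstdio>

// Map an output index to an input index for one padded dimension of size n
// with `pad` elements added in front, mirroring the pad3d modes below.
static int map_reflect(int out, int pad, int n) {
  int in = out - pad;
  in = std::max(in, -in);             // reflect across index 0
  in = std::min(in, 2 * n - in - 2);  // reflect across index n - 1
  return in;
}
static int map_replicate(int out, int pad, int n) {
  return std::min(n - 1, std::max(out - pad, 0));
}
static int map_circular(int out, int pad, int n) {
  return ((out - pad) % n + n) % n;
}
static int map_constant(int out, int pad, int n) {
  int in = out - pad;
  return (in < 0 || in >= n) ? -1 : in;  // -1 means "use the fill value"
}

int main() {
  const int n = 4, pad = 2;  // input indices 0..3, two pad elements in front
  for (int out = 0; out < n + 2 * pad; ++out) {
    std::printf("out=%d reflect=%d replicate=%d circular=%d constant=%d\n",
                out, map_reflect(out, pad, n), map_replicate(out, pad, n),
                map_circular(out, pad, n), map_constant(out, pad, n));
  }
  return 0;
}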
*/ + +#include +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +template +void ConstPad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth, + const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, const int out_d, + const int out_h, const int out_w, const T value) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + out_data[out_d * out_height * out_width + out_h * out_width + out_w] = + (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width) + ? value + : in_data[in_d * in_height * in_width + in_h * in_width + in_w]; +} + +template +void ConstPad3DFuncNDHWC(const T* in_data, T* out_data, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const int out_d, const int out_h, + const int out_w, const T value) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + if (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width) { + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = value; + } + } else { + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = in_data[in_index + c]; + } + } +} + +template +void ReflectPad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth, + const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, + const int out_d, const int out_h, const int out_w, + const T value) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = std::max(in_d, -in_d); // reflect by 0 + in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth + in_h = std::max(in_h, -in_h); // reflect by 0 + in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height + in_w = std::max(in_w, -in_w); // reflect by 0 + in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width + + out_data[out_d * out_height * out_width + out_h * out_width + out_w] = + in_data[in_d * in_height * in_width + in_h * in_width + in_w]; +} + +template +void ReflectPad3DFuncNDHWC(const T* in_data, T* out_data, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const int out_d, const int out_h, + const int out_w, const T value) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = std::max(in_d, -in_d); + in_d = std::min(in_d, 2 * in_depth - in_d - 2); + in_h = std::max(in_h, -in_h); + in_h = std::min(in_h, 2 * in_height - in_h - 2); + in_w = std::max(in_w, -in_w); + in_w = std::min(in_w, 2 * in_width - in_w - 2); + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + 
out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = in_data[in_index + c]; + } +} + +template +void ReplicatePad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth, + const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, + const int out_d, const int out_h, const int out_w, + const T value) { + int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); + int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); + int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); + + out_data[out_d * out_height * out_width + out_h * out_width + out_w] = + in_data[in_d * in_height * in_width + in_h * in_width + in_w]; +} + +template +void ReplicatePad3DFuncNDHWC(const T* in_data, T* out_data, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const int out_d, + const int out_h, const int out_w, const T value) { + int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); + int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); + int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = in_data[in_index + c]; + } +} + +template +void CircularPad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth, + const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, + const int out_d, const int out_h, const int out_w, + const T value) { + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + out_data[out_d * out_height * out_width + out_h * out_width + out_w] = + in_data[in_d * in_height * in_width + in_h * in_width + in_w]; +} + +template +void CircularPad3DFuncNDHWC(const T* in_data, T* out_data, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const int out_d, + const int out_h, const int out_w, const T value) { + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = in_data[in_index + c]; + } +} + +template +void Pad3DNCDHW(const T* in_data, const int num, const int channels, + const int in_depth, const int in_height, const int in_width, + const int out_depth, const int out_height, const int out_width, + const int pad_front, const int pad_top, const int 
pad_left, + T value, T* out_data, + void (*pad_func)(const T*, T*, const int, const int, const int, + const int, const int, const int, const int, + const int, const int, const int, const int, + const int, const T)) { + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int out_d = 0; out_d < out_depth; ++out_d) { + for (int out_h = 0; out_h < out_height; ++out_h) { + for (int out_w = 0; out_w < out_width; ++out_w) { + pad_func(in_data, out_data, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, + pad_left, out_d, out_h, out_w, value); + } + } + } + in_data += in_depth * in_height * in_width; + out_data += out_depth * out_height * out_width; + } + } +} + +template +void Pad3DNDHWC(const T* in_data, const int num, const int channels, + const int in_depth, const int in_height, const int in_width, + const int out_depth, const int out_height, const int out_width, + const int pad_front, const int pad_top, const int pad_left, + T value, T* out_data, + void (*pad_func)(const T*, T*, const int, const int, const int, + const int, const int, const int, const int, + const int, const int, const int, const int, + const int, const int, const T)) { + for (int n = 0; n < num; ++n) { + for (int out_d = 0; out_d < out_depth; ++out_d) { + for (int out_h = 0; out_h < out_height; ++out_h) { + for (int out_w = 0; out_w < out_width; ++out_w) { + pad_func(in_data, out_data, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, + pad_left, out_d, out_h, out_w, value); + } + } + } + in_data += in_depth * in_height * in_width * channels; + out_data += out_depth * out_height * out_width * channels; + } +} + +template +void ConstPad3DGradNCDHW(T* d_in_data, const T* d_out_data, const int in_depth, + const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, const int out_d, + const int out_h, const int out_w) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width)) { + d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] = + d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; + } +} + +template +void ConstPad3DGradNDHWC(T* d_in_data, const T* d_out_data, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const int out_d, const int out_h, + const int out_w) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width)) { + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + d_in_data[in_index + c] = d_out_data[out_index + c]; + } + } +} + +template +void ReflectPad3DGradNCDHW(T* d_in_data, const T* d_out_data, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const int out_d, const int out_h, + const int out_w) { + int 
in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = std::max(in_d, -in_d); // reflect by 0 + in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth + in_h = std::max(in_h, -in_h); // reflect by 0 + in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height + in_w = std::max(in_w, -in_w); // reflect by 0 + in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width + + d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] += + d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; +} + +template +void ReflectPad3DGradNDHWC(T* d_in_data, const T* d_out_data, + const int channels, const int in_depth, + const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, + const int out_d, const int out_h, const int out_w) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = std::max(in_d, -in_d); + in_d = std::min(in_d, 2 * in_depth - in_d - 2); + in_h = std::max(in_h, -in_h); + in_h = std::min(in_h, 2 * in_height - in_h - 2); + in_w = std::max(in_w, -in_w); + in_w = std::min(in_w, 2 * in_width - in_w - 2); + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + d_in_data[in_index + c] += d_out_data[out_index + c]; + } +} + +template +void ReplicatePad3DGradNCDHW(T* d_in_data, const T* d_out_data, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const int out_d, + const int out_h, const int out_w) { + int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); + int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); + int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); + + d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] += + d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; +} + +template +void ReplicatePad3DGradNDHWC(T* d_in_data, const T* d_out_data, + const int channels, const int in_depth, + const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, + const int out_d, const int out_h, + const int out_w) { + int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); + int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); + int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + d_in_data[in_index + c] += d_out_data[out_index + c]; + } +} + +template +void CircularPad3DGradNCDHW(T* d_in_data, const T* d_out_data, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const int out_d, + const int out_h, const int out_w) { + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + 
in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] += + d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; +} + +template +void CircularPad3DGradNDHWC(T* d_in_data, const T* d_out_data, + const int channels, const int in_depth, + const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, + const int out_d, const int out_h, const int out_w) { + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + d_in_data[in_index + c] += d_out_data[out_index + c]; + } +} + +template +void Pad3DGradNCDHW(T* d_in_data, const int num, const int channels, + const int in_depth, const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, const int pad_top, + const int pad_left, const T* d_out_data, + void (*pad_func)(T*, const T*, const int, const int, + const int, const int, const int, const int, + const int, const int, const int, const int, + const int, const int)) { + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int out_d = 0; out_d < out_depth; ++out_d) { + for (int out_h = 0; out_h < out_height; ++out_h) { + for (int out_w = 0; out_w < out_width; ++out_w) { + pad_func(d_in_data, d_out_data, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, + pad_left, out_d, out_h, out_w); + } + } + } + d_in_data += in_depth * in_height * in_width; + d_out_data += out_depth * out_height * out_width; + } + } +} + +template +void Pad3DGradNDHWC(T* d_in_data, const int num, const int channels, + const int in_depth, const int in_height, const int in_width, + const int out_depth, const int out_height, + const int out_width, const int pad_front, const int pad_top, + const int pad_left, const T* d_out_data, + void (*pad_func)(T*, const T*, const int, const int, + const int, const int, const int, const int, + const int, const int, const int, const int, + const int, const int, const int)) { + for (int n = 0; n < num; ++n) { + for (int out_d = 0; out_d < out_depth; ++out_d) { + for (int out_h = 0; out_h < out_height; ++out_h) { + for (int out_w = 0; out_w < out_width; ++out_w) { + pad_func(d_in_data, d_out_data, channels, in_depth, in_height, + in_width, out_depth, out_height, out_width, pad_front, + pad_top, pad_left, out_d, out_h, out_w); + } + } + } + d_in_data += in_depth * in_height * in_width * channels; + d_out_data += out_depth * out_height * out_width * channels; + } +} + +static inline std::vector GetPaddings( + const framework::ExecutionContext& context) { + std::vector paddings(6); + auto* paddings_t = context.Input("Paddings"); + if (paddings_t) { + auto paddings_data = paddings_t->data(); + std::memcpy(paddings.data(), paddings_data, paddings.size() * sizeof(int)); + } else { + auto pads = context.Attr>("paddings"); + std::copy(pads.begin(), pads.end(), paddings.data()); + } + return paddings; +} + +template +class Pad3dCPUKernel : public framework::OpKernel { + 
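The reflect, replicate and circular helpers above all reduce to a per-axis index map from a padded output coordinate back into the un-padded input, and the grad variants accumulate with += because several output positions can fold onto the same input element. A minimal standalone sketch of the three 1-D maps, written for illustration only (the helper names below are made up and are not part of the operator):

#include <algorithm>
#include <cassert>

// Map an output coordinate back to an input coordinate on one axis of size n,
// given the amount of padding added before the axis (pad_lo).
int ReflectIndex(int out_i, int pad_lo, int n) {
  int i = out_i - pad_lo;
  i = std::max(i, -i);                 // reflect across index 0
  return std::min(i, 2 * n - i - 2);   // reflect across index n - 1
}

int ReplicateIndex(int out_i, int pad_lo, int n) {
  return std::min(n - 1, std::max(out_i - pad_lo, 0));  // clamp to the edge
}

int CircularIndex(int out_i, int pad_lo, int n) {
  return ((out_i - pad_lo) % n + n) % n;  // wrap around, safe for negatives
}

int main() {
  // Width 3 padded by 2 on the left: reflect visits 2, 1, 0, ... as out_i grows.
  assert(ReflectIndex(0, 2, 3) == 2);
  assert(ReflectIndex(2, 2, 3) == 0);
  assert(ReplicateIndex(0, 2, 3) == 0);  // clamped to the first element
  assert(CircularIndex(0, 2, 3) == 1);   // (-2 mod 3) wraps to 1
  return 0;
}

The forward kernels use the same maps to read, while the backward kernels use them to scatter-add gradients back into the input.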
public: + void Compute(const framework::ExecutionContext& context) const override { + std::vector pads = GetPaddings(context); + auto mode = context.Attr("mode"); + auto data_format = context.Attr("data_format"); + T value = static_cast(context.Attr("value")); + + auto* x = context.Input("X"); + auto in_dims = x->dims(); + const T* in_data = x->data(); + + auto* out = context.Output("Out"); + if (data_format == "NCDHW") { + out->Resize({in_dims[0], in_dims[1], in_dims[2] + pads[4] + pads[5], + in_dims[3] + pads[2] + pads[3], + in_dims[4] + pads[0] + pads[1]}); + } else { + out->Resize({in_dims[0], in_dims[1] + pads[4] + pads[5], + in_dims[2] + pads[2] + pads[3], + in_dims[3] + pads[0] + pads[1], in_dims[4]}); + } + auto out_dims = out->dims(); + T* out_data = out->mutable_data(context.GetPlace()); + + int channels = in_dims[1]; + int in_depth = in_dims[2]; + int in_height = in_dims[3]; + int in_width = in_dims[4]; + int out_depth = out_dims[2]; + int out_height = out_dims[3]; + int out_width = out_dims[4]; + if (data_format == "NDHWC") { + channels = in_dims[4]; + in_depth = in_dims[1]; + in_height = in_dims[2]; + in_width = in_dims[3]; + out_depth = out_dims[1]; + out_height = out_dims[2]; + out_width = out_dims[3]; + } + + if (mode == "reflect") { + PADDLE_ENFORCE_GT(in_depth, pads[4], + platform::errors::InvalidArgument( + "The depth of Input(X)'s dimension should be " + "greater than pad_front" + " in reflect mode" + ", but received depth(%d) and pad_front(%d).", + in_depth, pads[4])); + PADDLE_ENFORCE_GT(in_depth, pads[5], + platform::errors::InvalidArgument( + "The depth of Input(X)'s dimension should be " + "greater than pad_back" + " in reflect mode" + ", but received depth(%d) and pad_back(%d).", + in_depth, pads[5])); + + PADDLE_ENFORCE_GT(in_height, pads[2], + platform::errors::InvalidArgument( + "The height of Input(X)'s dimension should be " + "greater than pad_top" + " in reflect mode" + ", but received depth(%d) and pad_top(%d).", + in_height, pads[2])); + PADDLE_ENFORCE_GT(in_height, pads[3], + platform::errors::InvalidArgument( + "The height of Input(X)'s dimension should be " + "greater than pad_bottom" + " in reflect mode" + ", but received depth(%d) and pad_bottom(%d).", + in_height, pads[3])); + + PADDLE_ENFORCE_GT(in_width, pads[0], + platform::errors::InvalidArgument( + "The width of Input(X)'s dimension should be " + "greater than pad_left" + " in reflect mode" + ", but received depth(%d) and pad_left(%d).", + in_width, pads[0])); + PADDLE_ENFORCE_GT(in_width, pads[1], + platform::errors::InvalidArgument( + "The width of Input(X)'s dimension should be " + "greater than pad_right" + " in reflect mode" + ", but received depth(%d) and pad_right(%d).", + in_width, pads[1])); + } + + const int pad_left = pads[0]; + const int pad_top = pads[2]; + const int pad_front = pads[4]; + const int num = in_dims[0]; + if (data_format == "NCDHW") { + std::map + func_map; + + func_map["reflect"] = ReflectPad3DFuncNCDHW; + func_map["replicate"] = ReplicatePad3DFuncNCDHW; + func_map["circular"] = CircularPad3DFuncNCDHW; + func_map["constant"] = ConstPad3DFuncNCDHW; + Pad3DNCDHW(in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + value, out_data, func_map[mode]); + } else { + std::map + func_map; + + func_map["reflect"] = ReflectPad3DFuncNDHWC; + func_map["replicate"] = ReplicatePad3DFuncNDHWC; + func_map["circular"] = CircularPad3DFuncNDHWC; + func_map["constant"] = ConstPad3DFuncNDHWC; + Pad3DNDHWC(in_data, num, 
channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + value, out_data, func_map[mode]); + } + } +}; + +template +class Pad3dGradCPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + std::vector pads = GetPaddings(context); + auto mode = context.Attr("mode"); + auto data_format = context.Attr("data_format"); + auto* d_out = context.Input(framework::GradVarName("Out")); + auto* d_in = context.Output(framework::GradVarName("X")); + auto d_in_dims = d_in->dims(); + auto d_out_dims = d_out->dims(); + const T* d_out_data = d_out->data(); + T* d_in_data = d_in->mutable_data(context.GetPlace()); + math::SetConstant set_zero; + set_zero(context.template device_context(), + d_in, static_cast(0)); + const int pad_left = pads[0]; + const int pad_top = pads[2]; + const int pad_front = pads[4]; + const int num = d_in_dims[0]; + if (data_format == "NCDHW") { + const int channels = d_in_dims[1]; + const int in_depth = d_in_dims[2]; + const int in_height = d_in_dims[3]; + const int in_width = d_in_dims[4]; + const int out_depth = d_out_dims[2]; + const int out_height = d_out_dims[3]; + const int out_width = d_out_dims[4]; + + std::map + func_map; + + func_map["reflect"] = ReflectPad3DGradNCDHW; + func_map["replicate"] = ReplicatePad3DGradNCDHW; + func_map["circular"] = CircularPad3DGradNCDHW; + func_map["constant"] = ConstPad3DGradNCDHW; + + Pad3DGradNCDHW(d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, + pad_left, d_out_data, func_map[mode]); + } else { + const int channels = d_in_dims[4]; + const int in_depth = d_in_dims[1]; + const int in_height = d_in_dims[2]; + const int in_width = d_in_dims[3]; + const int out_depth = d_out_dims[1]; + const int out_height = d_out_dims[2]; + const int out_width = d_out_dims[3]; + + std::map + func_map; + + func_map["reflect"] = ReflectPad3DGradNDHWC; + func_map["replicate"] = ReplicatePad3DGradNDHWC; + func_map["circular"] = CircularPad3DGradNDHWC; + func_map["constant"] = ConstPad3DGradNDHWC; + + Pad3DGradNDHWC(d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, + pad_left, d_out_data, func_map[mode]); + } + } +}; + +class Pad3dOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Pad3d"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Pad3d"); + + auto x_dim = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(x_dim.size(), 5, + platform::errors::InvalidArgument( + "The size of Input(X)'s dimension should be equal to " + "5, but received %d. 
", + x_dim.size())); + + std::vector out_dims(x_dim.size()); + auto data_format = ctx->Attrs().Get("data_format"); + out_dims[0] = x_dim[0]; + if (ctx->HasInput("Paddings")) { + auto paddings_dim = ctx->GetInputDim("Paddings"); + PADDLE_ENFORCE_EQ(paddings_dim.size(), 1, + platform::errors::InvalidArgument( + "Size of Input(Paddings)'s dimension should be " + "equal to 1, but received %d.", + paddings_dim.size())); + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ(paddings_dim[0], 6, + platform::errors::InvalidArgument( + "Shape of Input(Paddings) should be equal to " + "[6], but received [%d].", + paddings_dim[0])); + } + out_dims[1] = x_dim[1]; + out_dims[2] = x_dim[2]; + out_dims[3] = x_dim[3]; + } else { + auto paddings = ctx->Attrs().Get>("paddings"); + PADDLE_ENFORCE_EQ( + paddings.size(), 6, + platform::errors::InvalidArgument( + "Size of paddings should be equal to 4, but received %d.", + static_cast(paddings.size()))); + if (data_format == "NCDHW") { + out_dims[1] = x_dim[1]; // channel + out_dims[2] = ((!ctx->IsRuntime()) && (x_dim[2] < 0)) + ? x_dim[2] + : (x_dim[2] + paddings[4] + paddings[5]); // depth + + out_dims[3] = ((!ctx->IsRuntime()) && (x_dim[3] < 0)) + ? x_dim[3] + : (x_dim[3] + paddings[2] + paddings[3]); // height + + out_dims[4] = ((!ctx->IsRuntime()) && (x_dim[4] < 0)) + ? x_dim[4] + : (x_dim[4] + paddings[0] + paddings[1]); // width + } else { // NDHWC + out_dims[4] = x_dim[4]; // channel + + out_dims[1] = ((!ctx->IsRuntime()) && (x_dim[1] < 0)) + ? x_dim[1] + : (x_dim[1] + paddings[4] + paddings[5]); // depth + out_dims[2] = ((!ctx->IsRuntime()) && (x_dim[2] < 0)) + ? x_dim[2] + : (x_dim[2] + paddings[2] + paddings[3]); // height + out_dims[3] = ((!ctx->IsRuntime()) && (x_dim[3] < 0)) + ? x_dim[3] + : (x_dim[3] + paddings[0] + paddings[1]); // width + } + } + + ctx->SetOutputDim("Out", framework::make_ddim(out_dims)); + ctx->ShareLoD("X", /*->*/ "Out"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); + } +}; + +class Pad3dOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "The input of pad3d op. " + "The input should be a 5-D tensor with formate NCDHW or NDHWC."); + AddOutput("Out", + "The output of pad3d op. " + "A tensor with the same shape as X."); + AddInput("Paddings", + "A 1-D tensor to describe the padding rules." + "paddings=[0, 1, 2, 3, 4, 5] means " + "padding 0 column to left, 1 column to right, " + "2 row to top, 3 row to bottom, 4 depth to front " + "and 5 depth to back. Size of paddings must be 6.") + .AsDispensable(); + AddAttr>( + "paddings", + "(vector) " + "A list to describe the padding rules." + "paddings=[0, 1, 2, 3, 4, 5] means " + "padding 0 column to left, 1 column to right, " + "2 row to top, 3 row to bottom, 4 depth to front " + "and 5 depth to back. Size of paddings must be 6."); + AddAttr("value", + "(float, default 0.0) " + "The value to fill the padded areas in constant mode.") + .SetDefault(0.0f); + AddAttr( + "mode", + "(string, default constant) " + "Four modes: constant(default), reflect, replicate, circular.") + .SetDefault("constant"); + AddAttr( + "data_format", + "(string, default NCDHW) Only used in " + "An optional string from: \"NDHWC\", \"NCDHW\". " + "Defaults to \"NDHWC\". Specify the data format of the input data.") + .SetDefault("NCDHW"); + AddComment(R"DOC( +Pad3d Operator. 
+Pad 3-d images according to 'paddings' and 'mode'. +If mode is 'reflect', paddings[0] and paddings[1] must be no greater +than width-1. The height and depth dimension have the same condition. + +Given that X is a channel of image from input: + +X = [[[[[1, 2, 3], + [4, 5, 6]]]]] + +Case 0: + +paddings = [2, 2, 1, 1, 0, 0], +mode = 'constant' +pad_value = 0 + +Out = [[[[[0. 0. 0. 0. 0. 0. 0.] + [0. 0. 1. 2. 3. 0. 0.] + [0. 0. 4. 5. 6. 0. 0.] + [0. 0. 0. 0. 0. 0. 0.]]]]] + +Case 1: + +paddings = [2, 2, 1, 1, 0, 0], +mode = 'reflect' + +Out = [[[[[6. 5. 4. 5. 6. 5. 4.] + [3. 2. 1. 2. 3. 2. 1.] + [6. 5. 4. 5. 6. 5. 4.] + [3. 2. 1. 2. 3. 2. 1.]]]]] + +Case 2: + +paddings = [2, 2, 1, 1, 0, 0], +mode = 'replicate' + +Out = [[[[[1. 1. 1. 2. 3. 3. 3.] + [1. 1. 1. 2. 3. 3. 3.] + [4. 4. 4. 5. 6. 6. 6.] + [4. 4. 4. 5. 6. 6. 6.]]]]] + +Case 3: + +paddings = [2, 2, 1, 1, 0, 0], +mode = 'circular' + +Out = [[[[[5. 6. 4. 5. 6. 4. 5.] + [2. 3. 1. 2. 3. 1. 2.] + [5. 6. 4. 5. 6. 4. 5.] + [2. 3. 1. 2. 3. 1. 2.]]]]] + +)DOC"); + } +}; + +class Pad3dOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Pad3d@Grad"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + framework::GradVarName("Out"), "Pad3d@Grad"); + + auto x_dims = ctx->GetInputDim("X"); + auto x_grad_name = framework::GradVarName("X"); + if (ctx->HasOutput(x_grad_name)) { + ctx->SetOutputDim(x_grad_name, x_dims); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")), + ctx.GetPlace()); + } +}; + +template +class Pad3dOpGradMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr bind) const override { + bind->SetInput("X", this->Input("X")); + if (this->HasInput("Paddings")) { + bind->SetInput("Paddings", this->Input("Paddings")); + } + bind->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); + bind->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + bind->SetAttrMap(this->Attrs()); + bind->SetType("pad3d_grad"); + } +}; + +DECLARE_NO_NEED_BUFFER_VARS_INFERER(Pad3dOpGradNoNeedBufferVarsInferer, "X"); + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(pad3d, ops::Pad3dOp, ops::Pad3dOpMaker, + ops::Pad3dOpGradMaker, + ops::Pad3dOpGradMaker); +REGISTER_OPERATOR(pad3d_grad, ops::Pad3dOpGrad, + ops::Pad3dOpGradNoNeedBufferVarsInferer); +REGISTER_OP_CPU_KERNEL(pad3d, ops::Pad3dCPUKernel, + ops::Pad3dCPUKernel, ops::Pad3dCPUKernel, + ops::Pad3dCPUKernel); +REGISTER_OP_CPU_KERNEL(pad3d_grad, ops::Pad3dGradCPUKernel, + ops::Pad3dGradCPUKernel); diff --git a/paddle/fluid/operators/pad3d_op.cu b/paddle/fluid/operators/pad3d_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..672a75389ccf18d11e508ca94d45128b2e7b56b7 --- /dev/null +++ b/paddle/fluid/operators/pad3d_op.cu @@ -0,0 +1,788 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
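For readers of the DOC block above: the paddings vector is ordered [left, right, top, bottom, front, back], so each spatial axis of the output simply grows by the two pads that apply to it. A small sketch of that shape rule under the NCDHW and NDHWC layouts; the helper name is made up for this note and is not part of the operator:

#include <array>
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// paddings = {left, right, top, bottom, front, back}
std::array<int64_t, 5> Pad3dOutShape(const std::array<int64_t, 5>& in,
                                     const std::vector<int>& pads,
                                     const std::string& data_format) {
  std::array<int64_t, 5> out = in;
  if (data_format == "NCDHW") {
    out[2] = in[2] + pads[4] + pads[5];  // depth  += front + back
    out[3] = in[3] + pads[2] + pads[3];  // height += top + bottom
    out[4] = in[4] + pads[0] + pads[1];  // width  += left + right
  } else {  // NDHWC
    out[1] = in[1] + pads[4] + pads[5];
    out[2] = in[2] + pads[2] + pads[3];
    out[3] = in[3] + pads[0] + pads[1];
  }
  return out;
}

int main() {
  // The doc-string example: a 1x1x1x2x3 input padded by [2, 2, 1, 1, 0, 0]
  // becomes 1x1x1x4x7, matching the 4x7 matrices shown in the cases above.
  auto out = Pad3dOutShape({1, 1, 1, 2, 3}, {2, 2, 1, 1, 0, 0}, "NCDHW");
  assert(out == (std::array<int64_t, 5>{1, 1, 1, 4, 7}));
  return 0;
}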
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/gpu_info.h" + +namespace paddle { +namespace operators { + +using platform::PADDLE_CUDA_NUM_THREADS; + +using framework::Tensor; + +template +__global__ void Pad3DConstNCDHW(const int nthreads, const T* in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, T value, T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int nc = index / out_width; + + const int out_w = index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + out_data[index] = + (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width) + ? value + : in_data[nc * in_depth * in_height * in_width + + in_d * in_height * in_width + in_h * in_width + in_w]; + } +} + +template +__global__ void Pad3DConstNDHWC(const int nthreads, const T* in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, T value, T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int n = index / channels; + const int c = index % channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + const int in_d = out_d - pad_front; + const int in_h = out_h - pad_top; + const int in_w = out_w - pad_left; + + out_data[index] = + (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width) + ? 
value + : in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c]; + } +} + +template +__global__ void Pad3DReflectNCDHW(const int nthreads, const T* in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int nc = index / out_width; + + const int out_w = index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = max(in_d, -in_d); // reflect by 0 + in_d = min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth + in_h = max(in_h, -in_h); // reflect by 0 + in_h = min(in_h, 2 * in_height - in_h - 2); // reflect by in_height + in_w = max(in_w, -in_w); // reflect by 0 + in_w = min(in_w, 2 * in_width - in_w - 2); // reflect by in_width + out_data[index] = + in_data[(nc * in_depth * in_height + in_d * in_height + in_h) * + in_width + + in_w]; + } +} + +template +__global__ void Pad3DReflectNDHWC(const int nthreads, const T* in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int n = index / channels; + const int c = index % channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = max(in_d, -in_d); + in_d = min(in_d, 2 * in_depth - in_d - 2); + in_h = max(in_h, -in_h); + in_h = min(in_h, 2 * in_height - in_h - 2); + in_w = max(in_w, -in_w); + in_w = min(in_w, 2 * in_width - in_w - 2); + + out_data[index] = in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c]; + } +} + +template +__global__ void Pad3DReplicateNCDHW(const int nthreads, const T* in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int nc = index / out_width; + + const int out_w = index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); + int in_h = min(in_height - 1, max(out_h - pad_top, 0)); + int in_w = min(in_width - 1, max(out_w - pad_left, 0)); + + out_data[index] = + in_data[(nc * in_depth * in_height + in_d * in_height + in_h) * + in_width + + in_w]; + } +} + +template +__global__ void Pad3DReplicateNDHWC(const int nthreads, const T* in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, T* out_data) { + 
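All of the forward and backward CUDA kernels in this file recover the multi-dimensional output coordinate from the flat thread index by repeated modulo/divide over the fastest-varying dimensions. A host-side sketch of the NCDHW decomposition, with made-up names, that checks the mapping round-trips; it is illustrative only and not taken from the kernels themselves:

#include <cassert>

struct Coord {
  int nc, d, h, w;  // nc is the fused batch*channel index, as in the kernels
};

// Invert index = ((nc * out_depth + d) * out_height + h) * out_width + w.
Coord DecomposeNCDHW(int index, int out_depth, int out_height, int out_width) {
  Coord c;
  c.w = index % out_width;
  int rest = index / out_width;
  c.h = rest % out_height;
  rest /= out_height;
  c.d = rest % out_depth;
  c.nc = rest / out_depth;
  return c;
}

int main() {
  const int D = 3, H = 4, W = 5;
  for (int index = 0; index < 2 * D * H * W; ++index) {  // two (n, c) slices
    Coord c = DecomposeNCDHW(index, D, H, W);
    assert(((c.nc * D + c.d) * H + c.h) * W + c.w == index);  // round-trips
  }
  return 0;
}

The NDHWC kernels do the same thing but peel off the channel index first, since channels are the fastest-varying dimension in that layout.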
CUDA_KERNEL_LOOP(index, nthreads) { + int n = index / channels; + const int c = index % channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); + int in_h = min(in_height - 1, max(out_h - pad_top, 0)); + int in_w = min(in_width - 1, max(out_w - pad_left, 0)); + + out_data[index] = in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c]; + } +} + +template +__global__ void Pad3DCircularNCDHW(const int nthreads, const T* in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int nc = index / out_width; + + const int out_w = index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + out_data[index] = + in_data[(nc * in_depth * in_height + in_d * in_height + in_h) * + in_width + + in_w]; + } +} + +template +__global__ void Pad3DCircularNDHWC(const int nthreads, const T* in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int n = index / channels; + const int c = index % channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + out_data[index] = in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c]; + } +} + +template +__global__ void Pad3DGradConstNCDHW(const int in_size, T* d_in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const T* d_out_data) { + CUDA_KERNEL_LOOP(in_index, in_size) { + const int in_w = in_index % in_width; + + int nc = in_index / in_width; + const int in_h = nc % in_height; + + nc /= in_height; + const int in_d = nc % in_depth; + + nc /= in_depth; + + const int out_d = in_d + pad_front; + const int out_h = in_h + pad_top; + const int out_w = in_w + pad_left; + d_in_data[in_index] = + d_out_data[nc * out_depth * out_height * out_width + + out_d * out_height * out_width + out_h * out_width + out_w]; + } +} + +template +__global__ void Pad3DGradConstNDHWC(const int in_size, T* d_in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int 
out_width, + const int pad_front, const int pad_top, + const int pad_left, const T* d_out_data) { + CUDA_KERNEL_LOOP(in_index, in_size) { + const int c = in_index % channels; + int n = in_index / channels; + + const int in_w = n % in_width; + n /= in_width; + + const int in_h = n % in_height; + n /= in_height; + + const int in_d = n % in_depth; + n /= in_depth; + + const int out_d = in_d + pad_front; + const int out_h = in_h + pad_top; + const int out_w = in_w + pad_left; + + d_in_data[in_index] = + d_out_data[n * out_depth * out_height * out_width * channels + + out_d * out_height * out_width * channels + + out_h * out_width * channels + out_w * channels + c]; + } +} + +template +__global__ void Pad3DGradReflectNCDHW(const int out_size, T* d_in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + int nc = out_index / out_width; + const int out_w = out_index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = max(in_d, -in_d); + in_h = max(in_h, -in_h); + in_w = max(in_w, -in_w); + + in_d = min(in_d, 2 * in_depth - in_d - 2); + in_h = min(in_h, 2 * in_height - in_h - 2); + in_w = min(in_w, 2 * in_width - in_w - 2); + + platform::CudaAtomicAdd( + &d_in_data[nc * in_depth * in_height * in_width + + in_d * in_height * in_width + in_h * in_width + in_w], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradReflectNDHWC(const int out_size, T* d_in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, const int out_width, + const int pad_front, const int pad_top, + const int pad_left, const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + const int c = out_index % channels; + int n = out_index / channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = max(in_d, -in_d); + in_h = max(in_h, -in_h); + in_w = max(in_w, -in_w); + + in_d = min(in_d, in_depth * 2 - in_d - 2); + in_h = min(in_h, in_height * 2 - in_h - 2); + in_w = min(in_w, in_width * 2 - in_w - 2); + platform::CudaAtomicAdd( + &d_in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradReplicateNCDHW( + const int out_size, T* d_in_data, const int num, const int channels, + const int in_depth, const int in_height, const int in_width, + const int out_depth, const int out_height, const int out_width, + const int pad_front, const int pad_top, const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + int nc = out_index / out_width; + const int out_w = out_index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + const int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); + const int in_h = min(in_height 
- 1, max(out_h - pad_top, 0)); + const int in_w = min(in_width - 1, max(out_w - pad_left, 0)); + + platform::CudaAtomicAdd( + &d_in_data[nc * in_depth * in_height * in_width + + in_d * in_height * in_width + in_h * in_width + in_w], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradReplicateNDHWC( + const int out_size, T* d_in_data, const int num, const int channels, + const int in_depth, const int in_height, const int in_width, + const int out_depth, const int out_height, const int out_width, + const int pad_front, const int pad_top, const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + const int c = out_index % channels; + int n = out_index / channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + const int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); + const int in_h = min(in_height - 1, max(out_h - pad_top, 0)); + const int in_w = min(in_width - 1, max(out_w - pad_left, 0)); + + platform::CudaAtomicAdd( + &d_in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradCircularNCDHW(const int out_size, T* d_in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + int nc = out_index / out_width; + const int out_w = out_index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + platform::CudaAtomicAdd( + &d_in_data[nc * in_depth * in_height * in_width + + in_d * in_height * in_width + in_h * in_width + in_w], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradCircularNDHWC(const int out_size, T* d_in_data, + const int num, const int channels, + const int in_depth, const int in_height, + const int in_width, const int out_depth, + const int out_height, + const int out_width, const int pad_front, + const int pad_top, const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + const int c = out_index % channels; + int n = out_index / channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + platform::CudaAtomicAdd( + &d_in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c], + d_out_data[out_index]); + } +} + +static inline std::vector GetPaddings( + const framework::ExecutionContext& context) { + std::vector paddings(6); + auto* paddings_data = context.Input("Paddings"); + if (paddings_data) { + Tensor pads; + framework::TensorCopySync(*paddings_data, 
platform::CPUPlace(), &pads); + auto pads_data = pads.data(); + std::memcpy(paddings.data(), pads_data, paddings.size() * sizeof(int)); + } else { + auto pads = context.Attr>("paddings"); + std::copy(pads.begin(), pads.end(), paddings.data()); + } + return paddings; +} + +template +class Pad3dCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + std::vector pads = GetPaddings(context); + auto mode = context.Attr("mode"); + auto data_format = context.Attr("data_format"); + T value = static_cast(context.Attr("value")); + + auto* x = context.Input("X"); + auto in_dims = x->dims(); + const T* in_data = x->data(); + auto* out = context.Output("Out"); + auto out_dims = out->dims(); + if (data_format == "NCDHW") { + out_dims[0] = in_dims[0]; + out_dims[1] = in_dims[1]; + out_dims[2] = in_dims[2] + pads[4] + pads[5]; + out_dims[3] = in_dims[3] + pads[2] + pads[3]; + out_dims[4] = in_dims[4] + pads[0] + pads[1]; + } else { + out_dims[0] = in_dims[0]; + out_dims[1] = in_dims[1] + pads[4] + pads[5]; + out_dims[2] = in_dims[2] + pads[2] + pads[3]; + out_dims[3] = in_dims[3] + pads[0] + pads[1]; + out_dims[4] = in_dims[4]; + } + T* out_data = out->mutable_data(out_dims, context.GetPlace()); + + int channels = in_dims[1]; + int in_depth = in_dims[2]; + int in_height = in_dims[3]; + int in_width = in_dims[4]; + int out_depth = out_dims[2]; + int out_height = out_dims[3]; + int out_width = out_dims[4]; + if (data_format == "NDHWC") { + channels = in_dims[4]; + in_depth = in_dims[1]; + in_height = in_dims[2]; + in_width = in_dims[3]; + out_depth = out_dims[1]; + out_height = out_dims[2]; + out_width = out_dims[3]; + } + + if (mode == "reflect") { + PADDLE_ENFORCE_GT(in_depth, pads[4], + platform::errors::InvalidArgument( + "The depth of Input(X)'s dimension should be " + "greater than pad_front" + " in reflect mode" + ", but received depth(%d) and pad_front(%d).", + in_depth, pads[4])); + PADDLE_ENFORCE_GT(in_depth, pads[5], + platform::errors::InvalidArgument( + "The depth of Input(X)'s dimension should be " + "greater than pad_back" + " in reflect mode" + ", but received depth(%d) and pad_back(%d).", + in_depth, pads[5])); + + PADDLE_ENFORCE_GT(in_height, pads[2], + platform::errors::InvalidArgument( + "The height of Input(X)'s dimension should be " + "greater than pad_top" + " in reflect mode" + ", but received depth(%d) and pad_top(%d).", + in_height, pads[2])); + PADDLE_ENFORCE_GT(in_height, pads[3], + platform::errors::InvalidArgument( + "The height of Input(X)'s dimension should be " + "greater than pad_bottom" + " in reflect mode" + ", but received depth(%d) and pad_bottom(%d).", + in_height, pads[3])); + + PADDLE_ENFORCE_GT(in_width, pads[0], + platform::errors::InvalidArgument( + "The width of Input(X)'s dimension should be " + "greater than pad_left" + " in reflect mode" + ", but received depth(%d) and pad_left(%d).", + in_width, pads[0])); + PADDLE_ENFORCE_GT(in_width, pads[1], + platform::errors::InvalidArgument( + "The width of Input(X)'s dimension should be " + "greater than pad_right" + " in reflect mode" + ", but received depth(%d) and pad_right(%d).", + in_width, pads[1])); + } + + const int pad_left = pads[0]; + const int pad_top = pads[2]; + const int pad_front = pads[4]; + const int num = in_dims[0]; + + auto stream = context.cuda_device_context().stream(); + int block = PADDLE_CUDA_NUM_THREADS; + const int out_size = out->numel(); + int grid = (out_size + block - 1) / block; + + if (data_format == "NCDHW") { + 
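The launch configuration computed here is the usual one-thread-per-output-element scheme with a ceiling division for the grid size; CUDA_KERNEL_LOOP then grid-strides over any remainder. The backward kernels earlier in this file also launch over the output size but accumulate with CudaAtomicAdd, because under reflect/replicate/circular padding several output positions map to one input element; only constant mode can launch over the input size and assign directly. A small host-side sketch of the coverage argument, illustrative only (the block value stands in for PADDLE_CUDA_NUM_THREADS):

#include <cassert>
#include <vector>

// One thread per element, rounded up to a whole number of blocks.
int CeilDiv(int n, int block) { return (n + block - 1) / block; }

int main() {
  // Emulate the grid-stride loop: every element is visited exactly once even
  // when the element count is not a multiple of the block size.
  const int block = 512;
  const int out_size = 1234;
  const int grid = CeilDiv(out_size, block);
  std::vector<int> visits(out_size, 0);
  for (int tid = 0; tid < grid * block; ++tid) {
    for (int i = tid; i < out_size; i += grid * block) {
      ++visits[i];
    }
  }
  for (int v : visits) assert(v == 1);
  return 0;
}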
if (mode == "reflect") { + Pad3DReflectNCDHW<<>>( + out_size, in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + out_data); + } else if (mode == "replicate") { + Pad3DReplicateNCDHW<<>>( + out_size, in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + out_data); + } else if (mode == "circular") { + Pad3DCircularNCDHW<<>>( + out_size, in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + out_data); + } else { + Pad3DConstNCDHW<<>>( + out_size, in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + value, out_data); + } + } else { + if (mode == "reflect") { + Pad3DReflectNDHWC<<>>( + out_size, in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + out_data); + } else if (mode == "replicate") { + Pad3DReplicateNDHWC<<>>( + out_size, in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + out_data); + } else if (mode == "circular") { + Pad3DCircularNDHWC<<>>( + out_size, in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + out_data); + } else { + Pad3DConstNDHWC<<>>( + out_size, in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + value, out_data); + } + } + } +}; + +template +class Pad3dGradCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + std::vector pads = GetPaddings(context); + auto mode = context.Attr("mode"); + auto data_format = context.Attr("data_format"); + auto* d_out = context.Input(framework::GradVarName("Out")); + auto* d_in = context.Output(framework::GradVarName("X")); + auto d_in_dims = d_in->dims(); + auto d_out_dims = d_out->dims(); + const T* d_out_data = d_out->data(); + T* d_in_data = d_in->mutable_data(context.GetPlace()); + + math::SetConstant set_zero; + set_zero(context.template device_context(), + d_in, static_cast(0)); + + const int pad_left = pads[0]; + const int pad_top = pads[2]; + const int pad_front = pads[4]; + + const int num = d_in_dims[0]; + + auto stream = context.cuda_device_context().stream(); + int block = PADDLE_CUDA_NUM_THREADS; + const int out_size = d_out->numel(); + const int in_size = d_in->numel(); + int grid = (out_size + block - 1) / block; + + if (data_format == "NCDHW") { + const int channels = d_in_dims[1]; + const int in_depth = d_in_dims[2]; + const int in_height = d_in_dims[3]; + const int in_width = d_in_dims[4]; + const int out_depth = d_out_dims[2]; + const int out_height = d_out_dims[3]; + const int out_width = d_out_dims[4]; + + if (mode == "reflect") { + Pad3DGradReflectNCDHW<<>>( + out_size, d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + d_out_data); + } else if (mode == "replicate") { + Pad3DGradReplicateNCDHW<<>>( + out_size, d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + d_out_data); + } else if (mode == "circular") { + Pad3DGradCircularNCDHW<<>>( + out_size, d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, 
pad_front, pad_top, pad_left, + d_out_data); + } else { + grid = (in_size + block - 1) / block; + Pad3DGradConstNCDHW<<>>( + in_size, d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + d_out_data); + } + } else { + const int channels = d_in_dims[4]; + const int in_depth = d_in_dims[1]; + const int in_height = d_in_dims[2]; + const int in_width = d_in_dims[3]; + const int out_depth = d_out_dims[1]; + const int out_height = d_out_dims[2]; + const int out_width = d_out_dims[3]; + if (mode == "reflect") { + Pad3DGradReflectNDHWC<<>>( + out_size, d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + d_out_data); + } else if (mode == "replicate") { + Pad3DGradReplicateNDHWC<<>>( + out_size, d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + d_out_data); + } else if (mode == "circular") { + Pad3DGradCircularNDHWC<<>>( + out_size, d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + d_out_data); + } else { + grid = (in_size + block - 1) / block; + Pad3DGradConstNDHWC<<>>( + in_size, d_in_data, num, channels, in_depth, in_height, in_width, + out_depth, out_height, out_width, pad_front, pad_top, pad_left, + d_out_data); + } + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(pad3d, ops::Pad3dCUDAKernel, + ops::Pad3dCUDAKernel, + ops::Pad3dCUDAKernel, ops::Pad3dCUDAKernel, + ops::Pad3dCUDAKernel); +REGISTER_OP_CUDA_KERNEL(pad3d_grad, ops::Pad3dGradCUDAKernel, + ops::Pad3dGradCUDAKernel, + ops::Pad3dGradCUDAKernel); diff --git a/paddle/fluid/operators/pixel_shuffle_op.cc b/paddle/fluid/operators/pixel_shuffle_op.cc index 1ed7988dcfcc0831156c09a72e958852f3d45fb5..70d232ad6a51e21b863974e70920eb2d9da895e6 100644 --- a/paddle/fluid/operators/pixel_shuffle_op.cc +++ b/paddle/fluid/operators/pixel_shuffle_op.cc @@ -28,25 +28,44 @@ class PixelShuffleOp : public framework::OperatorWithKernel { "Output(Out) of PixelShuffleOp should not be null.")); auto input_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ( - input_dims.size(), 4, - platform::errors::InvalidArgument( - "Input should be a 4-D tensor of format [N, C, H, W], but got %u.", - input_dims.size())); + PADDLE_ENFORCE_EQ(input_dims.size(), 4, + platform::errors::InvalidArgument( + "Input should be a 4-D tensor of format [N, C, H, W] " + "or [N, H, W, C], but got %u.", + input_dims.size())); auto upscale_factor = ctx->Attrs().Get("upscale_factor"); - PADDLE_ENFORCE_EQ(input_dims[1] % (upscale_factor * upscale_factor), 0, - platform::errors::InvalidArgument( - "The square of upscale_factor[%u] should divide the " - "number of channel[%u]", - input_dims[1], upscale_factor * upscale_factor)); - + const std::string data_format = + ctx->Attrs().Get("data_format"); + const bool channel_last = (data_format == "NHWC"); + + if (!channel_last) { + PADDLE_ENFORCE_EQ( + input_dims[1] % (upscale_factor * upscale_factor), 0, + platform::errors::InvalidArgument( + "The square of upscale_factor[%u] should divide the " + "number of channel[%u]", + input_dims[1], upscale_factor * upscale_factor)); + } else { + PADDLE_ENFORCE_EQ( + input_dims[3] % (upscale_factor * upscale_factor), 0, + platform::errors::InvalidArgument( + "The square of upscale_factor[%u] should divide the " 
+ "number of channel[%u]", + input_dims[3], upscale_factor * upscale_factor)); + } auto output_dims = input_dims; output_dims[0] = input_dims[0]; - output_dims[1] = input_dims[1] / (upscale_factor * upscale_factor); - output_dims[2] = input_dims[2] * upscale_factor; - output_dims[3] = input_dims[3] * upscale_factor; + if (!channel_last) { + output_dims[1] = input_dims[1] / (upscale_factor * upscale_factor); + output_dims[2] = input_dims[2] * upscale_factor; + output_dims[3] = input_dims[3] * upscale_factor; + } else { + output_dims[1] = input_dims[1] * upscale_factor; + output_dims[2] = input_dims[2] * upscale_factor; + output_dims[3] = input_dims[3] / (upscale_factor * upscale_factor); + } ctx->SetOutputDim("Out", output_dims); } }; @@ -54,14 +73,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel { class PixelShuffleOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput( - "X", - "(Tensor, default Tensor), " - "the input feature data of PixelShuffleOp, the layout is [N C H W]."); - AddOutput( - "Out", - "(Tensor, default Tensor), the output of " - "PixelShuffleOp. The layout is [N,C/factor^2,H*factor,W*factor]."); + AddInput("X", + "(Tensor, default Tensor), " + "the input feature data of PixelShuffleOp, the layout is [N, C, " + "H, W] or [N, H, W, C]."); + AddOutput("Out", + "(Tensor, default Tensor), the output of " + "PixelShuffleOp. The layout is [N, C/factor^2, H*factor, " + "W*factor] or [N, H*factor, W*factor, C/factor^2]."); AddAttr("upscale_factor", "the factor to increase spatial resolution by.") .SetDefault(1) @@ -70,6 +89,11 @@ class PixelShuffleOpMaker : public framework::OpProtoAndCheckerMaker { platform::errors::InvalidArgument( "upscale_factor should be larger than 0.")); }); + AddAttr( + "data_format", + "An optional string from: \"NHWC\", \"NCHW\". 
" + "Defaults to \"NHWC\", Specify the data format of the input data.") + .SetDefault("NCHW"); AddComment(R"DOC( Pixel Shuffle operator @@ -114,19 +138,30 @@ class PixelShuffleGradOp : public framework::OperatorWithKernel { platform::errors::NotFound("Output(X@Grad) should not be null")); auto do_dims = ctx->GetInputDim(framework::GradVarName("Out")); - PADDLE_ENFORCE_EQ( - do_dims.size(), 4, - platform::errors::InvalidArgument( - "Input should be a 4-D tensor of format [N, C, H, W], but got %u.", - do_dims.size())); + PADDLE_ENFORCE_EQ(do_dims.size(), 4, + platform::errors::InvalidArgument( + "Input should be a 4-D tensor of format [N, C, H, W] " + "or [N, H, W, C], but got %u.", + do_dims.size())); auto upscale_factor = ctx->Attrs().Get("upscale_factor"); + const std::string data_format = + ctx->Attrs().Get("data_format"); + const bool channel_last = (data_format == "NHWC"); + auto dx_dims = do_dims; dx_dims[0] = do_dims[0]; - dx_dims[1] = do_dims[1] * (upscale_factor * upscale_factor); - dx_dims[2] = do_dims[2] / upscale_factor; - dx_dims[3] = do_dims[3] / upscale_factor; + + if (!channel_last) { + dx_dims[1] = do_dims[1] * (upscale_factor * upscale_factor); + dx_dims[2] = do_dims[2] / upscale_factor; + dx_dims[3] = do_dims[3] / upscale_factor; + } else { + dx_dims[1] = do_dims[1] / upscale_factor; + dx_dims[2] = do_dims[2] / upscale_factor; + dx_dims[3] = do_dims[3] * (upscale_factor * upscale_factor); + } ctx->SetOutputDim(framework::GradVarName("X"), dx_dims); } }; diff --git a/paddle/fluid/operators/pixel_shuffle_op.h b/paddle/fluid/operators/pixel_shuffle_op.h index 1ae1c7e9d50cb9d701fd0e79337a1906f2f5d545..b2a0db0f838d5dcc3fed2ed9838f1c43240ce0e7 100644 --- a/paddle/fluid/operators/pixel_shuffle_op.h +++ b/paddle/fluid/operators/pixel_shuffle_op.h @@ -11,6 +11,7 @@ limitations under the License. 
*/ #pragma once #include +#include #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" @@ -24,23 +25,33 @@ class PixelShuffleOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); int factor = ctx.Attr("upscale_factor"); + std::string data_format = ctx.Attr("data_format"); + bool channel_last = (data_format == "NHWC"); + auto in_dims = in->dims(); auto o_dims = out->dims(); framework::Tensor t; t.ShareDataWith(*in); - t.Resize({in_dims[0], o_dims[1], factor, factor, in_dims[2], in_dims[3]}); - + if (!channel_last) { + t.Resize({in_dims[0], o_dims[1], factor, factor, in_dims[2], in_dims[3]}); + } else { + t.Resize({in_dims[0], in_dims[1], in_dims[2], o_dims[3], factor, factor}); + } std::vector axis = {0, 1, 4, 2, 5, 3}; framework::Tensor o; o.ShareDataWith(*out); - o.Resize({in_dims[0], o_dims[1], in_dims[2], factor, in_dims[3], factor}); - + if (!channel_last) { + o.Resize({in_dims[0], o_dims[1], in_dims[2], factor, in_dims[3], factor}); + } else { + o.Resize({in_dims[0], in_dims[1], factor, in_dims[2], factor, o_dims[3]}); + } math::Transpose trans; auto& dev_ctx = ctx.template device_context(); trans(dev_ctx, t, &o, axis); @@ -58,19 +69,32 @@ class PixelShuffleGradOpKernel : public framework::OpKernel { int factor = ctx.Attr("upscale_factor"); + std::string data_format = ctx.Attr("data_format"); + bool channel_last = (data_format == "NHWC"); + auto do_dims = dout->dims(); auto dx_dims = dx->dims(); framework::Tensor t; t.ShareDataWith(*dout); - t.Resize({do_dims[0], do_dims[1], dx_dims[2], factor, dx_dims[3], factor}); - + if (!channel_last) { + t.Resize( + {do_dims[0], do_dims[1], dx_dims[2], factor, dx_dims[3], factor}); + } else { + t.Resize( + {do_dims[0], dx_dims[1], factor, dx_dims[2], factor, do_dims[3]}); + } std::vector axis = {0, 1, 3, 5, 2, 4}; framework::Tensor o; o.ShareDataWith(*dx); - o.Resize({do_dims[0], do_dims[1], factor, factor, dx_dims[2], dx_dims[3]}); - + if (!channel_last) { + o.Resize( + {do_dims[0], do_dims[1], factor, factor, dx_dims[2], dx_dims[3]}); + } else { + o.Resize( + {do_dims[0], dx_dims[1], dx_dims[2], do_dims[3], factor, factor}); + } math::Transpose trans; auto& dev_ctx = ctx.template device_context(); trans(dev_ctx, t, &o, axis); diff --git a/paddle/fluid/operators/randint_op.cc b/paddle/fluid/operators/randint_op.cc index 9f6df3f32b7463b30804555cbce4d4ee8f03a989..662fe3bcb3b3b2d26afaef0c9388dda329aea645 100644 --- a/paddle/fluid/operators/randint_op.cc +++ b/paddle/fluid/operators/randint_op.cc @@ -14,6 +14,8 @@ #include #include + +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/uniform_random_op.h" @@ -37,20 +39,30 @@ class CPURandintKernel : public framework::OpKernel { new_shape = GetNewDataFromShapeTensorList(list_new_shape_tensor); } } - auto* out = ctx.Output("Out"); if (!new_shape.empty()) out->Resize(framework::make_ddim(new_shape)); T* data = out->mutable_data(ctx.GetPlace()); int64_t size = out->numel(); - unsigned int seed = static_cast(ctx.Attr("seed")); - std::minstd_rand engine; - if (seed == 0) { - seed = std::random_device()(); - } - engine.seed(seed); + std::uniform_int_distribution dist(ctx.Attr("low"), ctx.Attr("high") - 1); - for (int64_t i = 0; i < size; ++i) data[i] = dist(engine); + + if 
(framework::Generator::GetInstance()->is_init_py) { + std::mt19937_64& gen_engine = + framework::Generator::GetInstance()->GetCPUEngine(); + for (int64_t i = 0; i < size; ++i) data[i] = dist(gen_engine); + } else { + unsigned int seed = static_cast(ctx.Attr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(engine); + } + } } }; diff --git a/paddle/fluid/operators/randperm_op.h b/paddle/fluid/operators/randperm_op.h index 64ef1c771423f2d820c73df8ed9ff25834f07875..0eb028ad806848a559ba51b9c950d324a598a851 100644 --- a/paddle/fluid/operators/randperm_op.h +++ b/paddle/fluid/operators/randperm_op.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/place.h" @@ -31,11 +32,17 @@ static inline void random_permate(T* data_ptr, int num, unsigned int seed) { for (int i = 0; i < num; ++i) { data_ptr[i] = static_cast(i); } - if (seed == 0) { - seed = std::random_device()(); + if (framework::Generator::GetInstance()->is_init_py) { + std::shuffle(data_ptr, data_ptr + num, + framework::Generator::GetInstance()->GetCPUEngine()); + + } else { + if (seed == 0) { + seed = std::random_device()(); + } + std::srand(seed); + std::random_shuffle(data_ptr, data_ptr + num); } - std::srand(seed); - std::random_shuffle(data_ptr, data_ptr + num); } template @@ -51,6 +58,7 @@ class RandpermKernel : public framework::OpKernel { if (platform::is_cpu_place(ctx.GetPlace())) { T* out_data = out_tensor->mutable_data(platform::CPUPlace()); random_permate(out_data, n, seed); + } else { framework::Tensor tmp_tensor; tmp_tensor.Resize(framework::make_ddim({n})); diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..322a1637f5deec909db13f1bd0433446cd7606ae --- /dev/null +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
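The randint and randperm changes above (and the sampling_id change later in this patch) share one pattern: draw from the process-wide generator when it has been seeded from the Python side, and only fall back to the per-operator seed attribute (or std::random_device) otherwise. A simplified sketch of that selection logic, using a stand-in struct rather than the real framework::Generator:

#include <cstdint>
#include <random>

// Stand-in for framework::Generator: a process-wide engine plus a flag that
// records whether Python code has seeded it.
struct FakeGlobalGenerator {
  bool is_init_py = false;
  std::mt19937_64 engine;
};
static FakeGlobalGenerator g_gen;

int64_t RandintOnce(int64_t low, int64_t high, unsigned int seed_attr) {
  std::uniform_int_distribution<int64_t> dist(low, high - 1);
  if (g_gen.is_init_py) {
    return dist(g_gen.engine);       // reproducible via the global seed
  }
  unsigned int seed = seed_attr != 0 ? seed_attr : std::random_device()();
  std::minstd_rand engine(seed);     // legacy per-op fallback
  return dist(engine);
}

int main() {
  g_gen.is_init_py = true;
  g_gen.engine.seed(42);
  int64_t a = RandintOnce(0, 10, 0);
  g_gen.engine.seed(42);
  int64_t b = RandintOnce(0, 10, 0);
  return a == b ? 0 : 1;  // same global seed, same draw
}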
+ +#include "paddle/fluid/operators/reduce_ops/logsumexp_op.h" +#include +#include +#include +#include + +namespace paddle { +namespace operators { + +class LogsumexpOpMaker : public ops::ReduceOpMaker { + protected: + virtual std::string GetName() const { return "logsumexp"; } + virtual std::string GetOpType() const { return "Reduce logsumexp"; } +}; + +template +class LogsumexpGradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("logsumexp_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput("Out", this->Output("Out")); + op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); + op->SetAttrMap(this->Attrs()); + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OPERATOR(logsumexp, ops::ReduceOp, ops::LogsumexpOpMaker, + ops::LogsumexpGradOpMaker, + ops::LogsumexpGradOpMaker); +REGISTER_OPERATOR(logsumexp_grad, ops::ReduceGradOp); + +REGISTER_OP_CPU_KERNEL(logsumexp, + ops::ReduceKernel, + ops::ReduceKernel); +REGISTER_OP_CPU_KERNEL( + logsumexp_grad, ops::ReduceGradKernel, + ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op.cu b/paddle/fluid/operators/reduce_ops/logsumexp_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..c25e5d01b2758a96192d6fbf8f4e881770cbbbf0 --- /dev/null +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.cu @@ -0,0 +1,27 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/reduce_ops/cub_reduce.h" +#include "paddle/fluid/operators/reduce_ops/logsumexp_op.h" + +REGISTER_OP_CUDA_KERNEL(logsumexp, + ops::ReduceKernel, + ops::ReduceKernel); +REGISTER_OP_CUDA_KERNEL( + logsumexp_grad, ops::ReduceGradKernel, + ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op.h b/paddle/fluid/operators/reduce_ops/logsumexp_op.h new file mode 100644 index 0000000000000000000000000000000000000000..1d0e00262a37ff7160abd7a865e63377f8b30461 --- /dev/null +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.h @@ -0,0 +1,58 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/fluid/operators/reduce_ops/reduce_op.h" + +namespace paddle { +namespace operators { + +struct LogsumexpFunctor { + template + void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) { + auto x_dim = x->dimensions(); + auto t_dim = x_dim; + for (int i = 0; i < static_cast(dim.size()); i++) { + t_dim[dim[i]] = 1; + } + + auto r_dim = x_dim; + for (int i = 0; i < static_cast(r_dim.size()); i++) { + r_dim[i] = 1; + } + for (int i = 0; i < static_cast(dim.size()); i++) { + r_dim[dim[i]] = x_dim[dim[i]]; + } + + auto y_dim = y->dimensions(); + auto x_max = x->maximum(dim); + y->device(place) = + (x_max + + (*x - x_max.reshape(t_dim).broadcast(r_dim)).exp().sum(dim).log()) + .reshape(y_dim); + } +}; + +struct LogsumexpGradFunctor { + template + void operator()(const DeviceContext& place, X* x, Y* y, DX* dx, DY* dy, + const Dim& dim, int size) { + dx->device(place) = dy->broadcast(dim) * (*x - y->broadcast(dim)).exp(); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc index fccf6d46895ff46c40d0a5c20d4cf1b614ad8a9e..fdb2c57385b2bc1068c618f206bfeb6513d3d8c4 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc @@ -103,11 +103,7 @@ REGISTER_OP_CPU_KERNEL(reduce_mean, ops::ReduceKernel, ops::ReduceKernel, - ops::ReduceKernel, - ops::ReduceKernel); + double, ops::MeanFunctor>); template using CPUReduceMeanGradKernel = @@ -115,6 +111,4 @@ using CPUReduceMeanGradKernel = ops::MeanGradFunctor, true>; REGISTER_OP_CPU_KERNEL(reduce_mean_grad, CPUReduceMeanGradKernel, - CPUReduceMeanGradKernel, - CPUReduceMeanGradKernel, - CPUReduceMeanGradKernel); + CPUReduceMeanGradKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cu b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cu index 4d3bce8fdd05e536baa5fecb4fc5a117e2031224..cc3653fcb43a4c000d0c61c9d854965fafd59a9c 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cu +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cu @@ -66,6 +66,4 @@ class ReduceMeanKernel : public framework::OpKernel { } // namespace paddle REGISTER_OP_CUDA_KERNEL(reduce_mean, ops::ReduceMeanKernel, - ops::ReduceMeanKernel, - ops::ReduceMeanKernel, - ops::ReduceMeanKernel); + ops::ReduceMeanKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu b/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu index 12eceb33ec27298d60713e72c9cc2cf91a5e7cfb..289f574719ff03b1b09f313d05bab152f5c5d651 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu @@ -21,6 +21,4 @@ using CUDAReduceMeanGradKernel = ops::MeanGradFunctor, true>; REGISTER_OP_CUDA_KERNEL(reduce_mean_grad, CUDAReduceMeanGradKernel, - CUDAReduceMeanGradKernel, - CUDAReduceMeanGradKernel, - CUDAReduceMeanGradKernel); + CUDAReduceMeanGradKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index d70df5cd73847e5f63ce0b44b57dbb840d98b522..67a19cb83c36f9cb6ef0cdd65e9fc04a7bb4d169 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -236,8 +236,8 @@ class ReduceGradKernel : public framework::OpKernel { if (reduce_all) { auto x = EigenVector::Flatten(*input0); - auto x_reduce = EigenVector::From(*input1); - auto x_reduce_grad = 
EigenVector::From(*input2); + auto x_reduce = EigenVector::Flatten(*input1); + auto x_reduce_grad = EigenVector::Flatten(*input2); auto x_grad = EigenVector::Flatten(*output); auto& place = *context.template device_context().eigen_device(); @@ -334,6 +334,12 @@ class ReduceOp : public framework::OperatorWithKernel { "range [-dimension(X), dimension(X)] " "which dimesion = %d. But received dim index = %d.", i, x_rank, dims[i])); + PADDLE_ENFORCE_GE(dims[i], -x_rank, + platform::errors::InvalidArgument( + "The reduce dim index %d should be in the " + "range [-dimension(X), dimension(X)] " + "which dimesion = %d. But received dim index = %d.", + i, x_rank, dims[i])); if (dims[i] < 0) dims[i] = x_rank + dims[i]; } sort(dims.begin(), dims.end()); diff --git a/paddle/fluid/operators/run_program_op.h b/paddle/fluid/operators/run_program_op.h index c0fbc336e46b64fc6ee43ef1a7372e413c5c3213..1c493fc6be093a2af8f58c8e78d1be43de34306f 100644 --- a/paddle/fluid/operators/run_program_op.h +++ b/paddle/fluid/operators/run_program_op.h @@ -29,6 +29,11 @@ limitations under the License. */ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/framework/variable.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + +DECLARE_bool(use_mkldnn); namespace paddle { namespace operators { @@ -262,6 +267,9 @@ class RunProgramOpKernel : public framework::OpKernel { } VLOG(2) << "The number of sub scopes after forward: " << out_scope_vec->front()->kids().size(); +#ifdef PADDLE_WITH_MKLDNN + if (FLAGS_use_mkldnn) DontClearMKLDNNCache(ctx.GetPlace()); +#endif } }; diff --git a/paddle/fluid/operators/sampling_id_op.h b/paddle/fluid/operators/sampling_id_op.h index 5ec32c98f7f84abb255ec996d0cf6a58e6312ec3..a09220b1ccd13604b6d842237c8176578967ac64 100644 --- a/paddle/fluid/operators/sampling_id_op.h +++ b/paddle/fluid/operators/sampling_id_op.h @@ -21,6 +21,7 @@ #include #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -61,7 +62,9 @@ class SamplingIdKernel : public framework::OpKernel { std::vector ids(batch_size); for (int i = 0; i < batch_size; ++i) { - T r = dist(engine); + T r = framework::Generator::GetInstance()->is_init_py + ? dist(framework::Generator::GetInstance()->GetCPUEngine()) + : dist(engine); int idx = width - 1; for (int j = 0; j < width; ++j) { if ((r -= ins_vector[i * width + j]) < 0) { diff --git a/paddle/fluid/operators/size_op.cc b/paddle/fluid/operators/size_op.cc index 06eaca0216b36a50028fd7cfd3c0866a5b7c1de0..b45fa7c791ff22be422ce12a8348a071c60ddd0f 100644 --- a/paddle/fluid/operators/size_op.cc +++ b/paddle/fluid/operators/size_op.cc @@ -54,5 +54,6 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL(size, ops::SizeKernel, ops::SizeKernel, + ops::SizeKernel, ops::SizeKernel, ops::SizeKernel, ops::SizeKernel); diff --git a/paddle/fluid/operators/size_op.cu b/paddle/fluid/operators/size_op.cu index 4e5846660e62543638b669d586a92fc36b0c8e87..3ea3032693236d5618ff6f0c858cbd85e34633ab 100644 --- a/paddle/fluid/operators/size_op.cu +++ b/paddle/fluid/operators/size_op.cu @@ -14,8 +14,9 @@ limitations under the License. 
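Editorial note: the PADDLE_ENFORCE_GE added to ReduceOp::InferShape above pairs with the existing PADDLE_ENFORCE_LT so that every reduce axis is rejected unless it lies in [-rank, rank) before the dims[i] = x_rank + dims[i] wrap-around runs. A hypothetical helper illustrating the invariant being protected (not Paddle code):

#include <cassert>

// With x_rank = 3: dim = -1 maps to 2, while dim = -4 now fails the check
// instead of silently wrapping to a negative index.
int NormalizeReduceAxis(int dim, int x_rank) {
  assert(dim < x_rank && dim >= -x_rank);  // the LT and new GE enforcements
  return dim < 0 ? dim + x_rank : dim;
}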
*/ #include "paddle/fluid/operators/size_op.h" -REGISTER_OP_CUDA_KERNEL(size, paddle::operators::SizeKernel, - paddle::operators::SizeKernel, - paddle::operators::SizeKernel, - paddle::operators::SizeKernel, - paddle::operators::SizeKernel); +REGISTER_OP_CUDA_KERNEL( + size, paddle::operators::SizeKernel, + paddle::operators::SizeKernel, + paddle::operators::SizeKernel, + paddle::operators::SizeKernel, paddle::operators::SizeKernel, + paddle::operators::SizeKernel); diff --git a/paddle/fluid/operators/strided_memcpy.h b/paddle/fluid/operators/strided_memcpy.h index 7528422fdc09b7894898bdee94eaa11ad2cba311..f20bada8ab288fe74fd8ca82a73522a22b234191 100644 --- a/paddle/fluid/operators/strided_memcpy.h +++ b/paddle/fluid/operators/strided_memcpy.h @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h index f416aa6e00f5a4a82c2562c36f9d32bb1a6843aa..cc2fe4cdbdb8faa69abad28fbdd31dc4e61bdc04 100644 --- a/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h +++ b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h @@ -41,12 +41,12 @@ static void InitRandom(framework::Tensor *tensor, template struct LeakyReluGradGradEachElementFunctor { - LeakyReluGradGradEachElementFunctor(const T *ddx, const T *out, T alpha, + LeakyReluGradGradEachElementFunctor(const T *ddx, const T *x, T alpha, T *ddout) - : ddx_(ddx), out_(out), alpha_(alpha), ddout_(ddout) {} + : ddx_(ddx), x_(x), alpha_(alpha), ddout_(ddout) {} HOSTDEVICE void operator()(int idx) { - if (out_[idx] > 0) { + if (x_[idx] >= 0) { ddout_[idx] = ddx_[idx]; } else { ddout_[idx] = ddx_[idx] * alpha_; @@ -54,7 +54,7 @@ struct LeakyReluGradGradEachElementFunctor { } const T *ddx_; - const T *out_; + const T *x_; T alpha_; T *ddout_; }; @@ -66,13 +66,13 @@ static bool TestLeakyReluGradGradMain(const framework::DDim &dim, LeakyReluGradGradFunctor functor; functor.alpha = alpha; auto &dev_ctx = *platform::DeviceContextPool::Instance().Get(place); - framework::Tensor *x = nullptr; + framework::Tensor *out = nullptr; framework::Tensor *dout = nullptr; framework::Tensor *dx = nullptr; - framework::Tensor out; - out.Resize(dim); - InitRandom(&out, place); + framework::Tensor x; + x.Resize(dim); + InitRandom(&x, place); framework::Tensor ddx; ddx.Resize(dim); @@ -85,22 +85,22 @@ static bool TestLeakyReluGradGradMain(const framework::DDim &dim, framework::Tensor ddout_actual; ddout_actual.mutable_data(dim, place); LeakyReluGradGradEachElementFunctor actual_functor( - ddx.data(), out.data(), static_cast(alpha), + ddx.data(), x.data(), static_cast(alpha), ddout_actual.data()); - int64_t limit = out.numel(); + int64_t limit = x.numel(); #ifdef __NVCC__ if (platform::is_gpu_place(place)) { auto &cuda_dev_ctx = dynamic_cast(dev_ctx); - functor(cuda_dev_ctx, x, &out, &ddx, &ddout, dout, dx); + functor(cuda_dev_ctx, &x, out, &ddx, &ddout, dout, dx); platform::ForRange for_range(cuda_dev_ctx, limit); for_range(actual_functor); } else { #endif auto &cpu_dev_ctx = 
dynamic_cast(dev_ctx); - functor(cpu_dev_ctx, x, &out, &ddx, &ddout, dout, dx); + functor(cpu_dev_ctx, &x, out, &ddx, &ddout, dout, dx); platform::ForRange for_range(cpu_dev_ctx, limit); for_range(actual_functor); diff --git a/paddle/fluid/operators/tile_op.cc b/paddle/fluid/operators/tile_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..da4ca87296d92fc1052f462ae6ee8a3acb05eb49 --- /dev/null +++ b/paddle/fluid/operators/tile_op.cc @@ -0,0 +1,265 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/tile_op.h" +#include +#include +#include + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class TileOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Tile"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Tile"); + auto x_dims = ctx->GetInputDim("X"); + auto repeat_times = ctx->Attrs().Get>("repeat_times"); + if (repeat_times.size() == 0) { + repeat_times = std::vector(x_dims.size(), -1); + } + + PADDLE_ENFORCE_LE( + x_dims.size(), MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of the input 'x' for tile op " + "must not be greater than %d, but the value received is %d.", + MAX_RANK_SUPPORTED, x_dims.size())); + PADDLE_ENFORCE_LE( + repeat_times.size(), MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The size of the shape of input 'repeat_times' for tile op " + "must not be greater than %d, but the value received is %d.", + MAX_RANK_SUPPORTED, repeat_times.size())); + PADDLE_ENFORCE_GE( + repeat_times.size(), 1, + platform::errors::InvalidArgument( + "The size of the shape of input 'repeat_times' for tile op " + "must be positive integers, but the value received is %d.", + repeat_times.size())); + + auto out_rank = + std::max(static_cast(x_dims.size()), repeat_times.size()); + std::vector out_shape(out_rank); + auto x_dim_vec = framework::vectorize(x_dims); + if (x_dim_vec.size() > repeat_times.size()) { + auto diff = x_dim_vec.size() - repeat_times.size(); + repeat_times.insert(repeat_times.begin(), diff, -1); + } else { + auto diff = repeat_times.size() - x_dim_vec.size(); + x_dim_vec.insert(x_dim_vec.begin(), diff, -1); + } + for (size_t i = 0; i < repeat_times.size(); ++i) { + if (x_dim_vec[i] == -1 || repeat_times[i] == -1) { + out_shape[i] = -1; + } else { + PADDLE_ENFORCE_GT( + repeat_times[i], 0, + platform::errors::InvalidArgument( + "Every element of the input 'repeat_times' for tile op must be " + "greater than 0, but the value given is %d.", + repeat_times[i])); + out_shape[i] = x_dim_vec[i] * repeat_times[i]; + } + } + + ctx->SetOutputDim("Out", framework::make_ddim(out_shape)); + if (out_shape[0] == x_dims[0]) { + ctx->ShareLoD("X", "Out"); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + 
const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.device_context()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const override { + if (var_name == "repeat_times_tensor" || var_name == "RepeatTimes") { + return expected_kernel_type; + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } +}; + +class TileOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "(Tensor, default Tensor). X is the input to be titled."); + AddInput( + "RepeatTimes", + "(Tensor, optional). If provided, it is the number of repeat times" + " along specific axis. It has a higher priority than " + "repeat_times_tensor and the repeat_times attribute.") + .AsDispensable(); + AddInput("repeat_times_tensor", + "(Tensor Tensor), repeat times for X." + "It has a higher priority than repeat_times, but a lower priority " + "than RepeatTimes") + .AsDuplicable() + .AsDispensable(); + AddOutput("Out", + "(Tensor, default Tensor). A tensor with rank in [1, 6]." + "After tiling, size of each dimension of Output(Out) is equal " + "to size of the corresponding dimension of Input(X) multiplying " + "the corresponding value given by Attr(repeat_times)."); + AddAttr>("repeat_times", + "The number of repeat times for each dimension.") + .SetDefault({}); + AddComment(R"DOC( +Tile operator repeats the input by given times number. You should set times +number for each dimension by providing attribute 'repeat_times'. The rank of X +should be in [1, 6]. Please note that size of 'repeat_times' must be the same +with X's rank. 
Following is a using case: + +Input(X) is a 3-D tensor with shape [2, 3, 1]: + + [ + [[1], [2], [3]], + [[4], [5], [6]] + ] + +Attr(repeat_times): [1, 2, 2] + +Output(Out) is a 3-D tensor with shape [2, 6, 2]: + + [ + [[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]], + [[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]] + ] + +)DOC"); + } +}; + +class TileGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "TileGrad"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + framework::GradVarName("Out"), "TileGrad"); + + auto x_dims = ctx->GetInputDim("X"); + std::vector repeat_times = + ctx->Attrs().Get>("repeat_times"); + if (repeat_times.size() == 0) { + repeat_times = std::vector(x_dims.size(), -1); + } + + auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); + auto x_dim_vec = framework::vectorize(x_dims); + if (x_dim_vec.size() > repeat_times.size()) { + auto diff = x_dim_vec.size() - repeat_times.size(); + repeat_times.insert(repeat_times.begin(), diff, -1); + } else { + auto diff = repeat_times.size() - x_dim_vec.size(); + x_dim_vec.insert(x_dim_vec.begin(), diff, -1); + } + + for (size_t i = 0; i < repeat_times.size(); ++i) { + if (repeat_times[i] == -1 || x_dim_vec[i] == -1) { + continue; + } else { + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ( + x_dim_vec[i] * repeat_times[i], out_dims[i], + platform::errors::InvalidArgument( + "The size (%d) of the dimension %d of Input(Out@GRAD) should " + "be equal to the multiplication of the crroresponding " + "dimension size of Input(X) (%d) and repeat_times (%d).", + out_dims[i], i, x_dim_vec[i], repeat_times[i])); + } + } + } + auto x_grad_name = framework::GradVarName("X"); + + if (ctx->HasOutput(x_grad_name)) { + ctx->SetOutputDim(x_grad_name, x_dims); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")), + ctx.device_context()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const override { + if (var_name == "repeat_times_tensor" || var_name == "RepeatTimes") { + return expected_kernel_type; + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } +}; + +template +class TileGradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("tile_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op->SetInput("repeat_times_tensor", this->Input("repeat_times_tensor")); + op->SetInput("RepeatTimes", this->Input("RepeatTimes")); + op->SetAttrMap(this->Attrs()); + } +}; + +DECLARE_NO_NEED_BUFFER_VARS_INFERER(TileGradNoNeedBufVarsInferer, "X"); + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(tile, ops::TileOp, ops::TileOpMaker, + ops::TileGradOpMaker, + ops::TileGradOpMaker); +REGISTER_OPERATOR(tile_grad, ops::TileGradOp, + 
ops::TileGradNoNeedBufVarsInferer); +REGISTER_OP_CPU_KERNEL( + tile, ops::TileKernel, + ops::TileKernel, + ops::TileKernel, + ops::TileKernel, + ops::TileKernel); +REGISTER_OP_CPU_KERNEL( + tile_grad, ops::TileGradKernel, + ops::TileGradKernel, + ops::TileGradKernel, + ops::TileGradKernel); diff --git a/paddle/fluid/operators/tile_op.cu b/paddle/fluid/operators/tile_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..5ca82cd6a1f43551cb4d461bc47e962abd097a9a --- /dev/null +++ b/paddle/fluid/operators/tile_op.cu @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include "paddle/fluid/operators/tile_op.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL( + tile, ops::TileKernel, + ops::TileKernel, + ops::TileKernel, + ops::TileKernel, + ops::TileKernel, + ops::TileKernel); +REGISTER_OP_CUDA_KERNEL( + tile_grad, ops::TileGradKernel, + ops::TileGradKernel, + ops::TileGradKernel, + ops::TileGradKernel, + ops::TileGradKernel); diff --git a/paddle/fluid/operators/tile_op.h b/paddle/fluid/operators/tile_op.h new file mode 100644 index 0000000000000000000000000000000000000000..c6b0fdd720cf4be79dc403a53341b18366998a67 --- /dev/null +++ b/paddle/fluid/operators/tile_op.h @@ -0,0 +1,274 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
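Editorial note: the forward TileKernel in tile_op.h below realizes the repetition with a single Eigen broadcast. A minimal standalone sketch of that mechanism, reusing the shape [2, 3, 1] / repeat_times [1, 2, 2] example from the operator documentation above (it needs only the unsupported Eigen Tensor module and is not the Paddle kernel itself):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 3> x(2, 3, 1);
  x.setValues({{{1.f}, {2.f}, {3.f}}, {{4.f}, {5.f}, {6.f}}});
  // repeat_times = [1, 2, 2] becomes the Eigen broadcast factors.
  Eigen::array<Eigen::Index, 3> bcast = {1, 2, 2};
  Eigen::Tensor<float, 3> y = x.broadcast(bcast);  // shape [2, 6, 2]
  return y.dimension(1) == 6 ? 0 : 1;
}

The kernel below does the same thing after promoting x's shape and repeat_times to a common rank and, where the size permits, switching to 32-bit indexing.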
*/ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" + +#define MAX_RANK_SUPPORTED 6 + +#define TILE_TEMPLATE(z, n, data) \ + case n + 1: { \ + Tile(context); \ + break; \ + } +#define REP_TILE_TEMPLATE(n) BOOST_PP_REPEAT(n, TILE_TEMPLATE, ~) +#define COND(n) BOOST_PP_GREATER_EQUAL(n, BOOST_PP_MOD(n, MAX_RANK_SUPPORTED)) +#define TILE_GRAD_CASE(n) \ + case n: { \ + TileBackward(context, reshape_dims_vec, reduce_dims_vec); \ + break; \ + } +#define TILE_GRAD_TEMPLATE(z, n, data) BOOST_PP_IF(COND(n), TILE_GRAD_CASE(n), ) +#define REP_TILE_GRAD_TEMPLATE(n) BOOST_PP_REPEAT(n, TILE_GRAD_TEMPLATE, ~) + +namespace paddle { +namespace operators { +inline std::vector get_repeat_times( + const framework::ExecutionContext& ctx) { + if (ctx.HasInput("RepeatTimes")) { + auto* repeat_tensor = ctx.Input("RepeatTimes"); + auto* repeat_data = repeat_tensor->data(); + framework::Tensor cpu_repeat_tensor; + if (platform::is_gpu_place(repeat_tensor->place())) { + TensorCopySync(*repeat_tensor, platform::CPUPlace(), &cpu_repeat_tensor); + repeat_data = cpu_repeat_tensor.data(); + } + auto vec_repeat_times = + std::vector(repeat_data, repeat_data + repeat_tensor->numel()); + return vec_repeat_times; + } + + auto list_repeat_times_tensor = + ctx.MultiInput("repeat_times_tensor"); + if (list_repeat_times_tensor.size() > 0) { + // get tensor from + std::vector vec_repeat_times; + for (size_t i = 0; i < list_repeat_times_tensor.size(); ++i) { + auto tensor = list_repeat_times_tensor[i]; + if (platform::is_gpu_place(tensor->place())) { + framework::Tensor temp; + TensorCopySync(*tensor, platform::CPUPlace(), &temp); + vec_repeat_times.push_back(*temp.data()); + } else { + vec_repeat_times.push_back(*tensor->data()); + } + } + return vec_repeat_times; + } else { + return ctx.Attr>("repeat_times"); + } +} + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; +template +using EigenTensor = framework::EigenTensor; +using framework::To32BitIndex; + +template +class TileKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto rank = context.Input("X")->dims().size(); + PADDLE_ENFORCE_GE( + rank, 1, platform::errors::InvalidArgument( + "The rank of the input 'x' for tile op must be a positive " + "integer, but the value received is %d.", + rank)); + PADDLE_ENFORCE_LE( + rank, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of the input 'x' for tile op " + "must be less than or equal to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, rank)); + auto repeat_times = get_repeat_times(context); + int repeat_times_size = repeat_times.size(); + PADDLE_ENFORCE_GE( + repeat_times_size, 1, + platform::errors::InvalidArgument( + "The number of elements of the input 'repeat_times' for tile " + "op must be positive, but the value received is %d.", + repeat_times_size)); + PADDLE_ENFORCE_LE( + repeat_times_size, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The number of elements of the input 'repeat_times' for tile op " + "must be less than or equal to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, repeat_times_size)); + rank = std::max(rank, repeat_times_size); + switch (rank) { REP_TILE_TEMPLATE(MAX_RANK_SUPPORTED) } + } + + protected: + template + void Tile(const 
framework::ExecutionContext& context) const { + auto* in0 = context.Input("X"); + + auto in_dims = in0->dims(); + auto repeat_times = get_repeat_times(context); + for (size_t i = 0; i < repeat_times.size(); ++i) { + PADDLE_ENFORCE_GT( + repeat_times[i], 0, + platform::errors::InvalidArgument( + "All elements of the input 'repeat_times' for tile op must " + "be positive integers, but the value received is %d.", + repeat_times[i])); + } + auto vec_in_dims = framework::vectorize(in_dims); + if (repeat_times.size() < vec_in_dims.size()) { + int diff = vec_in_dims.size() - repeat_times.size(); + repeat_times.insert(repeat_times.begin(), diff, 1); + } else { + int diff = repeat_times.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + } + PADDLE_ENFORCE_EQ( + repeat_times.size(), vec_in_dims.size(), + platform::errors::InvalidArgument( + "The rank (%d) of the input 'x' and the rank (%d) of the input " + "'repeat_times' for tile op must match after promotion.", + vec_in_dims.size(), repeat_times.size())); + auto* out0 = context.Output("Out"); + Eigen::DSizes bcast_dims; + for (size_t i = 0; i < repeat_times.size(); ++i) { + bcast_dims[i] = repeat_times[i]; + } + + framework::DDim new_in_dims = framework::make_ddim(vec_in_dims); + framework::DDim out_dims(new_in_dims); + for (size_t i = 0; i < repeat_times.size(); ++i) { + out_dims[i] *= repeat_times[i]; + } + + out0->Resize(out_dims); + auto x = EigenTensor::From(*in0, new_in_dims); + out0->mutable_data(context.GetPlace()); + auto y = EigenTensor::From(*out0, out_dims); + auto& place = + *context.template device_context().eigen_device(); + // use 32-bit index to speed up + bool use_32bit_index = y.size() < Eigen::NumTraits::highest(); + if (use_32bit_index) { + To32BitIndex(y).device(place) = To32BitIndex(x).broadcast(bcast_dims); + } else { + y.device(place) = x.broadcast(bcast_dims); + } + } +}; + +template +class TileGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in0 = context.Input("X"); + auto repeat_times = get_repeat_times(context); + auto x_dims = in0->dims(); + auto vec_in_dims = framework::vectorize(x_dims); + if (repeat_times.size() < vec_in_dims.size()) { + int diff = vec_in_dims.size() - repeat_times.size(); + repeat_times.insert(repeat_times.begin(), diff, 1); + } else { + int diff = repeat_times.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + } + // 1. reshape_dims_vec is the broadcast parameter. + // 2. reduce_dims_vec is the dimension parameter to compute gradients. For + // each dimension expanded, the gradients should be summed to original + // size. 
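    // Editorial walk-through (not part of the patch), using the doc example
    // above: for X of shape [2, 3, 1] and repeat_times = [1, 2, 2] the loop
    // below yields reshape_dims_vec = [1, 2, 2, 3, 2, 1] and
    // reduce_dims_vec = [0, 2, 4]; Out@GRAD of shape [2, 6, 2] is therefore
    // reshaped to [1, 2, 2, 3, 2, 1], summed over axes {0, 2, 4}, and
    // reshaped back to X@GRAD with shape [2, 3, 1].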
+ std::vector reshape_dims_vec; + std::vector reduce_dims_vec; + for (size_t i = 0; i < repeat_times.size(); ++i) { + reduce_dims_vec.push_back(reshape_dims_vec.size()); + reshape_dims_vec.push_back(repeat_times[i]); + reshape_dims_vec.push_back(vec_in_dims[i]); + } + + int dims = reduce_dims_vec.size(); + + bool just_copy = true; + for (size_t i = 0; i < repeat_times.size(); i++) { + if (repeat_times[i] != 1) { + just_copy = false; + break; + } + } + // no need reduce, just copy + if (just_copy) { + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + framework::TensorCopy(*in0, context.GetPlace(), context.device_context(), + out0); + } else { + PADDLE_ENFORCE_GE(dims, 1, + platform::errors::InvalidArgument( + "Th rank of the input 'Out@GRAD' for tile_grad op " + " must be greater than or equal to 1, but " + "the value received is %d.", + dims)); + PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for tile_grad op " + "must be less than or equal " + "to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, dims)); + switch (dims) { REP_TILE_GRAD_TEMPLATE(MAX_RANK_SUPPORTED) } + } + } + + protected: + template + void TileBackward(const framework::ExecutionContext& context, + const std::vector& reshape_dims_vec, + const std::vector& reduce_dims_vec) const { + size_t reshape_size = reshape_dims_vec.size(); + size_t reduce_size = reduce_dims_vec.size(); + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + auto x_grad = EigenVector::Flatten(*out0); + Eigen::DSizes reshape_dims; + for (size_t i = 0; i < reshape_size; ++i) { + reshape_dims[i] = reshape_dims_vec[i]; + } + Eigen::DSizes reduce_dims; + for (size_t i = 0; i < reduce_size; ++i) { + reduce_dims[i] = reduce_dims_vec[i]; + } + auto out_grad = EigenVector::Flatten(*in0); + x_grad.device( + *context.template device_context().eigen_device()) = + out_grad.reshape(reshape_dims) + .sum(reduce_dims) + .reshape(x_grad.dimensions()); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/truncated_gaussian_random_op.cc b/paddle/fluid/operators/truncated_gaussian_random_op.cc index 9e158abba747d124c83e0366b9c0c5845c49e183..3aa9ff544af63993521d41604cecef0b283ebc1e 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -161,18 +162,27 @@ class CPUTruncatedGaussianRandomKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - unsigned int seed = static_cast(context.Attr("seed")); - std::minstd_rand engine; - if (seed == 0) { - seed = std::random_device()(); - } - engine.seed(seed); std::uniform_real_distribution dist(std::numeric_limits::min(), 1.0); TruncatedNormal truncated_normal(mean, std); int64_t size = tensor->numel(); - for (int64_t i = 0; i < size; ++i) { - data[i] = truncated_normal(dist(engine)); + + if (framework::Generator::GetInstance()->is_init_py) { + std::mt19937_64& gen_engine = + framework::Generator::GetInstance()->GetCPUEngine(); + for (int64_t i = 0; i < size; ++i) { + data[i] = truncated_normal(dist(gen_engine)); + } + } else { + unsigned int seed = static_cast(context.Attr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + for (int64_t i = 0; i < size; ++i) { + data[i] = truncated_normal(dist(engine)); + } } } }; diff --git a/paddle/fluid/operators/uniform_random_op.cc b/paddle/fluid/operators/uniform_random_op.cc index e0c56307639afeb70e5cc45a4022996cef52a475..a4487cde277990a725fd4c37b6d807278e314343 100644 --- a/paddle/fluid/operators/uniform_random_op.cc +++ b/paddle/fluid/operators/uniform_random_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/uniform_random_op.h" #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" + namespace paddle { namespace operators { @@ -55,19 +57,40 @@ class CPUUniformRandomKernel : public framework::OpKernel { "supports SelectedRows and LoDTensor"); } T *data = tensor->mutable_data(ctx.GetPlace()); - unsigned int seed = static_cast(ctx.Attr("seed")); - std::minstd_rand engine; - if (seed == 0) { - seed = std::random_device()(); - } - engine.seed(seed); + + int64_t size = tensor->numel(); std::uniform_real_distribution dist( static_cast(ctx.Attr("min")), static_cast(ctx.Attr("max"))); - int64_t size = tensor->numel(); - for (int64_t i = 0; i < size; ++i) { - data[i] = dist(engine); + auto gen_ptr = framework::Generator::GetInstance(); + if (gen_ptr->is_init_py) { + std::mt19937_64 &gen_engine = gen_ptr->GetCPUEngine(); + // auto gen_engine = gen_ptr_->GetCPUEngine(); + // std::uniform_real_distribution dist( + // static_cast(ctx.Attr("min")), + // static_cast(ctx.Attr("max"))); + + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(gen_engine); + } + } else { + unsigned int seed = static_cast(ctx.Attr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + // std::uniform_real_distribution dist( + // static_cast(ctx.Attr("min")), + // static_cast(ctx.Attr("max"))); + // int64_t size = tensor->numel(); + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(engine); + } } + // std::mt19937_64 &engine = gen_ptr->GetCPUEngine(); + // auto engine = gen_ptr_->GetCPUEngine(); + unsigned int diag_num = static_cast(ctx.Attr("diag_num")); unsigned int diag_step = diff --git a/paddle/fluid/operators/uniform_random_op.cu b/paddle/fluid/operators/uniform_random_op.cu index 
53c79cf672e7d71ea2e7202f624a0110cc6ce41d..c024bb87b09c00c34dbaaf7b747f29743152502f 100644 --- a/paddle/fluid/operators/uniform_random_op.cu +++ b/paddle/fluid/operators/uniform_random_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/uniform_random_op.h" @@ -87,9 +88,14 @@ class GPUUniformRandomKernel : public framework::OpKernel { } T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.Attr("seed")); - if (seed == 0) { - std::random_device rd; - seed = rd(); + if (framework::Generator::GetInstance()->is_init_py) { + seed = static_cast( + framework::Generator::GetInstance()->GetCurrentSeed()); + } else { + if (seed == 0) { + std::random_device rd; + seed = rd(); + } } T min = static_cast(context.Attr("min")); T max = static_cast(context.Attr("max")); diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index 867b10441640c63fec9018363a59d29ac52c8743..d263dd03dd0de0d1b12925d0c3ec428b6730ef2e 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -17,6 +17,7 @@ #include #include #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/xpu/mul_xpu_op.cc b/paddle/fluid/operators/xpu/mul_xpu_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..79aae71c3045f938f4b8f0d3e05ce7cf358c41ea --- /dev/null +++ b/paddle/fluid/operators/xpu/mul_xpu_op.cc @@ -0,0 +1,183 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_XPU + +#include +#include +#include +#include +#include "paddle/fluid/operators/mul_op.h" + +namespace paddle { +namespace operators { + +using framework::OpKernelType; +using framework::Tensor; + +template +class MulXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* x = context.Input("X"); + const Tensor* y = context.Input("Y"); + Tensor* z = context.Output("Out"); + const Tensor x_matrix = + x->dims().size() > 2 + ? framework::ReshapeToMatrix( + *x, context.template Attr("x_num_col_dims")) + : *x; + const Tensor y_matrix = + y->dims().size() > 2 + ? 
framework::ReshapeToMatrix( + *y, context.template Attr("y_num_col_dims")) + : *y; + z->mutable_data(context.GetPlace()); + auto z_dim = z->dims(); + if (z_dim.size() != 2) { + z->Resize({x_matrix.dims()[0], y_matrix.dims()[1]}); + } + bool trans_a = false; + bool trans_b = false; + int m = x_matrix.dims()[0]; + int k = x_matrix.dims()[1]; + int k1 = y_matrix.dims()[0]; + int n = y_matrix.dims()[1]; + PADDLE_ENFORCE_EQ( + k, k1, platform::errors::InvalidArgument("Shape mistake in mul_op")); + T alpha = static_cast(1.0); + T beta = static_cast(0.0); + const T* data_a = x_matrix.data(); + const T* data_b = y_matrix.data(); + T* data_c = z->data(); + auto& dev_ctx = context.template device_context(); + int ret = xpu::fc_int16(dev_ctx.x_context(), trans_a, trans_b, m, n, k, + alpha, data_a, data_b, beta, data_c); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + if (z_dim.size() != 2) { + z->Resize(z_dim); + } + } +}; + +template +class MulGradXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + int x_num_col_dims = ctx.template Attr("x_num_col_dims"); + int y_num_col_dims = ctx.template Attr("y_num_col_dims"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto x_matrix = x->dims().size() > 2 + ? framework::ReshapeToMatrix(*x, x_num_col_dims) + : static_cast(*x); + auto y_matrix = y->dims().size() > 2 + ? framework::ReshapeToMatrix(*y, y_num_col_dims) + : static_cast(*y); + auto* dout = ctx.Input(framework::GradVarName("Out")); + Tensor dout_mat; + dout_mat.Resize({framework::flatten_to_2d(x->dims(), x_num_col_dims)[0], + framework::flatten_to_2d(y->dims(), y_num_col_dims)[1]}); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); + if (dx != nullptr) { + dx->set_lod(x->lod()); + } + if (dy != nullptr) { + dy->set_lod(y->lod()); + } + auto& dev_ctx = ctx.template device_context(); + if (dx) { + dx->mutable_data(ctx.GetPlace()); + Tensor dx_matrix = dx->dims().size() > 2 + ? framework::ReshapeToMatrix(*dx, x_num_col_dims) + : *dx; + // dx = dout * y'. dx: M x K, dout : M x N, y : K x N + // blas.MatMul(dout_mat, false, y_matrix, true, &dx_matrix); + bool trans_a = false; + bool trans_b = true; + int m = dout_mat.dims()[0]; + int k = dout_mat.dims()[1]; + int n = y_matrix.dims()[0]; + int k1 = y_matrix.dims()[1]; + PADDLE_ENFORCE_EQ( + k, k1, platform::errors::InvalidArgument("Shape mistake in mul_op")); + int lda = (!trans_a) ? k : m; + int ldb = (!trans_b) ? n : k; + int ldc = n; + T alpha = static_cast(1.0); + T beta = static_cast(0.0); + const T* data_a = dout->data(); + const T* data_b = y_matrix.data(); + T* data_c = dx_matrix.data(); + int ret = + xpu::gemm_int16(dev_ctx.x_context(), trans_a, trans_b, m, n, k, alpha, + data_a, lda, data_b, ldb, beta, data_c, ldc); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check " + "where Baidu Kunlun Card is properly installed.", + ret)); + } + + if (dy) { + dy->mutable_data(ctx.GetPlace()); + Tensor dy_matrix = dy->dims().size() > 2 + ? framework::ReshapeToMatrix(*dy, y_num_col_dims) + : *dy; + // dy = x' * dout. 
dy K x N, dout : M x N, x : M x K + // blas.MatMul(x_matrix, true, dout_mat, false, &dy_matrix); + bool trans_a = true; + bool trans_b = false; + int k = x_matrix.dims()[0]; + int m = x_matrix.dims()[1]; + int k1 = dout_mat.dims()[0]; + int n = dout_mat.dims()[1]; + PADDLE_ENFORCE_EQ( + k, k1, platform::errors::InvalidArgument("Shape mistake in mul_op")); + int lda = (!trans_a) ? k : m; + int ldb = (!trans_b) ? n : k; + int ldc = n; + T alpha = static_cast(1.0); + T beta = static_cast(0.0); + const T* data_a = x_matrix.data(); + const T* data_b = dout->data(); + T* data_c = dy_matrix.data(); + int ret = + xpu::gemm_int16(dev_ctx.x_context(), trans_a, trans_b, m, n, k, alpha, + data_a, lda, data_b, ldb, beta, data_c, ldc); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check " + "where Baidu Kunlun Card is properly installed.", + ret)); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_XPU_KERNEL( + mul, ops::MulXPUKernel); +REGISTER_OP_XPU_KERNEL( + mul_grad, ops::MulGradXPUKernel) +#endif diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 5a100c5746e616e860811dd47da27036ea7355d5..15530738593419cd821bed1f12ec63dfed43cd7c 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -4,6 +4,12 @@ if(WITH_GPU) proto_library(cuda_error_proto SRCS cuda_error.proto) endif(WITH_GPU) +if(WITH_XPU) + set(XPU_CTX_DEPS xpulib) +ELSE() + set(XPU_CTX_DEPS) +endif(WITH_XPU) + if (WITH_PYTHON) py_proto_compile(profiler_py_proto SRCS profiler.proto) add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) @@ -45,11 +51,15 @@ ENDIF() cc_library(cpu_info SRCS cpu_info.cc DEPS ${CPU_INFO_DEPS}) cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info) -nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce monitor) +nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce monitor dynload_cuda) cc_library(place SRCS place.cc DEPS enforce boost) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) +if(WITH_XPU) +cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce) +endif() + add_subdirectory(dynload) add_subdirectory(stream) @@ -84,7 +94,7 @@ cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost) # avoiding cycle dependencies cc_library(device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc xxhash ${STREAM_CALLBACK_DEPS} place eigen3 stringpiece cpu_helper cpu_info framework_proto ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS} - ${dgc_deps} dlpack cudnn_workspace_helper) + ${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS}) cc_library(collective_helper SRCS collective_helper.cc DEPS framework_proto device_context enforce) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 38b0894c3f71dc150a9ed737b0ac17b22baffb8a..29982c13c8ca88bc8b4a168f92e4116a283a97e8 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -61,7 +61,8 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) { if (it == device_contexts_.end()) { PADDLE_THROW(platform::errors::Unimplemented( "Place %s is not supported. 
Please check that your paddle compiles " - "with WITH_GPU option or check that your train process hold the " + "with WITH_GPU or WITH_XPU option or check that your train process " + "hold the " "correct gpu_id if you use Executor.", place)); } @@ -115,6 +116,14 @@ DeviceContextPool::DeviceContextPool( PADDLE_THROW(platform::errors::Unimplemented( "CUDAPlace is not supported. Please re-compile with WITH_GPU " "option.")); +#endif + } else if (platform::is_xpu_place(p)) { +#ifdef PADDLE_WITH_XPU + EmplaceDeviceContext(&device_contexts_, p); +#else + PADDLE_THROW( + platform::errors::Unimplemented("XPUPlace is not supported. Please " + "re-compile with WITH_XPU option.")); #endif } } @@ -134,6 +143,49 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const { Place CPUDeviceContext::GetPlace() const { return place_; } +#ifdef PADDLE_WITH_XPU +XPUDeviceContext::XPUDeviceContext() { context_ = xpu::create_context(); } + +XPUDeviceContext::~XPUDeviceContext() { xpu::destroy_context(context_); } + +XPUDeviceContext::XPUDeviceContext(XPUPlace place) : place_(place) { + int dev_id = -1; + int ret = xpu_current_device(&dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + ret = xpu_set_device(place.device); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + context_ = xpu::create_context(); + ret = xpu_set_device(dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); +} + +void XPUDeviceContext::Wait() const { + int ret = xpu_set_device(place_.device); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + xpu_wait(); +} + +Place XPUDeviceContext::GetPlace() const { return place_; } + +xpu::Context* XPUDeviceContext::x_context() const { return context_; } +#endif + #ifdef PADDLE_WITH_CUDA class EigenCudaStreamDevice : public Eigen::StreamInterface { @@ -412,9 +464,21 @@ MKLDNNDeviceContextThreadLocals::Body::get_cur_paddle_data_layout(void) { return cur_paddle_data_layout; } -void MKLDNNDeviceContext::ResetBlobMap() const { - VLOG(3) << "Clearing DNNL cache."; - p_blobmap_->clear(); +void MKLDNNDeviceContext::ResetBlobMap() { + std::lock_guard lock(*p_mutex_); + if (!block_next_cache_clearing_) { + VLOG(3) << "Clearing DNNL cache."; + p_blobmap_->clear(); + } else { + VLOG(3) << "Prevented Clearing DNNL cache."; + block_next_cache_clearing_ = false; + } +} + +void MKLDNNDeviceContext::BlockNextCacheClearing() { + std::lock_guard lock(*p_mutex_); + VLOG(3) << "Next DNNL cache clearing has been blocked."; + block_next_cache_clearing_ = true; } size_t MKLDNNDeviceContext::GetShapeBlobSize() const { diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 7511edb9ccf2c6ca1d5aea2964799b8be08064b6..8bfdfc8a1c6033a79c197e1cd425197f77079bda 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -43,6 +43,10 @@ limitations under the License. 
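Editorial note: the block_next_cache_clearing_ flag gives run_program_op (earlier in this patch) a way to keep the DNNL blob cache alive across the executor's scope cleanup: DontClearMKLDNNCache(), added to mkldnn_helper.h further down, calls BlockNextCacheClearing(), and the next ResetBlobMap() consumes the flag instead of clearing. A condensed, single-threaded sketch of that hand-off (the real code also locks p_mutex_, omitted here):

// Sketch only: condenses the MKLDNNDeviceContext cache-clearing logic.
class CacheGuard {
 public:
  void BlockNextCacheClearing() { block_next_clearing_ = true; }
  void ResetBlobMap() {
    if (!block_next_clearing_) {
      // ... clear the blob map here ...
    } else {
      block_next_clearing_ = false;  // skip exactly one clearing
    }
  }

 private:
  bool block_next_clearing_ = false;
};

// Usage, mirroring RunProgramOpKernel: block once so the cache survives the
// cleanup that immediately follows, then behave normally again.
//   guard.BlockNextCacheClearing();
//   guard.ResetBlobMap();   // skipped
//   guard.ResetBlobMap();   // clears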
*/ #endif #include "unsupported/Eigen/CXX11/Tensor" +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu_header.h" +#endif + namespace paddle { namespace platform { @@ -76,6 +80,35 @@ struct DefaultDeviceContextType { using TYPE = CPUDeviceContext; }; +#ifdef PADDLE_WITH_XPU +class XPUDeviceContext : public DeviceContext { + public: + XPUDeviceContext(); + explicit XPUDeviceContext(XPUPlace place); + virtual ~XPUDeviceContext(); + Eigen::DefaultDevice* eigen_device() const { return nullptr; } + Place GetPlace() const override; + xpu::Context* x_context() const; + + /*! \brief Wait for all operations completion in the stream. */ + void Wait() const override; + + private: + XPUPlace place_; + xpu::Context* context_; + + // Need to be the same with other DeviceContext, + // Eventhough eigen_device_ is not used in XPU + std::unique_ptr eigen_device_; + DISABLE_COPY_AND_ASSIGN(XPUDeviceContext); +}; + +template <> +struct DefaultDeviceContextType { + using TYPE = XPUDeviceContext; +}; +#endif + #ifdef PADDLE_WITH_CUDA class EigenCudaStreamDevice; @@ -487,7 +520,10 @@ class MKLDNNDeviceContext : public CPUDeviceContext { const mkldnn::engine& GetEngine() const { return engine_; } // Remove all entries from the blob map - void ResetBlobMap() const; + void ResetBlobMap(); + + // Prevent next ResetBlobMap() + void BlockNextCacheClearing(); // Get the ShapeBlob size in cur_mkldnn_session_id. size_t GetShapeBlobSize() const; @@ -506,6 +542,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext { mkldnn::engine engine_; std::shared_ptr p_blobmap_; std::shared_ptr p_mutex_; + bool block_next_cache_clearing_ = false; }; #endif diff --git a/paddle/fluid/platform/device_context_xpu_test.cc b/paddle/fluid/platform/device_context_xpu_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..3de2e3957a990a254ffb762f996876a122a865bc --- /dev/null +++ b/paddle/fluid/platform/device_context_xpu_test.cc @@ -0,0 +1,53 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ +#include "paddle/fluid/platform/device_context.h" + +#include + +#include "glog/logging.h" +#include "gtest/gtest.h" + +TEST(Device, Init) { + using paddle::platform::DeviceContext; + using paddle::platform::XPUDeviceContext; + using paddle::platform::XPUPlace; + + int count = paddle::platform::GetXPUDeviceCount(); + for (int i = 0; i < count; i++) { + XPUDeviceContext* device_context = new XPUDeviceContext(XPUPlace(i)); + xpu::Context* ctx = device_context->x_context(); + ASSERT_NE(nullptr, ctx); + delete device_context; + } +} + +TEST(Device, DeviceContextPool) { + using paddle::platform::DeviceContextPool; + using paddle::platform::XPUDeviceContext; + using paddle::platform::Place; + using paddle::platform::CPUPlace; + using paddle::platform::XPUPlace; + + DeviceContextPool& pool = DeviceContextPool::Instance(); + auto cpu_dev_ctx1 = pool.Get(CPUPlace()); + auto cpu_dev_ctx2 = pool.Get(CPUPlace()); + ASSERT_EQ(cpu_dev_ctx2, cpu_dev_ctx1); + + std::vector xpu_places; + int count = paddle::platform::GetXPUDeviceCount(); + for (int i = 0; i < count; ++i) { + auto dev_ctx = pool.Get(XPUPlace(i)); + ASSERT_NE(dev_ctx, nullptr); + } +} diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index 0eb28f0c0c3561f98891ff2a0ab5a26a20b07fb4..ebeb14e940e5fd904e506bca565c4aeae84c93cf 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -100,6 +100,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name); __macro(cudnnCreateDropoutDescriptor); \ __macro(cudnnDropoutGetStatesSize); \ __macro(cudnnSetDropoutDescriptor); \ + __macro(cudnnRestoreDropoutDescriptor); \ __macro(cudnnCreateRNNDescriptor); \ __macro(cudnnGetRNNParamsSize); \ __macro(cudnnGetRNNWorkspaceSize); \ diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 5b612677da3554f17ab3ac29ddc241eee5f7c768..ce1ec507307a2721e641ac15425c6a2321e514c7 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -266,7 +266,7 @@ inline std::string GetErrorSumaryString(StrType&& what, const char* file, std::ostringstream sout; sout << "\n----------------------\nError Message " "Summary:\n----------------------\n"; - sout << string::Sprintf("%s at (%s:%d)", std::forward(what), file, + sout << string::Sprintf("%s (at %s:%d)", std::forward(what), file, line) << std::endl; return sout.str(); diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc index 5f63233d8bee4beefd6e1695d8bc3d6e5e4ae7fb..ca1e5501c6a84e6136c28f564a78a7e63f0ee8d4 100644 --- a/paddle/fluid/platform/gpu_info.cc +++ b/paddle/fluid/platform/gpu_info.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "gflags/gflags.h" #include "paddle/fluid/platform/cuda_device_guard.h" +#include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/lock_guard_ptr.h" #include "paddle/fluid/platform/macros.h" @@ -38,11 +39,11 @@ USE_GPU_MEM_STAT; namespace paddle { namespace platform { -/* Here is a very simple CUDA “pro tip”: cudaDeviceGetAttribute() is a much -faster way to query device properties. 
You can see details in -https://devblogs.nvidia.com/cuda-pro-tip-the-fast-way-to-query-device-properties/ -*/ +int CudnnVersion() { + if (!dynload::HasCUDNN()) return -1; + return dynload::cudnnGetVersion(); +} static int GetCUDADeviceCountImpl() { int driverVersion = 0; cudaError_t status = cudaDriverGetVersion(&driverVersion); @@ -73,6 +74,10 @@ int GetCUDADeviceCount() { return dev_cnt; } +/* Here is a very simple CUDA “pro tip”: cudaDeviceGetAttribute() is a much +faster way to query device properties. You can see details in +https://devblogs.nvidia.com/cuda-pro-tip-the-fast-way-to-query-device-properties/ +*/ int GetCUDAComputeCapability(int id) { PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), platform::errors::InvalidArgument( diff --git a/paddle/fluid/platform/gpu_info.h b/paddle/fluid/platform/gpu_info.h index 6a9893647172e2c63f4749fdb0ae1cb0fdfaaf04..ec77447ef77dbb1cd7ee180176f95a9ab8f7c03a 100644 --- a/paddle/fluid/platform/gpu_info.h +++ b/paddle/fluid/platform/gpu_info.h @@ -23,6 +23,8 @@ limitations under the License. */ namespace paddle { namespace platform { +//! Get the version of cudnn +int CudnnVersion(); //! Get the total number of GPU devices in system. int GetCUDADeviceCount(); diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 261f6e807a22d328a20156bed8ee9974637898c3..2e708e44fd0e49e1c33e048084d15e13c6e4d57e 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -33,6 +33,11 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/piece.h" +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu_info.h" +#endif + DECLARE_int32(paddle_num_threads); DEFINE_int32(multiple_of_cupti_buffer_size, 1, "Multiple of the CUPTI device buffer size. If the timestamps have " @@ -151,6 +156,14 @@ void InitDevices(bool init_p2p) { } catch (const std::exception &exp) { LOG(WARNING) << "Compiled with WITH_GPU, but no GPU found in runtime."; } +#endif +#ifdef PADDLE_WITH_XPU + try { + // use user specified XPUs in single-node multi-process mode. 
+ devices = platform::GetXPUSelectedDevices(); + } catch (const std::exception &exp) { + LOG(WARNING) << "Compiled with WITH_XPU, but no XPU found in runtime."; + } #endif InitDevices(init_p2p, devices); } @@ -165,7 +178,13 @@ void InitDevices(bool init_p2p, const std::vector devices) { LOG(WARNING) << "Invalid devices id."; continue; } + +#ifdef PADDLE_WITH_CUDA places.emplace_back(platform::CUDAPlace(devices[i])); +#endif +#ifdef PADDLE_WITH_XPU + places.emplace_back(platform::XPUPlace(devices[i])); +#endif } if (init_p2p) { InitP2P(devices); diff --git a/paddle/fluid/platform/init_test.cc b/paddle/fluid/platform/init_test.cc index 6392c4f4c42af9030e9dd0b3373df60938a4676f..f14fbdd74f95bfbed53ff787af861ce4656159c0 100644 --- a/paddle/fluid/platform/init_test.cc +++ b/paddle/fluid/platform/init_test.cc @@ -20,7 +20,7 @@ TEST(InitDevices, CPU) { using paddle::framework::InitDevices; using paddle::platform::DeviceContextPool; -#ifndef PADDLE_WITH_CUDA +#if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_XPU) InitDevices(true); DeviceContextPool& pool = DeviceContextPool::Instance(); ASSERT_EQ(pool.size(), 1U); @@ -39,6 +39,18 @@ TEST(InitDevices, CUDA) { #endif } +TEST(InitDevices, XPU) { + using paddle::framework::InitDevices; + using paddle::platform::DeviceContextPool; + +#ifdef PADDLE_WITH_XPU + int count = paddle::platform::GetXPUDeviceCount(); + InitDevices(true); + DeviceContextPool& pool = DeviceContextPool::Instance(); + ASSERT_EQ(pool.size(), 1U + static_cast(count)); +#endif +} + #ifndef _WIN32 TEST(SignalHandle, SignalHandle) { std::string msg = "Signal raises"; diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index c74c47b7d84820f089d4e657f8bddccc5de8d727..3782eb684f21f8c09e9dac124082ae596fe5d1bc 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -129,6 +129,16 @@ inline void ClearMKLDNNCache(const platform::Place& place) { } } +inline void DontClearMKLDNNCache(const platform::Place& place) { + // Clear mkl-dnn cache, + if (platform::is_cpu_place(place)) { + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + platform::MKLDNNDeviceContext* dev_ctx = + (platform::MKLDNNDeviceContext*)pool.Get(place); + dev_ctx->BlockNextCacheClearing(); + } +} + template mkldnn::memory::data_type MKLDNNGetDataType() { return mkldnn::memory::data_type::undef; diff --git a/paddle/fluid/platform/place.cc b/paddle/fluid/platform/place.cc index 195acc1b6d15a91369d48164179cd6e0b5cfac8d..b80d2fd1632cd82c231fae724fc4d754b8fed0fc 100644 --- a/paddle/fluid/platform/place.cc +++ b/paddle/fluid/platform/place.cc @@ -32,6 +32,7 @@ class PlacePrinter : public boost::static_visitor<> { void operator()(const CUDAPlace &p) { os_ << "CUDAPlace(" << p.device << ")"; } + void operator()(const XPUPlace &p) { os_ << "XPUPlace(" << p.device << ")"; } void operator()(const CUDAPinnedPlace &p) { os_ << "CUDAPinnedPlace"; } private: @@ -44,6 +45,10 @@ bool is_gpu_place(const Place &p) { return boost::apply_visitor(IsCUDAPlace(), p); } +bool is_xpu_place(const Place &p) { + return boost::apply_visitor(IsXPUPlace(), p); +} + bool is_cpu_place(const Place &p) { return boost::apply_visitor(IsCPUPlace(), p); } @@ -60,6 +65,8 @@ bool is_same_place(const Place &p1, const Place &p2) { if (places_are_same_class(p1, p2)) { if (is_cpu_place(p1) || is_cuda_pinned_place(p1)) { return true; + } else if (is_xpu_place(p1)) { + return BOOST_GET_CONST(XPUPlace, p1) == BOOST_GET_CONST(XPUPlace, p2); } else { return 
BOOST_GET_CONST(CUDAPlace, p1) == BOOST_GET_CONST(CUDAPlace, p2); } diff --git a/paddle/fluid/platform/place.h b/paddle/fluid/platform/place.h index eeda10a633b655dee0da9197888738cd94b50809..f95f6954a32e771e7413a766afcfea8b85ff1f7e 100644 --- a/paddle/fluid/platform/place.h +++ b/paddle/fluid/platform/place.h @@ -58,31 +58,58 @@ struct CUDAPinnedPlace { inline bool operator<(const CUDAPinnedPlace &) const { return false; } }; +// Place for Baidu Kunlun Accelerator +struct XPUPlace { + XPUPlace() : XPUPlace(0) {} + explicit XPUPlace(int d) : device(d) {} + + inline int GetDeviceId() const { return device; } + // needed for variant equality comparison + inline bool operator==(const XPUPlace &o) const { return device == o.device; } + inline bool operator!=(const XPUPlace &o) const { return !(*this == o); } + inline bool operator<(const XPUPlace &o) const { return device < o.device; } + + int device; +}; + struct IsCUDAPlace : public boost::static_visitor { bool operator()(const CPUPlace &) const { return false; } + bool operator()(const XPUPlace &) const { return false; } bool operator()(const CUDAPlace &gpu) const { return true; } bool operator()(const CUDAPinnedPlace &) const { return false; } }; struct IsCPUPlace : public boost::static_visitor { bool operator()(const CPUPlace &cpu) const { return true; } + bool operator()(const XPUPlace &) const { return false; } bool operator()(const CUDAPlace &) const { return false; } bool operator()(const CUDAPinnedPlace &) const { return false; } }; struct IsCUDAPinnedPlace : public boost::static_visitor { bool operator()(const CPUPlace &) const { return false; } + bool operator()(const XPUPlace &) const { return false; } bool operator()(const CUDAPlace &) const { return false; } bool operator()(const CUDAPinnedPlace &cuda_pinned) const { return true; } }; -class Place : public boost::variant { +struct IsXPUPlace : public boost::static_visitor { + bool operator()(const CPUPlace &) const { return false; } + bool operator()(const XPUPlace &xpu) const { return true; } + bool operator()(const CUDAPlace &) const { return false; } + bool operator()(const CUDAPinnedPlace &) const { return false; } +}; + +class Place + : public boost::variant { private: - using PlaceBase = boost::variant; + using PlaceBase = + boost::variant; public: Place() = default; Place(const CPUPlace &cpu_place) : PlaceBase(cpu_place) {} // NOLINT + Place(const XPUPlace &xpu_place) : PlaceBase(xpu_place) {} // NOLINT Place(const CUDAPlace &cuda_place) : PlaceBase(cuda_place) {} // NOLINT Place(const CUDAPinnedPlace &cuda_pinned_place) // NOLINT : PlaceBase(cuda_pinned_place) {} @@ -98,6 +125,7 @@ class Place : public boost::variant { using PlaceList = std::vector; bool is_gpu_place(const Place &); +bool is_xpu_place(const Place &); bool is_cpu_place(const Place &); bool is_cuda_pinned_place(const Place &); bool places_are_same_class(const Place &, const Place &); @@ -115,6 +143,16 @@ struct PlaceVisitorWrapper return visitor_(cpu); } + typename Visitor::result_type operator()(const XPUPlace &xpu) const { +#ifdef PADDLE_WITH_XPU + return visitor_(xpu); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with XPU. 
Cannot visit xpu device")); + return typename Visitor::result_type(); +#endif + } + typename Visitor::result_type operator()(const CUDAPlace &cuda) const { #ifdef PADDLE_WITH_CUDA return visitor_(cuda); diff --git a/paddle/fluid/platform/place_test.cc b/paddle/fluid/platform/place_test.cc index e4c1d3def90f191194b46bb9ea27dd27d69dcb8b..13f28c73f4504aea85d6155a3daa8f8f01b26385 100644 --- a/paddle/fluid/platform/place_test.cc +++ b/paddle/fluid/platform/place_test.cc @@ -18,19 +18,32 @@ TEST(Place, Equality) { paddle::platform::CPUPlace cpu; paddle::platform::CUDAPlace g0(0), g1(1), gg0(0); + paddle::platform::XPUPlace x0(0), x1(1), xx0(0); EXPECT_EQ(cpu, cpu); EXPECT_EQ(g0, g0); EXPECT_EQ(g1, g1); EXPECT_EQ(g0, gg0); + EXPECT_EQ(x0, x0); + EXPECT_EQ(x1, x1); + EXPECT_EQ(x0, xx0); EXPECT_NE(g0, g1); + EXPECT_NE(x0, x1); EXPECT_TRUE(paddle::platform::places_are_same_class(g0, gg0)); + EXPECT_TRUE(paddle::platform::places_are_same_class(x0, xx0)); EXPECT_FALSE(paddle::platform::places_are_same_class(g0, cpu)); + EXPECT_FALSE(paddle::platform::places_are_same_class(x0, cpu)); + EXPECT_FALSE(paddle::platform::places_are_same_class(g0, x0)); } TEST(Place, Print) { + { + std::stringstream ss; + ss << paddle::platform::XPUPlace(1); + EXPECT_EQ("XPUPlace(1)", ss.str()); + } { std::stringstream ss; ss << paddle::platform::CUDAPlace(1); diff --git a/paddle/fluid/platform/xpu_header.h b/paddle/fluid/platform/xpu_header.h new file mode 100644 index 0000000000000000000000000000000000000000..d8c5f85f9cfe4b9d6ac07069fff89d37c695af5b --- /dev/null +++ b/paddle/fluid/platform/xpu_header.h @@ -0,0 +1,23 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#ifdef PADDLE_WITH_XPU +#include "xpu/api.h" +#include "xpu/runtime.h" +#include "xpu/runtime_ex.h" + +namespace xpu = baidu::xpu::api; +#endif diff --git a/paddle/fluid/platform/xpu_info.cc b/paddle/fluid/platform/xpu_info.cc new file mode 100644 index 0000000000000000000000000000000000000000..f88248fda7e65e1b96448c0576880a18a9d8a4a9 --- /dev/null +++ b/paddle/fluid/platform/xpu_info.cc @@ -0,0 +1,107 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
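To make the extended Place variant above concrete, here is an illustrative snippet rather than patch code; it assumes the patched paddle/fluid/platform/place.h is on the include path and the platform library is linked:

    #include <iostream>
    #include "paddle/fluid/platform/place.h"

    int main() {
      namespace plat = paddle::platform;
      plat::Place p = plat::XPUPlace(0);           // implicit Place(const XPUPlace &)
      // is_xpu_place() dispatches through the IsXPUPlace visitor added above.
      std::cout << plat::is_xpu_place(p) << "\n";  // 1
      std::cout << p << "\n";                      // "XPUPlace(0)" via PlacePrinter
      return 0;
    }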
*/ +#include "paddle/fluid/platform/xpu_info.h" + +#include +#include +#include +#include "gflags/gflags.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/string/split.h" + +DEFINE_string(selected_xpus, "", + "A list of device ids separated by comma, like: 0,1,2,3. " + "This option is useful when doing multi process training and " + "each process have only one device (XPU). If you want to use " + "all visible devices, set this to empty string. NOTE: the " + "reason of doing this is that we want to use P2P communication" + "between XPU devices, use XPU_VISIBLE_DEVICES can only use" + "share-memory only."); + +namespace paddle { +namespace platform { + +static int GetXPUDeviceCountImpl() { + const auto *xpu_visible_devices = std::getenv("XPU_VISIBLE_DEVICES"); + if (xpu_visible_devices != nullptr) { + std::string xpu_visible_devices_str(xpu_visible_devices); + if (std::all_of(xpu_visible_devices_str.begin(), + xpu_visible_devices_str.end(), + [](char ch) { return ch == ' '; })) { + VLOG(2) << "XPU_VISIBLE_DEVICES is set to be empty. No XPU detected."; + return 0; + } + } + + int count = 0; + int ret = xpu_device_count(&count); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + return count; +} + +int GetXPUDeviceCount() { + static auto dev_cnt = GetXPUDeviceCountImpl(); + return dev_cnt; +} + +int GetXPUCurrentDeviceId() { + int dev_id; + int ret = xpu_current_device(&dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); + + if (dev_id >= 64) { + // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id + dev_id -= 64; + } + return dev_id; +} + +//! Get a list of device ids from environment variable or use all. +std::vector GetXPUSelectedDevices() { + // use user specified XPUs in single-node multi-process mode. + std::vector devices; + if (!FLAGS_selected_xpus.empty()) { + auto devices_str = paddle::string::Split(FLAGS_selected_xpus, ','); + for (auto id : devices_str) { + devices.push_back(atoi(id.c_str())); + } + } else { + int count = GetXPUDeviceCount(); + for (int i = 0; i < count; ++i) { + devices.push_back(i); + } + } + return devices; +} + +void SetXPUDeviceId(int id) { + PADDLE_ENFORCE_LT( + id, GetXPUDeviceCount(), + platform::errors::InvalidArgument("id must less than XPU count")); + int ret = xpu_set_device(id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + ret)); +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/xpu_info.h b/paddle/fluid/platform/xpu_info.h new file mode 100644 index 0000000000000000000000000000000000000000..efaba13453e7472ed09ff66c70bdaf19eb89549d --- /dev/null +++ b/paddle/fluid/platform/xpu_info.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
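The FLAGS_selected_xpus flag defined above mirrors the existing selected_gpus flag: a comma-separated list restricts the process to those devices, and an empty flag means every detected XPU. A self-contained sketch of that selection logic, using only the standard library so it runs without the XPU runtime:

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    // Same behaviour as GetXPUSelectedDevices(): "0,2" -> {0, 2}, "" -> all devices.
    std::vector<int> SelectDevices(const std::string& flag, int device_count) {
      std::vector<int> devices;
      if (!flag.empty()) {
        std::stringstream ss(flag);
        for (std::string id; std::getline(ss, id, ',');) {
          devices.push_back(std::stoi(id));
        }
      } else {
        for (int i = 0; i < device_count; ++i) devices.push_back(i);
      }
      return devices;
    }

    int main() {
      for (int d : SelectDevices("0,2", 4)) std::cout << d << ' ';  // prints: 0 2
      std::cout << '\n';
      for (int d : SelectDevices("", 4)) std::cout << d << ' ';     // prints: 0 1 2 3
      std::cout << '\n';
      return 0;
    }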
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include + +namespace paddle { +namespace platform { + +//! Get the total number of XPU devices in system. +int GetXPUDeviceCount(); + +//! Get the current XPU device id in system. +int GetXPUCurrentDeviceId(); + +//! Get a list of device ids from environment variable or use all. +std::vector GetXPUSelectedDevices(); + +//! Set the XPU device id for next execution. +void SetXPUDeviceId(int device_id); + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index b5165078cb17fe404d7a12230f02283b41391a3f..4c0552bf1e26cc0b4b06f6476255af93c4836093 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,7 +1,7 @@ set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper prune feed_fetch_method pass_builder parallel_executor profiler layer tracer engine scope_pool analysis_predictor imperative_profiler imperative_flag save_load_util dlpack_tensor device_context - gloo_wrapper infer_io_utils heter_wrapper) + gloo_wrapper infer_io_utils heter_wrapper generator) if (WITH_NCCL) set(PYBIND_DEPS ${PYBIND_DEPS} nccl_wrapper) @@ -37,7 +37,8 @@ set(PYBIND_SRCS data_set_py.cc imperative.cc ir.cc - inference_api.cc) + inference_api.cc + generator_py.cc) if (WITH_CRYPTO) set(PYBIND_DEPS ${PYBIND_DEPS} paddle_crypto) @@ -71,13 +72,23 @@ if(WITH_PYTHON) set(tmp_impl_file ${impl_file}.tmp) if(WIN32) - add_custom_command(TARGET op_function_generator - POST_BUILD - COMMAND "${CMAKE_BINARY_DIR}/paddle/fluid/pybind/${CMAKE_BUILD_TYPE}/op_function_generator" - "${tmp_impl_file}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file} ${impl_file} - COMMENT "copy_if_different ${impl_file}" - VERBATIM + file(WRITE ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat "" + "set build_times=1\n" + ":retry\n" + "ECHO op_function_generator run %build_times% time\n" + "${CMAKE_BINARY_DIR}/paddle/fluid/pybind/${CMAKE_BUILD_TYPE}/op_function_generator ${impl_file}\n" + "if %ERRORLEVEL% NEQ 0 (\n" + " set /a build_times=%build_times%+1\n" + " if %build_times% GTR 100 (\n" + " exit /b 1\n" + " ) else (\n" + " goto :retry\n" + " )\n" + ")\n" + "exit /b 0") + + add_custom_command(TARGET op_function_generator POST_BUILD + COMMAND ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat ) if(${CBLAS_PROVIDER} STREQUAL MKLML) diff --git a/paddle/fluid/pybind/generator_py.cc b/paddle/fluid/pybind/generator_py.cc new file mode 100644 index 0000000000000000000000000000000000000000..3bccd5fb2dd92298323381c09467937abd87a53c --- /dev/null +++ b/paddle/fluid/pybind/generator_py.cc @@ -0,0 +1,51 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
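The Windows branch of the pybind CMakeLists above now runs op_function_generator through a generated retry script, evidently to tolerate intermittent failures on Windows CI. The control flow is a plain bounded retry; a C++ sketch of the same logic (RunOnce and kMaxTimes are illustrative names, not part of the patch):

    #include <cstdlib>
    #include <iostream>

    // Stand-in for launching op_function_generator; returns the process exit code.
    int RunOnce() { return std::system("echo generating op functions"); }

    int main() {
      const int kMaxTimes = 100;  // the .bat above gives up after 100 attempts
      for (int attempt = 1; attempt <= kMaxTimes; ++attempt) {
        std::cout << "op_function_generator run " << attempt << " time\n";
        if (RunOnce() == 0) return 0;  // success: stop retrying
      }
      return 1;  // every attempt failed, equivalent to "exit /b 1"
    }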
+You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include + +#ifdef _POSIX_C_SOURCE +#undef _POSIX_C_SOURCE +#endif + +#ifdef _XOPEN_SOURCE +#undef _XOPEN_SOURCE +#endif + +#include +#include +#include + +#include "paddle/fluid/framework/generator.h" +#include "paddle/fluid/pybind/generator_py.h" + +namespace py = pybind11; + +namespace paddle { +namespace pybind { +void BindGenerator(py::module* m) { + py::class_(*m, "GeneratorState", ""); + py::class_(*m, "mt19937_64", ""); + py::class_>( + *m, "Generator") + .def(py::init([]() { return framework::Generator::GetInstanceX(); }), + py::return_value_policy::reference) + .def("get_state", &framework::Generator::GetState, + py::return_value_policy::move) + .def("set_state", &framework::Generator::SetState) + .def("manual_seed", &framework::Generator::SetCurrentSeed) + .def("seed", &framework::Generator::Seed) + .def("initial_seed", &framework::Generator::GetCurrentSeed) + .def("random", &framework::Generator::Random64) + .def("get_cpu_engine", &framework::Generator::GetCPUEngine, + py::return_value_policy::move) + .def("set_cpu_engine", &framework::Generator::SetCPUEngine); +} // end Generator +} // end namespace pybind +} // end namespace paddle diff --git a/paddle/fluid/pybind/generator_py.h b/paddle/fluid/pybind/generator_py.h new file mode 100644 index 0000000000000000000000000000000000000000..d37654c1ba24e296fb325d1507187c5a954754bd --- /dev/null +++ b/paddle/fluid/pybind/generator_py.h @@ -0,0 +1,28 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
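BindGenerator above exposes manual_seed, get_state/set_state, and random on framework::Generator, which (per the mt19937_64 class registered alongside it) wraps a std::mt19937_64 engine. The save-and-restore semantics are the standard ones for that engine; a plain C++ analogy with no Paddle dependency:

    #include <cassert>
    #include <cstdint>
    #include <random>

    int main() {
      std::mt19937_64 engine;             // the engine type named in the binding above
      engine.seed(42);                    // roughly manual_seed(42)
      std::mt19937_64 snapshot = engine;  // roughly get_state()
      std::uint64_t a = engine();         // roughly random()
      engine = snapshot;                  // roughly set_state(saved_state)
      std::uint64_t b = engine();
      assert(a == b);  // restoring the saved state replays the same draw
      return 0;
    }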
+ +#pragma once + +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace py = pybind11; + +namespace paddle { +namespace pybind { + +void BindGenerator(py::module* m); + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index ac1d2bc1f31d62a2ca9ccb9378bc17ac37d09ec9..021d10ca7facb0bac11cd5d08eddea7e01b9b566 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -66,11 +66,13 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) { return place_obj.cast(); } else if (py::isinstance(place_obj)) { return place_obj.cast(); + } else if (py::isinstance(place_obj)) { + return place_obj.cast(); } else if (py::isinstance(place_obj)) { return place_obj.cast(); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Place should be one of CPUPlace/CUDAPlace/CUDAPinnedPlace")); + "Place should be one of CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace")); } } @@ -78,16 +80,23 @@ static void InitTensorForVarBase(imperative::VarBase *self, const py::array &array, const platform::Place place, bool persistable = false, - bool zero_copy = false, - std::string name = "") { + bool zero_copy = false, std::string name = "", + int stop_gradient = -1) { if (name == "") { - name = imperative::GetCurrentTracer()->GenerateUniqueName("generated_var"); + name = + imperative::GetCurrentTracer()->GenerateUniqueName("generated_tensor"); } + VLOG(5) << "Init Tensor as: / name: " << name + << " / persistable: " << persistable << " / zero_copy: " << zero_copy + << " / stop_gradient: " << stop_gradient; new (self) imperative::VarBase(name); auto *tensor = self->MutableVar()->GetMutable(); if (platform::is_cpu_place(place)) { SetTensorFromPyArray( tensor, array, BOOST_GET_CONST(platform::CPUPlace, place), zero_copy); + } else if (platform::is_xpu_place(place)) { + SetTensorFromPyArray( + tensor, array, BOOST_GET_CONST(platform::XPUPlace, place), zero_copy); } else if (platform::is_gpu_place(place)) { SetTensorFromPyArray( tensor, array, BOOST_GET_CONST(platform::CUDAPlace, place), zero_copy); @@ -97,7 +106,10 @@ static void InitTensorForVarBase(imperative::VarBase *self, zero_copy); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Place should be one of CPUPlace/CUDAPlace/CUDAPinnedPlace")); + "Place should be one of CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace")); + } + if (stop_gradient != -1) { + self->SetOverridedStopGradient(stop_gradient); } self->SetPersistable(persistable); self->SetType(framework::proto::VarType::LOD_TENSOR); @@ -106,12 +118,11 @@ static void InitTensorForVarBase(imperative::VarBase *self, static void InitVarBaseFromNumpyWithKwargs(imperative::VarBase *self, const py::kwargs &kwargs) { - VLOG(4) << "Init VarBase"; + VLOG(4) << "Init VarBase from kwargs: "; PADDLE_ENFORCE_EQ( kwargs.contains("value"), true, platform::errors::NotFound( "The kwargs used to create Varbase misses argument: value")); - auto persistable = kwargs.contains("persistable") ? kwargs["persistable"].cast() : false; @@ -120,10 +131,14 @@ static void InitVarBaseFromNumpyWithKwargs(imperative::VarBase *self, auto zero_copy = kwargs.contains("zero_copy") ? kwargs["zero_copy"].cast() : false; auto name = kwargs.contains("name") ? kwargs["name"].cast() : ""; + auto stop_gradient = kwargs.contains("stop_gradient") + ? kwargs["stop_gradient"].cast() + : -1; auto default_place = imperative::GetCurrentTracer()->ExpectedPlace(); auto place = kwargs.contains("place") ? 
PyObjectToPlace(kwargs["place"]) : default_place; - InitTensorForVarBase(self, array, place, persistable, zero_copy, name); + InitTensorForVarBase(self, array, place, persistable, zero_copy, name, + stop_gradient); } template @@ -131,15 +146,24 @@ static void InitVarBaseFromNumpyWithArg(imperative::VarBase *self, const py::array &array, const P &place, bool persistable = false, bool zero_copy = false, - std::string name = "") { - VLOG(4) << "Init VarBase"; - // 0: self, 1: value, 2: place, 3: persistable, 4: zero_copy, 5: name + std::string name = "", + int stop_gradient = -1) { + VLOG(4) << "Init VarBase from Arg: "; + // 0: self, 1: value, 2: place, 3: persistable, 4: zero_copy, 5: name , 6: + // stop_gradient if (name == "") { - name = imperative::GetCurrentTracer()->GenerateUniqueName("generated_var"); + name = + imperative::GetCurrentTracer()->GenerateUniqueName("generated_tensor"); } + VLOG(5) << "Init Tensor as: / name: " << name + << " / persistable: " << persistable << " / zero_copy: " << zero_copy + << " / stop_gradient: " << stop_gradient; new (self) imperative::VarBase(name); self->SetPersistable(persistable); auto *tensor = self->MutableVar()->GetMutable(); + if (stop_gradient != -1) { + self->SetOverridedStopGradient(stop_gradient); + } SetTensorFromPyArray
<P>
(tensor, array, place, zero_copy); self->SetType(framework::proto::VarType::LOD_TENSOR); self->SetDataType(tensor->type()); @@ -147,7 +171,7 @@ static void InitVarBaseFromNumpyWithArg(imperative::VarBase *self, static void InitVarBaseFromNumpyWithArgDefault(imperative::VarBase *self, const py::array &array) { - VLOG(4) << "Init VarBase"; + VLOG(4) << "Init VarBase from numpy: "; auto place = imperative::GetCurrentTracer()->ExpectedPlace(); InitTensorForVarBase(self, array, place); } @@ -157,7 +181,7 @@ static void InitVarBaseFromTensorWithArgDefault( VLOG(4) << "Init VarBase"; auto place = imperative::GetCurrentTracer()->ExpectedPlace(); new (self) imperative::VarBase( - imperative::GetCurrentTracer()->GenerateUniqueName("generated_var")); + imperative::GetCurrentTracer()->GenerateUniqueName("generated_tensor")); self->SetPersistable(false); self->SetType(framework::proto::VarType::LOD_TENSOR); self->SetDataType(tensor.type()); @@ -551,7 +575,7 @@ void BindImperative(py::module *m_ptr) { std::string act_name = ""; if (!name.ptr() || name.ptr() == Py_None) { act_name = imperative::GetCurrentTracer()->GenerateUniqueName( - "generated_var"); + "generated_tensor"); } else { act_name = name.cast(); } @@ -567,13 +591,20 @@ void BindImperative(py::module *m_ptr) { }) .def("__init__", &InitVarBaseFromNumpyWithArg, py::arg("value"), py::arg("place"), py::arg("persistable") = false, - py::arg("zero_copy") = false, py::arg("name") = "") + py::arg("zero_copy") = false, py::arg("name") = "", + py::arg("stop_gradient") = -1) + .def("__init__", &InitVarBaseFromNumpyWithArg, + py::arg("value"), py::arg("place"), py::arg("persistable") = false, + py::arg("zero_copy") = false, py::arg("name") = "", + py::arg("stop_gradient") = -1) .def("__init__", &InitVarBaseFromNumpyWithArg, py::arg("value"), py::arg("place"), py::arg("persistable") = false, - py::arg("zero_copy") = false, py::arg("name") = "") + py::arg("zero_copy") = false, py::arg("name") = "", + py::arg("stop_gradient") = -1) .def("__init__", &InitVarBaseFromNumpyWithArg, py::arg("value"), py::arg("place"), py::arg("persistable") = false, - py::arg("zero_copy") = false, py::arg("name") = "") + py::arg("zero_copy") = false, py::arg("name") = "", + py::arg("stop_gradient") = -1) .def("__init__", &InitVarBaseFromNumpyWithArgDefault, py::arg("value")) .def("__init__", &InitVarBaseFromTensorWithArgDefault, py::arg("tensor")) .def("__init__", &InitVarBaseFromNumpyWithKwargs) @@ -796,6 +827,15 @@ void BindImperative(py::module *m_ptr) { [](const imperative::VarBase &self, const platform::CPUPlace &place, bool blocking) { return self.NewVarBase(place, blocking); }, py::return_value_policy::copy) + .def("_copy_to", + [](const imperative::VarBase &self, + const platform::CUDAPinnedPlace &place, + bool blocking) { return self.NewVarBase(place, blocking); }, + py::return_value_policy::copy) + .def("_copy_to", + [](const imperative::VarBase &self, const platform::XPUPlace &place, + bool blocking) { return self.NewVarBase(place, blocking); }, + py::return_value_policy::copy) .def("_copy_to", [](const imperative::VarBase &self, const platform::CUDAPlace &place, bool blocking) { return self.NewVarBase(place, blocking); }, @@ -824,6 +864,9 @@ void BindImperative(py::module *m_ptr) { return std::vector(); } }) + .def_property_readonly( + "place", [](imperative::VarBase &self) { return self.Place(); }, + py::return_value_policy::copy) .def_property_readonly("type", &imperative::VarBase::Type) .def_property_readonly("dtype", &imperative::VarBase::DataType); @@ -860,6 
+903,9 @@ void BindImperative(py::module *m_ptr) { if (py::isinstance(obj)) { auto p = obj.cast(); self.SetExpectedPlace(*p); + } else if (py::isinstance(obj)) { + auto p = obj.cast(); + self.SetExpectedPlace(*p); } else if (py::isinstance(obj)) { auto p = obj.cast(); self.SetExpectedPlace(*p); @@ -868,7 +914,8 @@ void BindImperative(py::module *m_ptr) { self.SetExpectedPlace(*p); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Incompatible Place Type: supports CUDAPlace, CPUPlace, " + "Incompatible Place Type: supports XPUPlace, CUDAPlace, " + "CPUPlace, " "and CUDAPinnedPlace, " "but got Unknown Type!")); } @@ -898,6 +945,19 @@ void BindImperative(py::module *m_ptr) { *(imperative::AmpOperators::Instance().GetAllowOps()), *(imperative::AmpOperators::Instance().GetBlockOps())); }) + .def("trace", + [](imperative::Tracer &self, const std::string &type, + const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs, + framework::AttributeMap attrs, const platform::XPUPlace &place, + bool trace_backward) { + auto ins_map = ConvertToNameVarBaseMap(ins); + auto outs_map = ConvertToNameVarBaseMap(outs); + { + py::gil_scoped_release release; + self.TraceOp(type, std::move(ins_map), std::move(outs_map), + std::move(attrs), place, trace_backward); + } + }) .def("trace", [](imperative::Tracer &self, const std::string &type, const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs, diff --git a/paddle/fluid/pybind/op_function.h b/paddle/fluid/pybind/op_function.h index 597ead9327e233df785b58437afce8fa75a058c3..70b321f658cd2cf1bd43cd6440bf83e1f4dab140 100644 --- a/paddle/fluid/pybind/op_function.h +++ b/paddle/fluid/pybind/op_function.h @@ -18,9 +18,11 @@ #include #include #include + #include #include #include + #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/variable.h" @@ -31,15 +33,93 @@ namespace py = pybind11; namespace paddle { namespace pybind { -static inline void ConstructAttrMapFromPyArgs(framework::AttributeMap* attrs, + +static inline std::shared_ptr CastPyHandleToVarBase( + const std::string& op_type, const std::string& arg_name, int arg_idx, + const py::handle& handle) { + PyObject* py_obj = handle.ptr(); // get underlying PyObject + if (!py_obj || py_obj == Py_None) { + return nullptr; + } + try { + return py::cast>(py::handle(py_obj)); + } catch (py::cast_error&) { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be Tensor, but got " + "%s", + op_type, arg_name, arg_idx, Py_TYPE(py_obj)->tp_name)); + } +} + +static inline std::vector> +CastPyHandleToVarBaseList(const std::string& op_type, + const std::string& arg_name, int arg_idx, + const py::handle& handle) { + PyObject* py_obj = handle.ptr(); // get underlying PyObject + if (!py_obj || py_obj == Py_None) { + return {}; + } + std::vector> result; + if (PyList_Check(py_obj) || PyTuple_Check(py_obj)) { + auto size = PyTuple_Check(py_obj) ? PyTuple_GET_SIZE(py_obj) + : PyList_GET_SIZE(py_obj); + for (auto i = 0; i < size; ++i) { + PyObject* item = PyTuple_Check(py_obj) ? 
PyTuple_GET_ITEM(py_obj, i) + : PyList_GET_ITEM(py_obj, i); + if (!item || item == Py_None) { + result.emplace_back(nullptr); + continue; + } + try { + result.emplace_back( + py::cast>(py::handle(item))); + } catch (py::cast_error&) { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be list of " + "Tensors, but " + "got %s in list (item %d)", + op_type, arg_name, arg_idx, Py_TYPE(item)->tp_name, i)); + } + } + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be list of Tensors, but got " + "%s", + op_type, arg_name, arg_idx, Py_TYPE(py_obj)->tp_name)); + } + return result; +} // namespace pybind + +static inline void ConstructAttrMapFromPyArgs(const std::string& op_type, + int start_idx, + framework::AttributeMap* attrs, const py::args& args) { PADDLE_ENFORCE_EQ( args.size() % 2, 0, platform::errors::InvalidArgument( "The number of arguments for arributes should be even.")); for (size_t i = 0; i < args.size(); i += 2) { - auto name = args[i].cast(); - auto value = args[i + 1].cast(); + std::string name; + framework::Attribute value; + try { + name = args[i].cast(); + } catch (std::exception& e) { + PyObject* py_obj = args[i].ptr(); // get underlying PyObject + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be str, but got " + "%s", + op_type, start_idx + i, Py_TYPE(py_obj)->tp_name)); + } + try { + value = args[i + 1].cast(); + } catch (std::exception& e) { + PyObject* py_obj = args[i + 1].ptr(); // get underlying PyObject + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument (position %d) must be " + "Attribute type (one of str, bool, int, int64, float, or list of " + "them), but got %s", + op_type, start_idx + i + 1, Py_TYPE(py_obj)->tp_name)); + } (*attrs)[name] = value; } } diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index b32f5e8847d30fc785587541ccdc74d99d2b025c..ec458ee7957c6a496ad4bd5579fe0b3c8a72069d 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -41,6 +41,8 @@ std::map> op_ins_map = { {"fake_quantize_dequantize_moving_average_abs_max", {"X", "InScale", "InAccum", "InState"}}, {"nll_loss", {"X", "Label", "Weight"}}, + {"bilinear_tensor_product", {"X", "Y", "Weight", "Bias"}}, + {"gather", {"X", "Index", "Axis"}}, }; // NOTE(zhiqiu): Like op_ins_map. 
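CastPyHandleToVarBaseList above accepts either a Python list or tuple, casts each element, and names the offending argument and item on failure. A hedged sketch of that shape using plain ints instead of VarBase, so it compiles against pybind11 alone (ToIntVector is an invented name):

    #include <pybind11/pybind11.h>

    #include <stdexcept>
    #include <string>
    #include <vector>

    namespace py = pybind11;

    std::vector<int> ToIntVector(const py::handle& handle) {
      PyObject* obj = handle.ptr();  // underlying PyObject, as in the patch
      std::vector<int> out;
      if (!obj || obj == Py_None) return out;  // None maps to an empty list
      if (!PyList_Check(obj) && !PyTuple_Check(obj)) {
        throw std::invalid_argument("expected a list or tuple");
      }
      auto size = PyTuple_Check(obj) ? PyTuple_GET_SIZE(obj) : PyList_GET_SIZE(obj);
      for (Py_ssize_t i = 0; i < size; ++i) {
        PyObject* item =
            PyTuple_Check(obj) ? PyTuple_GET_ITEM(obj, i) : PyList_GET_ITEM(obj, i);
        try {
          out.push_back(py::cast<int>(py::handle(item)));
        } catch (const py::cast_error&) {
          throw std::invalid_argument("item " + std::to_string(i) + " is not an int");
        }
      }
      return out;
    }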
@@ -57,6 +59,9 @@ std::map> op_outs_map = { {"batch_norm", {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance", "ReserveSpace"}}, + {"sync_batch_norm", + {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance", + "ReserveSpace"}}, }; // NOTE(zhiqiu): Commonly, the outputs in auto-generated OP function are @@ -76,6 +81,7 @@ std::map> op_passing_outs_map = { {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}}, {"momentum", {"ParamOut", "VelocityOut"}}, {"batch_norm", {"MeanOut", "VarianceOut"}}, + {"sync_batch_norm", {"MeanOut", "VarianceOut"}}, {"accuracy", {"Correct", "Total"}}, {"fill_constant", {"Out"}}, {"matmul", {"Out"}}, @@ -116,8 +122,19 @@ const char* OUTPUT_INITIALIZER_TEMPLATE_WITH_NULL_LIST = R"( const char* ARG_OUT_NUM = R"(%sNum)"; const char* ARG_OUT_NUM_TYPE = R"(size_t )"; -const char* VAR_TYPE = R"(std::shared_ptr)"; -const char* VAR_LIST_TYPE = R"(std::vector>)"; +const char* IN_VAR_TYPE = R"(py::handle)"; +const char* IN_VAR_LIST_TYPE = R"(py::handle)"; + +const char* OUT_VAR_TYPE = R"(std::shared_ptr)"; +const char* OUT_VAR_LIST_TYPE = R"(std::vector>)"; + +const char* CAST_VAR_TEMPLATE = R"( + auto %s = CastPyHandleToVarBase("%s", "%s", %d, %s);)"; + +const char* CAST_VAR_LIST_TEMPLATE = R"( + auto %s = CastPyHandleToVarBaseList("%s", "%s", %d, %s);)"; + + const char* ARG_TEMPLATE = R"(const %s& %s)"; const char* RETURN_TUPLE_TYPE = R"(std::tuple<%s>)"; @@ -133,8 +150,9 @@ const char* OP_FUNCTION_TEMPLATE = R"( %s %s(%s) { + %s framework::AttributeMap attrs; - ConstructAttrMapFromPyArgs(&attrs, args); + ConstructAttrMapFromPyArgs("%s", %d, &attrs, args); { py::gil_scoped_release release; auto tracer = imperative::GetCurrentTracer(); @@ -164,6 +182,10 @@ static inline bool FindPassingOutsMap(const std::string& op_type, return op_passing_outs_map[op_type].count(out_name); } +static inline std::string TempName(const std::string& name) { + return name + '_'; +} + static std::tuple, std::vector> GenerateOpFunctions(const std::string& module_name) { auto& op_info_map = paddle::framework::OpInfoMap::Instance().map(); @@ -187,16 +209,26 @@ GenerateOpFunctions(const std::string& module_name) { std::string ins_initializer = "{"; std::string ins_initializer_with_null = ""; std::string py_arg = ""; + int arg_idx = 0; + int input_args_num = 0; + std::string ins_cast_str = ""; for (auto& input : op_proto->inputs()) { auto& in_name = input.name(); // skip those dispensable inputs, like ResidualData in conv2d if (input.dispensable() && !FindInsMap(op_type, in_name)) { continue; } - const auto in_type = input.duplicable() ? VAR_LIST_TYPE : VAR_TYPE; - auto input_arg = paddle::string::Sprintf(ARG_TEMPLATE, in_type, in_name); + const auto in_type = input.duplicable() ? IN_VAR_LIST_TYPE : IN_VAR_TYPE; + auto input_arg = + paddle::string::Sprintf(ARG_TEMPLATE, in_type, TempName(in_name)); input_args += input_arg; input_args += ","; + input_args_num++; + const auto in_cast_type = + input.duplicable() ? CAST_VAR_LIST_TEMPLATE : CAST_VAR_TEMPLATE; + ins_cast_str += + paddle::string::Sprintf(in_cast_type, in_name, op_type, in_name, + arg_idx++, TempName(in_name)); if (input.dispensable()) { const auto in_template = input.duplicable() @@ -235,7 +267,8 @@ GenerateOpFunctions(const std::string& module_name) { if (output.dispensable() && !FindOutsMap(op_type, out_name)) { continue; } - const auto out_type = output.duplicable() ? VAR_LIST_TYPE : VAR_TYPE; + const auto out_type = + output.duplicable() ? 
OUT_VAR_LIST_TYPE : OUT_VAR_TYPE; const auto return_template = output.duplicable() ? RETURN_LIST_TEMPLATE : RETURN_TEMPLATE; if (FindPassingOutsMap(op_type, out_name)) { @@ -244,6 +277,7 @@ GenerateOpFunctions(const std::string& module_name) { } input_args += out_type; input_args += out_name; + input_args_num++; if (output.dispensable()) { const auto out_template = @@ -270,6 +304,7 @@ GenerateOpFunctions(const std::string& module_name) { auto out_num_str = paddle::string::Sprintf(ARG_OUT_NUM, out_name); input_args += ARG_OUT_NUM_TYPE; input_args += out_num_str; + input_args_num++; outs_initializer += paddle::string::Sprintf( OUT_DUPLICABLE_INITIALIZER_TEMPLATE, out_name, out_num_str); } else { @@ -309,9 +344,9 @@ GenerateOpFunctions(const std::string& module_name) { // generate op funtcion body auto op_function_str = paddle::string::Sprintf( OP_FUNCTION_TEMPLATE, return_type, func_name, function_args, - outs_initializer, ins_initializer, - ins_initializer_with_null + outs_initializer_with_null, op_type, - return_str); + ins_cast_str, op_type, input_args_num, outs_initializer, + ins_initializer, ins_initializer_with_null + outs_initializer_with_null, + op_type, return_str); // generate pybind item auto bind_function_str = paddle::string::Sprintf( diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index d68e225849e7fcfa7c7297942df96e2fede30f8e..13aa8d3f8e118cf42655a5df05a9576860558c7e 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -64,6 +64,7 @@ limitations under the License. */ #include "paddle/fluid/pybind/data_set_py.h" #include "paddle/fluid/pybind/exception.h" #include "paddle/fluid/pybind/fleet_wrapper_py.h" +#include "paddle/fluid/pybind/generator_py.h" #include "paddle/fluid/pybind/global_value_getter_setter.h" #include "paddle/fluid/pybind/gloo_wrapper_py.h" #include "paddle/fluid/pybind/heter_wrapper_py.h" @@ -89,6 +90,10 @@ limitations under the License. 
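The generator changes above thread the op name and each argument's position into the generated source so that a bad argument can be reported as "%s(): argument ... (position %d)". The generation itself is ordinary string templating; a trimmed, hypothetical sketch (the template text is invented for illustration and is not the real OP_FUNCTION_TEMPLATE):

    #include <cstdio>
    #include <iostream>
    #include <string>

    std::string GenerateOpFunction(const std::string& op_type,
                                   const std::string& arg_list) {
      // Slots: generated function name, its argument list, and the op name that
      // the cast helpers embed into their error messages.
      const char* kTemplate =
          "/* generated */ void imperative_%s(%s) {\n"
          "  // CastPyHandleToVarBase(\"%s\", \"X\", 0, X_); ...\n"
          "}\n";
      char buf[512];
      std::snprintf(buf, sizeof(buf), kTemplate, op_type.c_str(), arg_list.c_str(),
                    op_type.c_str());
      return std::string(buf);
    }

    int main() {
      std::cout << GenerateOpFunction("relu", "const py::handle& X_");
      return 0;
    }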
*/ #include "paddle/fluid/platform/gpu_info.h" #endif +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu_info.h" +#endif + #ifdef PADDLE_WITH_DISTRIBUTE #include "paddle/fluid/pybind/communicator_py.h" #endif @@ -117,6 +122,14 @@ bool IsCompiledWithCUDA() { #endif } +bool IsCompiledWithXPU() { +#ifndef PADDLE_WITH_XPU + return false; +#else + return true; +#endif +} + bool IsCompiledWithMKLDNN() { #ifndef PADDLE_WITH_MKLDNN return false; @@ -341,6 +354,10 @@ PYBIND11_MODULE(core_noavx, m) { m.def("set_num_threads", &platform::SetNumThreads); +#ifdef PADDLE_WITH_CUDA + m.def("cudnn_version", &platform::CudnnVersion); +#endif + m.def("from_dlpack", [](py::capsule *dltensor) { DLManagedTensor *dmt = reinterpret_cast( PyCapsule_GetPointer(dltensor->ptr(), "dltensor")); @@ -466,6 +483,10 @@ PYBIND11_MODULE(core_noavx, m) { [](Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) + .def("_alloc_float", + [](Tensor &self, paddle::platform::XPUPlace &place) { + self.mutable_data(place); + }) .def("_alloc_float", [](Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); @@ -478,6 +499,10 @@ PYBIND11_MODULE(core_noavx, m) { [](Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) + .def("_alloc_int", + [](Tensor &self, paddle::platform::XPUPlace &place) { + self.mutable_data(place); + }) .def("_alloc_int", [](Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); @@ -495,6 +520,11 @@ PYBIND11_MODULE(core_noavx, m) { paddle::framework::proto::VarType::Type type) { return reinterpret_cast(self.mutable_data(place, type)); }) + .def("_mutable_data", + [](Tensor &self, paddle::platform::XPUPlace &place, + paddle::framework::proto::VarType::Type type) { + return reinterpret_cast(self.mutable_data(place, type)); + }) .def("_mutable_data", [](Tensor &self, paddle::platform::CUDAPlace &place, paddle::framework::proto::VarType::Type type) { @@ -508,6 +538,8 @@ PYBIND11_MODULE(core_noavx, m) { .def("_clear", &Tensor::clear) .def("set", SetTensorFromPyArray, py::arg("array"), py::arg("place"), py::arg("zero_copy") = false) + .def("set", SetTensorFromPyArray, + py::arg("array"), py::arg("place"), py::arg("zero_copy") = false) .def("set", SetTensorFromPyArray, py::arg("array"), py::arg("place"), py::arg("zero_copy") = false) .def("set", SetTensorFromPyArray, @@ -517,7 +549,7 @@ PYBIND11_MODULE(core_noavx, m) { Args: lod (numpy.ndarray): The data to set. - place (CPUPlace|CUDAPlace|CUDAPinnedPlace): The place where the + place (CPUPlace|CUDAPlace|XPUPlace|CUDAPinnedPlace): The place where the LoDTensor is to be set. zero_copy (bool, optional): Whether to share memory with the input numpy array. This parameter only works with CPUPlace. Default: False. @@ -1070,7 +1102,7 @@ All parameter, weight, gradient are variables in Paddle. .def("find_var", &Scope::FindVar, py::arg("name"), R"DOC( Find variable named :code:`name` in the current scope or - its parent scope. Return None if not found. + its parent scope. Return None if not found. Args: name (str): the variable name. @@ -1225,6 +1257,18 @@ All parameter, weight, gradient are variables in Paddle. 
-> paddle::platform::DeviceContext* { return new paddle::platform::CPUDeviceContext(); }) + .def_static("create", + [](paddle::platform::XPUPlace& place) + -> paddle::platform::DeviceContext* { +#ifndef PADDLE_WITH_XPU + PADDLE_THROW( + platform::errors::PermissionDenied( + "Cannot use XPUPlace in CPU/GPU version, " + "Please recompile or reinstall Paddle with XPU support.")); +#else + return new paddle::platform::XPUDeviceContext(place); +#endif + }) .def_static("create", [](paddle::platform::CUDAPlace& place) -> paddle::platform::DeviceContext* { @@ -1319,14 +1363,75 @@ All parameter, weight, gradient are variables in Paddle. std::exit(-1); #endif }) +#ifdef PADDLE_WITH_CUDA + .def("get_device_id", + [](const platform::CUDAPlace &self) { return self.GetDeviceId(); }) .def("_type", &PlaceIndex) .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) + .def("_get_device_id", + [](platform::CUDAPlace &self) -> int { return self.GetDeviceId(); }) +#endif .def("__str__", string::to_string); + py::class_(m, "XPUPlace", R"DOC( + **Note**: + Examples: + .. code-block:: python + import paddle.fluid as fluid + xpu_place = fluid.XPUPlace(0) + )DOC") + .def("__init__", + [](platform::XPUPlace &self, int dev_id) { +#ifdef PADDLE_WITH_XPU + if (UNLIKELY(dev_id < 0)) { + LOG(ERROR) << string::Sprintf( + "Invalid XPUPlace(%d), device id must be 0 or " + "positive integer", + dev_id); + std::exit(-1); + } + if (UNLIKELY(dev_id >= platform::GetXPUDeviceCount())) { + if (platform::GetXPUDeviceCount() == 0) { + LOG(ERROR) << "Cannot use XPU because there is no XPU " + "detected on your " + "machine."; + std::exit(-1); + } else { + LOG(ERROR) << string::Sprintf( + "Invalid XPUPlace(%d), must inside [0, %d), because XPU " + "number on your machine is %d", + dev_id, platform::GetXPUDeviceCount(), + platform::GetXPUDeviceCount()); + std::exit(-1); + } + } + new (&self) platform::XPUPlace(dev_id); +#else + LOG(ERROR) << string::Sprintf( + "Cannot use XPU because you have installed CPU/GPU version " + "PaddlePaddle.\n" + "If you want to use XPU, please try to install XPU version " + "PaddlePaddle by: pip install paddlepaddle-xpu\n" + "If you only have CPU, please change XPUPlace(%d) to be " + "CPUPlace().\n", + dev_id); + std::exit(-1); +#endif + }) + .def("_type", &PlaceIndex) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) + .def("_equals", + &IsSamePlace) + .def("__str__", string::to_string); + py::class_(m, "CPUPlace", R"DOC( CPUPlace is a descriptor of a device. It represents a CPU device allocated or to be allocated with Tensor or LoDTensor. @@ -1341,6 +1446,7 @@ All parameter, weight, gradient are variables in Paddle. .def(py::init<>()) .def("_type", &PlaceIndex) .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) .def("_equals", @@ -1375,6 +1481,8 @@ All parameter, weight, gradient are variables in Paddle. .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) + .def("_equals", + &IsSamePlace) .def("_equals", &IsSamePlace) .def("_equals", @@ -1387,11 +1495,14 @@ All parameter, weight, gradient are variables in Paddle. 
.def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) .def("_equals", &IsSamePlace) .def("is_gpu_place", [](platform::Place &self) { return platform::is_gpu_place(self); }) .def("is_cpu_place", [](platform::Place &self) { return platform::is_cpu_place(self); }) + .def("is_xpu_place", + [](platform::Place &self) { return platform::is_xpu_place(self); }) .def("is_cuda_pinned_place", [](platform::Place &self) { return platform::is_cuda_pinned_place(self); @@ -1400,12 +1511,20 @@ All parameter, weight, gradient are variables in Paddle. [](platform::Place &self) { return BOOST_GET_CONST(platform::CUDAPlace, self).device; }) + .def("xpu_device_id", + [](platform::Place &self) { + return BOOST_GET_CONST(platform::XPUPlace, self).device; + }) .def("set_place", [](platform::Place &self, const platform::Place &other) { self = other; }) .def("set_place", [](platform::Place &self, const platform::CPUPlace &cpu_place) { self = cpu_place; }) + .def("set_place", + [](platform::Place &self, const platform::XPUPlace &xpu_place) { + self = xpu_place; + }) .def("set_place", [](platform::Place &self, const platform::CUDAPlace &gpu_place) { self = gpu_place; @@ -1433,6 +1552,9 @@ All parameter, weight, gradient are variables in Paddle. .def("run", [](OperatorBase &self, const Scope &scope, const platform::CPUPlace &place) { self.Run(scope, place); }) + .def("run", + [](OperatorBase &self, const Scope &scope, + const platform::XPUPlace &place) { self.Run(scope, place); }) .def("run", [](OperatorBase &self, const Scope &scope, const platform::CUDAPlace &place) { self.Run(scope, place); }) @@ -1533,6 +1655,7 @@ All parameter, weight, gradient are variables in Paddle. [](bool init_p2p) { framework::InitDevices(init_p2p); }); m.def("is_compiled_with_cuda", IsCompiledWithCUDA); + m.def("is_compiled_with_xpu", IsCompiledWithXPU); m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN); m.def("is_compiled_with_brpc", IsCompiledWithBrpc); m.def("is_compiled_with_dist", IsCompiledWithDIST); @@ -2493,6 +2616,7 @@ All parameter, weight, gradient are variables in Paddle. 
BindNode(&m); BindInferenceApi(&m); BindDataset(&m); + BindGenerator(&m); #ifdef PADDLE_WITH_CRYPTO BindCrypto(&m); #endif diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index ba79c4b44374eb9b50ad4982a2eacd664fc6e75e..0b4e4502bb816c4ca293e54d5f78e8e504df2b48 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -145,8 +145,14 @@ T TensorGetElement(const framework::Tensor &self, size_t offset) { T b = static_cast(0); if (platform::is_cpu_place(self.place())) { b = self.data()[offset]; + } else if (platform::is_xpu_place(self.place())) { +#ifdef PADDLE_WITH_XPU + const T *a = self.data(); + auto p = BOOST_GET_CONST(platform::XPUPlace, self.place()); + paddle::memory::Copy(platform::CPUPlace(), &b, p, a + offset, sizeof(T)); +#endif + } else if (platform::is_gpu_place(self.place())) { #ifdef PADDLE_WITH_CUDA - } else { const T *a = self.data(); auto p = BOOST_GET_CONST(platform::CUDAPlace, self.place()); paddle::memory::Copy(platform::CPUPlace(), &b, p, a + offset, sizeof(T), @@ -163,8 +169,14 @@ void TensorSetElement(framework::Tensor *self, size_t offset, T elem) { "The offset exceeds the size of tensor.")); if (platform::is_cpu_place(self->place())) { self->mutable_data(self->place())[offset] = elem; + } else if (platform::is_xpu_place(self->place())) { +#ifdef PADDLE_WITH_XPU + auto p = BOOST_GET_CONST(platform::XPUPlace, self->place()); + T *a = self->mutable_data(p); + paddle::memory::Copy(p, a + offset, platform::CPUPlace(), &elem, sizeof(T)); +#endif + } else if (platform::is_gpu_place(self->place())) { #ifdef PADDLE_WITH_CUDA - } else { auto p = BOOST_GET_CONST(platform::CUDAPlace, self->place()); T *a = self->mutable_data(p); paddle::memory::Copy(p, a + offset, platform::CPUPlace(), &elem, sizeof(T), @@ -194,6 +206,16 @@ void SetTensorFromPyArrayT( auto dst = self->mutable_data(place); std::memcpy(dst, array.data(), array.nbytes()); } + } else if (paddle::platform::is_xpu_place(place)) { +#ifdef PADDLE_WITH_XPU + auto dst = self->mutable_data(place); + xpu_memcpy(dst, array.data(), array.nbytes(), + XPUMemcpyKind::XPU_HOST_TO_DEVICE); +#else + PADDLE_THROW(platform::errors::PermissionDenied( + "Cannot use XPUPlace in CPU/GPU version, " + "Please recompile or reinstall Paddle with XPU support.")); +#endif } else { #ifdef PADDLE_WITH_CUDA auto dst = self->mutable_data(place); @@ -211,7 +233,7 @@ void SetTensorFromPyArrayT( } #else PADDLE_THROW(platform::errors::PermissionDenied( - "Cannot use CUDAPlace in CPU only version, " + "Cannot use CUDAPlace or CUDAPinnedPlace in CPU only version, " "Please recompile or reinstall Paddle with CUDA support.")); #endif } @@ -354,8 +376,13 @@ inline framework::Tensor *_getTensor(const framework::Tensor &self, if (platform::is_cpu_place(place)) { output->mutable_data(BOOST_GET_CONST(platform::CPUPlace, place), self.type()); -#ifdef PADDLE_WITH_CUDA + } else if (platform::is_xpu_place(place)) { +#ifdef PADDLE_WITH_XPU + output->mutable_data(BOOST_GET_CONST(platform::XPUPlace, place), + self.type()); +#endif } else { +#ifdef PADDLE_WITH_CUDA if (platform::is_cuda_pinned_place(place)) { output->mutable_data(BOOST_GET_CONST(platform::CUDAPinnedPlace, place), self.type()); @@ -516,6 +543,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, return py::array(); } bool is_gpu_tensor = platform::is_gpu_place(tensor.place()); + bool is_xpu_tensor = platform::is_xpu_place(tensor.place()); const auto &tensor_dims = tensor.dims(); auto tensor_dtype = tensor.type(); size_t 
sizeof_dtype = framework::SizeOfType(tensor_dtype); @@ -534,7 +562,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(tensor.type()); - if (!is_gpu_tensor) { + if (!is_gpu_tensor && !is_xpu_tensor) { if (!need_deep_copy) { return py::array(py::buffer_info( const_cast(tensor_buf_ptr), sizeof_dtype, py_dtype_str, @@ -557,28 +585,54 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, copy_bytes); return py_arr; } - } - + } else if (is_xpu_tensor) { +#ifdef PADDLE_WITH_XPU + py::array py_arr(py::dtype(py_dtype_str.c_str()), py_dims, py_strides); + PADDLE_ENFORCE_EQ(py_arr.writeable(), true, + platform::errors::InvalidArgument( + "PyArray is not writable, in which case memory leak " + "or double free would occur")); + PADDLE_ENFORCE_EQ( + py_arr.owndata(), true, + platform::errors::InvalidArgument( + "PyArray does not own data, in which case memory leak " + "or double free would occur")); + + size_t copy_bytes = sizeof_dtype * numel; + auto p = BOOST_GET_CONST(platform::XPUPlace, tensor.place()); + paddle::memory::Copy(platform::CPUPlace(), py_arr.mutable_data(), p, + tensor_buf_ptr, copy_bytes); + return py_arr; +#else + PADDLE_THROW(platform::errors::PermissionDenied( + "Cannot use XPUPlace in CPU/GPU version, " + "Please recompile or reinstall Paddle with XPU support.")); +#endif + } else if (is_gpu_tensor) { #ifdef PADDLE_WITH_CUDA - py::array py_arr(py::dtype(py_dtype_str.c_str()), py_dims, py_strides); - PADDLE_ENFORCE_EQ(py_arr.writeable(), true, - platform::errors::InvalidArgument( - "PyArray is not writable, in which case memory leak " - "or double free would occur")); - PADDLE_ENFORCE_EQ(py_arr.owndata(), true, - platform::errors::InvalidArgument( - "PyArray does not own data, in which case memory leak " - "or double free would occur")); - - size_t copy_bytes = sizeof_dtype * numel; - paddle::platform::GpuMemcpySync(py_arr.mutable_data(), tensor_buf_ptr, - copy_bytes, cudaMemcpyDeviceToHost); - return py_arr; + py::array py_arr(py::dtype(py_dtype_str.c_str()), py_dims, py_strides); + PADDLE_ENFORCE_EQ(py_arr.writeable(), true, + platform::errors::InvalidArgument( + "PyArray is not writable, in which case memory leak " + "or double free would occur")); + PADDLE_ENFORCE_EQ( + py_arr.owndata(), true, + platform::errors::InvalidArgument( + "PyArray does not own data, in which case memory leak " + "or double free would occur")); + + size_t copy_bytes = sizeof_dtype * numel; + paddle::platform::GpuMemcpySync(py_arr.mutable_data(), tensor_buf_ptr, + copy_bytes, cudaMemcpyDeviceToHost); + return py_arr; #else - PADDLE_THROW(platform::errors::PermissionDenied( - "Cannot use CUDAPlace in CPU only version, " - "Please recompile or reinstall Paddle with CUDA support.")); + PADDLE_THROW(platform::errors::PermissionDenied( + "Cannot use CUDAPlace in CPU only version, " + "Please recompile or reinstall Paddle with CUDA support.")); #endif + } + PADDLE_THROW(platform::errors::Unimplemented("Place is not supported")); + return py::array(); } } // namespace pybind diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 7d77ace91875547a3a2d418e87e0771aae662f79..c84574b21d883b24e1f89c59c3a724aae6621479 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -1,4 +1,4 @@ -@ECHO OFF +@ECHO ON SETLOCAL rem Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
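TensorToPyArray above gains an XPU branch that allocates a fresh numpy array and copies the device buffer into it with paddle::memory::Copy. A hedged sketch of that allocate-and-copy shape with a plain host vector standing in for the XPU buffer (ToNumpy and the module name are invented):

    #include <pybind11/numpy.h>
    #include <pybind11/pybind11.h>
    #include <pybind11/stl.h>

    #include <cstring>
    #include <vector>

    namespace py = pybind11;

    py::array_t<float> ToNumpy(const std::vector<float>& buf) {
      std::vector<py::ssize_t> shape{static_cast<py::ssize_t>(buf.size())};
      py::array_t<float> arr(shape);
      // For a real XPU/GPU tensor this copy would be memory::Copy or GpuMemcpySync.
      std::memcpy(arr.mutable_data(), buf.data(), buf.size() * sizeof(float));
      return arr;
    }

    PYBIND11_MODULE(tensor_demo, m) { m.def("to_numpy", &ToNumpy); }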
@@ -19,22 +19,73 @@ rem ================================================= rem Paddle CI Task On Windows Platform rem ================================================= - set work_dir=%cd% +if exist build rmdir build /s/q +mkdir build +cd /d build + +rem ------initialize the virtual environment------ +if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37 +set PATH=%PYTHON_ROOT%;%PYTHON_ROOT%\Scripts;%PATH% + +rem ToDo: virtual environment can't be deleted safely, some process not exit when task is canceled +rem Now use system python environment temporarily +rem set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe +rem %PYTHON_EXECUTABLE% -m pip install virtualenv +rem %PYTHON_EXECUTABLE% -m virtualenv paddle_winci +rem call paddle_winci\Scripts\activate.bat + +rem ------pre install requirement---------- +where python +where pip +pip install --upgrade pip --user +pip install wheel --user +pip install gym --user +pip install -U -r %work_dir%\python\requirements.txt --user +if %ERRORLEVEL% NEQ 0 ( + call paddle_winci\Scripts\deactivate.bat 2>NUL + echo pip install requirements.txt failed! + exit /b 7 +) + +rem ------initialize common variable------ +if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" if not defined BRANCH set BRANCH=develop -if not defined PYTHON_ROOT set PYTHON_ROOT=c:\Python27 -if not defined WITH_MKL set WITH_MKL=ON if not defined WITH_AVX set WITH_AVX=ON -if not defined WITH_AVX set WITH_AVX=ON -if not defined WITH_GPU set WITH_GPU=OFF if not defined WITH_TESTING set WITH_TESTING=ON if not defined WITH_PYTHON set WITH_PYTHON=ON if not defined ON_INFER set ON_INFER=ON if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=OFF -if not defined INFERENCE_DEMO_INSTALL_DIR set INFERENCE_DEMO_INSTALL_DIR=d:/.cache/inference_demo -if not defined THIRD_PARTY_PATH set THIRD_PARTY_PATH=%work_dir:\=/%/build/third_party -set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe -dir d:\.cache +if not defined WITH_TPCACHE set WITH_TPCACHE=ON + +rem ------set cache third_party------ +set cache_dir=%work_dir%\..\cache +dir %cache_dir% +set INFERENCE_DEMO_INSTALL_DIR=%cache_dir:\=/%/inference_demo + +if not exist %cache_dir%\tools ( + git clone https://github.com/zhouwei25/tools.git %cache_dir%\tools + if %ERRORLEVEL% NEQ 0 exit /b %ERRORLEVEL% +) + +if "%WITH_TPCACHE%"=="OFF" ( + set THIRD_PARTY_PATH=%work_dir:\=/%/build/third_party + goto :CASE_%1 +) + +echo set -ex > cache.sh +echo md5_content=$(cat %work_dir:\=/%/cmake/external/*.cmake ^|md5sum ^| awk '{print $1}') >> cache.sh +echo echo ${md5_content}^>md5.txt >> cache.sh + +%cache_dir%\tools\busybox64.exe cat cache.sh +%cache_dir%\tools\busybox64.exe bash cache.sh + +set /p md5=< md5.txt +if "%WITH_GPU%"=="ON" ( + set THIRD_PARTY_PATH=%cache_dir:\=/%/third_party_GPU/%md5% +) else ( + set THIRD_PARTY_PATH=%cache_dir:\=/%/third_party/%md5% +) goto :CASE_%1 @@ -45,6 +96,8 @@ echo "wincheck_openbals: run Windows OPENBLAS/CPU CI tasks on Windows" exit /b 1 :CASE_wincheck_mkl +set WITH_MKL=ON +set WITH_GPU=OFF call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error @@ -54,6 +107,8 @@ call :check_change_of_unittest || goto check_change_of_unittest_error goto:success :CASE_wincheck_openblas +set WITH_MKL=OFF +set WITH_GPU=ON call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error @@ -70,13 +125,14 @@ echo ======================================== echo 
Step 1. Cmake ... echo ======================================== -mkdir build -cd /d build -cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DPYTHON_EXECUTABLE=%PYTHON_EXECUTABLE% -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" -DON_INFER=%ON_INFER% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% +echo cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% -DON_INFER=%ON_INFER% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% +cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% -DON_INFER=%ON_INFER% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% goto:eof :cmake_error -exit /b %ERRORLEVEL% +call paddle_winci\Scripts\deactivate.bat 2>NUL +echo Cmake failed, will exit! +exit /b 7 rem --------------------------------------------------------------------------------------------- :build @@ -84,38 +140,42 @@ echo ======================================== echo Step 2. Buile Paddle ... echo ======================================== call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 -set build_times=1 +set build_times=1 :build_tp -echo BUILD THIRD_PARTY %build_times% +echo Build third_party for %build_times% time: msbuild /m /p:Configuration=Release /verbosity:quiet third_party.vcxproj -echo BUILD THIRD_PARTY RESULT %ERRORLEVEL% if %ERRORLEVEL% NEQ 0 ( set /a build_times=%build_times%+1 if %build_times% GTR 3 ( - exit /b 1 + exit /b 7 ) else ( + echo Build third_party failed, will retry! goto :build_tp ) ) +echo Build third_party successfully! set build_times=1 :build_paddle -echo BUILD PADDLE %build_times% -msbuild /m /p:Configuration=Release /verbosity:quiet paddle.sln -echo BUILD PADDLE RESULT %ERRORLEVEL% +echo Build Paddle for %build_times% time: +msbuild /m /p:Configuration=Release /verbosity:minimal paddle.sln if %ERRORLEVEL% NEQ 0 ( set /a build_times=%build_times%+1 if %build_times% GTR 2 ( - exit /b 1 + exit /b 7 ) else ( + echo Build Paddle failed, will retry! goto :build_paddle ) ) +echo Build Paddle successfully! goto:eof :build_error -exit /b %ERRORLEVEL% +call paddle_winci\Scripts\deactivate.bat 2>NUL +echo Build Paddle failed, will exit! +exit /b 7 rem --------------------------------------------------------------------------------------------- :test_whl_pacakage @@ -124,20 +184,29 @@ echo Step 3. Test pip install whl package ... echo ======================================== dir /s /b python\dist\*.whl > whl_file.txt set /p PADDLE_WHL_FILE_WIN=< whl_file.txt -%PYTHON_EXECUTABLE% -m pip install -U %PADDLE_WHL_FILE_WIN% -echo import paddle.fluid;print(paddle.__version__) > test_whl.py -%PYTHON_EXECUTABLE% test_whl.py + +pip uninstall -y paddlepaddle +pip uninstall -y paddlepaddle-gpu +pip install -U %PADDLE_WHL_FILE_WIN% --user +if %ERRORLEVEL% NEQ 0 ( + call paddle_winci\Scripts\deactivate.bat 2>NUL + echo pip install whl package failed! + exit /b 3 +) + +python %work_dir%\paddle\scripts\installation_validate.py goto:eof :test_whl_pacakage_error -exit /b %ERRORLEVEL% +call paddle_winci\Scripts\deactivate.bat 2>NUL +echo Test import paddle failed, will exit! 
+exit /b 3 rem --------------------------------------------------------------------------------------------- :unit_test echo ======================================== echo Step 4. Running unit tests ... echo ======================================== -%PYTHON_EXECUTABLE% -m pip install --upgrade pip dir %THIRD_PARTY_PATH:/=\%\install\openblas\lib dir %THIRD_PARTY_PATH:/=\%\install\openblas\bin @@ -147,27 +216,29 @@ dir %THIRD_PARTY_PATH:/=\%\install\mkldnn\bin dir %THIRD_PARTY_PATH:/=\%\install\warpctc\bin set PATH=%THIRD_PARTY_PATH:/=\%\install\openblas\lib;%THIRD_PARTY_PATH:/=\%\install\openblas\bin;%THIRD_PARTY_PATH:/=\%\install\zlib\bin;%THIRD_PARTY_PATH:/=\%\install\mklml\lib;%THIRD_PARTY_PATH:/=\%\install\mkldnn\bin;%THIRD_PARTY_PATH:/=\%\install\warpctc\bin;%PATH% -ctest.exe --output-on-failure -C Release -j 10 +ctest.exe --output-on-failure -C Release -j 8 goto:eof :unit_test_error -exit /b %ERRORLEVEL% +call paddle_winci\Scripts\deactivate.bat 2>NUL +echo Running unit tests failed, will exit! +exit /b 8 rem --------------------------------------------------------------------------------------------- :test_inference echo ======================================== echo Step 5. Testing fluid library for inference ... echo ======================================== -if NOT EXIST "d:\.cache\tools" ( - git clone https://github.com/zhouwei25/tools.git d:\.cache\tools -) + cd %work_dir%\paddle\fluid\inference\api\demo_ci -d:\.cache\tools\busybox64.exe bash run.sh %work_dir:\=/% %WITH_MKL% %WITH_GPU% d:/.cache/inference_demo +%cache_dir%\tools\busybox64.exe bash run.sh %work_dir:\=/% %WITH_MKL% %WITH_GPU% %cache_dir:\=/%/inference_demo goto:eof :test_inference_error -exit /b %ERRORLEVEL% +call paddle_winci\Scripts\deactivate.bat 2>NUL +echo Testing fluid library for inference failed! +exit /b 5 rem --------------------------------------------------------------------------------------------- :check_change_of_unittest @@ -205,7 +276,7 @@ echo git fetch upstream $BRANCH # develop is not fetched>> check_change_of_ echo fi>> check_change_of_unittest.sh echo git checkout -b origin_pr >> check_change_of_unittest.sh echo git checkout -f $BRANCH >> check_change_of_unittest.sh -echo cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DPYTHON_EXECUTABLE=%PYTHON_EXECUTABLE:\=\\% -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" -DON_INFER=%ON_INFER% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% >> check_change_of_unittest.sh +echo cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% -DON_INFER=%ON_INFER% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% >> check_change_of_unittest.sh echo cat ^<^> check_change_of_unittest.sh echo ============================================ >> check_change_of_unittest.sh echo Generate unit tests.spec of develop. 
>> check_change_of_unittest.sh @@ -236,11 +307,12 @@ echo exit 1 >> check_change_of_unittest.sh echo fi>> check_change_of_unittest.sh echo fi>> check_change_of_unittest.sh echo git checkout -f origin_pr >> check_change_of_unittest.sh -d:\.cache\tools\busybox64.exe bash check_change_of_unittest.sh +%cache_dir%\tools\busybox64.exe bash check_change_of_unittest.sh goto:eof :check_change_of_unittest_error -exit /b %ERRORLEVEL% +call paddle_winci\Scripts\deactivate.bat 2>NUL +exit /b 1 rem --------------------------------------------------------------------------------------------- @@ -258,6 +330,10 @@ taskkill /f /im git-remote-https.exe 2>NUL taskkill /f /im vctip.exe 2>NUL taskkill /f /im cvtres.exe 2>NUL taskkill /f /im rc.exe 2>NUL +taskkill /f /im %cd%\paddle\fluid\pybind\Release\op_function_generator.exe 2>NUL +taskkill /f /im python.exe 2>NUL +call paddle_winci\Scripts\deactivate.bat 2>NUL +taskkill /f /im python.exe 2>NUL echo Windows CI run successfully! exit /b 0 diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 4efcec7a41ded9f76fedce534ff95224935cc01c..962ee53a1069b8cd7863a0f1dca616c939eb237d 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -195,6 +195,12 @@ function cmake_base() { distibuted_flag=${WITH_DISTRIBUTE:-OFF} grpc_flag=${WITH_GRPC:-${distibuted_flag}} + if [ "$SYSTEM" == "Darwin" ]; then + gloo_flag="OFF" + else + gloo_flag=${distibuted_flag} + fi + cat < $spec_path @@ -900,7 +910,7 @@ set +x multiple_card_tests="$multiple_card_tests|^$testcase$" fi else - if [[ "${#single_card_tests}" -gt 3000 ]];then + if [[ "${#single_card_tests}" -gt 10000 ]];then if [[ "$single_card_tests_1" == "" ]]; then single_card_tests_1="^$testcase$" else @@ -926,17 +936,96 @@ set +x card_test "$multiple_card_tests" 2 # run cases with two GPUs card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs collect_failed_tests - if [ -n "${failed_test_lists}" ];then - failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'` - echo "========================================" - echo "Summary Failed Tests... 
" - echo "========================================" - echo "The following tests FAILED: " - echo "${failed_test_lists_ult}" - fi rm -f $tmp_dir/* + exec_times=0 + retry_unittests_record='' + retry_time=3 + exec_time_array=('first' 'second' 'third') + if [ -n "$failed_test_lists" ];then + while ( [ $exec_times -lt $retry_time ] && [ -n "${failed_test_lists}" ] ) + do + + retry_unittests_record="$retry_unittests_record$failed_test_lists" + failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'` + read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(\w+\)" | sed 's/(.\+)//' | sed 's/- //' ) + echo "=========================================" + echo "This is the ${exec_time_array[$exec_times]} time to re-run" + echo "=========================================" + echo "The following unittest will be re-run:" + echo "${failed_test_lists_ult}" + + for line in ${retry_unittests[@]} ; + do + + one_card_tests=$single_card_tests'|'$single_card_tests_1 + + read tmp_one_tmp <<< "$( echo $one_card_tests | grep -oEi $line )" + read tmp_mul_tmp <<< "$( echo $multiple_card_tests | grep -oEi $line )" + read exclusive_tmp <<< "$( echo $exclusive_tests | grep -oEi $line )" + + if [[ "$tmp_one_tmp" != "" ]]; then + if [[ "$one_card_retry" == "" ]]; then + one_card_retry="^$line$" + else + one_card_retry="$one_card_retry|^$line$" + fi + elif [[ "$tmp_mul_tmp" != "" ]]; then + if [[ "$multiple_card_retry" == "" ]]; then + multiple_card_retry="^$line$" + else + multiple_card_retry="$multiple_card_retry|^$line$" + fi + else + if [[ "$exclusive_retry" == "" ]];then + exclusive_retry="^$line$" + else + exclusive_retry="$exclusive_retry|^$line$" + fi + fi + + done + + if [[ "$one_card_retry" != "" ]]; then + card_test "$one_card_retry" 1 + fi + + if [[ "$multiple_card_retry" != "" ]]; then + card_test "$multiple_card_retry" 2 + fi + + if [[ "$exclusive_retry" != "" ]]; then + card_test "$exclusive_retry" + fi + + exec_times=$[$exec_times+1] + failed_test_lists='' + collect_failed_tests + rm -f $tmp_dir/* + one_card_retry='' + multiple_card_retry='' + exclusive_retry='' + retry_unittests='' + done + fi + + + if [[ "$EXIT_CODE" != "0" ]]; then - exit 8; + if [[ "$failed_test_lists" == "" ]]; then + echo "========================================" + echo "There are failed tests, which have been successful after re-run:" + echo "========================================" + echo "The following tests have been re-ran:" + echo "${retry_unittests_record}" + else + failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'` + echo "========================================" + echo "Summary Failed Tests... 
" + echo "========================================" + echo "The following tests FAILED: " + echo "${failed_test_lists_ult}" + exit 8; + fi fi set -ex fi diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 59dfc5c9d0311342fc72d8400a3abddd3f6d778b..779a6842ebb03e2afcdb7718f77eb9b0d2c09a83 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -75,14 +75,12 @@ IF(WIN32) add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle/ COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES}) ELSE(WIN32) add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND touch stub.cc COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES}) ENDIF() @@ -93,6 +91,7 @@ set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) if (WITH_TESTING) add_subdirectory(paddle/reader/tests) add_subdirectory(paddle/dataset/tests) + add_subdirectory(paddle/tests) add_subdirectory(paddle/fluid/tests) add_subdirectory(paddle/fluid/contrib/tests) add_subdirectory(paddle/fluid/contrib/slim/tests) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 14824407284571619babe058393cfa5956f7d0cd..4e1e04043ad7d2fd72bfe891b755a2503c2096b3 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -31,26 +31,33 @@ import paddle.reader import paddle.dataset import paddle.batch batch = batch.batch +from .fluid import monkey_patch_variable +from .fluid.dygraph import monkey_patch_math_varbase +monkey_patch_variable() +monkey_patch_math_varbase() +import paddle.framework +from .framework import VarBase as Tensor +from .framework import ComplexVariable as ComplexTensor import paddle.compat import paddle.distributed import paddle.sysconfig import paddle.tensor +import paddle.distribution import paddle.nn import paddle.distributed.fleet -import paddle.framework import paddle.optimizer import paddle.metric +import paddle.device import paddle.incubate.complex as complex # TODO: define alias in tensor and framework directory from .tensor.random import randperm +from .tensor.random import bernoulli from .tensor.attribute import rank #DEFINE_ALIAS from .tensor.attribute import shape #DEFINE_ALIAS -from .tensor.creation import create_tensor #DEFINE_ALIAS -# from .tensor.creation import create_lod_tensor #DEFINE_ALIAS -# from .tensor.creation import create_random_int_lodtensor #DEFINE_ALIAS +from .tensor.creation import to_tensor #DEFINE_ALIAS from .tensor.creation import crop_tensor #DEFINE_ALIAS from .tensor.creation import diag #DEFINE_ALIAS from .tensor.creation import eye #DEFINE_ALIAS @@ -84,7 +91,7 @@ from .tensor.logic import equal #DEFINE_ALIAS from .tensor.logic import greater_equal #DEFINE_ALIAS from .tensor.logic import greater_than #DEFINE_ALIAS from .tensor.logic import is_empty #DEFINE_ALIAS -from .tensor.logic import isfinite #DEFINE_ALIAS +#from .tensor.logic import isfinite #DEFINE_ALIAS from .tensor.logic import less_equal #DEFINE_ALIAS from .tensor.logic import less_than #DEFINE_ALIAS from .tensor.logic 
import logical_and #DEFINE_ALIAS @@ -100,7 +107,9 @@ from .tensor.logic import equal_all #DEFINE_ALIAS from .tensor.manipulation import cast #DEFINE_ALIAS from .tensor.manipulation import concat #DEFINE_ALIAS from .tensor.manipulation import expand #DEFINE_ALIAS +from .tensor.manipulation import broadcast_to #DEFINE_ALIAS from .tensor.manipulation import expand_as #DEFINE_ALIAS +from .tensor.manipulation import tile #DEFINE_ALIAS from .tensor.manipulation import flatten #DEFINE_ALIAS from .tensor.manipulation import gather #DEFINE_ALIAS from .tensor.manipulation import gather_nd #DEFINE_ALIAS @@ -123,6 +132,7 @@ from .tensor.manipulation import unstack #DEFINE_ALIAS from .tensor.manipulation import flip #DEFINE_ALIAS from .tensor.manipulation import unbind #DEFINE_ALIAS from .tensor.manipulation import roll #DEFINE_ALIAS +from .tensor.manipulation import chunk #DEFINE_ALIAS from .tensor.math import abs #DEFINE_ALIAS from .tensor.math import acos #DEFINE_ALIAS from .tensor.math import asin #DEFINE_ALIAS @@ -166,7 +176,11 @@ from .tensor.math import maximum #DEFINE_ALIAS from .tensor.math import min #DEFINE_ALIAS from .tensor.math import minimum #DEFINE_ALIAS from .tensor.math import mm #DEFINE_ALIAS -from .tensor.math import div #DEFINE_ALIAS +from .tensor.math import divide #DEFINE_ALIAS +from .tensor.math import floor_divide #DEFINE_ALIAS +from .tensor.math import remainder #DEFINE_ALIAS +from .tensor.math import mod #DEFINE_ALIAS +from .tensor.math import floor_mod #DEFINE_ALIAS from .tensor.math import multiply #DEFINE_ALIAS from .tensor.math import add #DEFINE_ALIAS from .tensor.math import atan #DEFINE_ALIAS @@ -176,11 +190,16 @@ from .tensor.math import log1p #DEFINE_ALIAS from .tensor.math import erf #DEFINE_ALIAS from .tensor.math import addcmul #DEFINE_ALIAS from .tensor.math import addmm #DEFINE_ALIAS -from .tensor.math import clamp #DEFINE_ALIAS +from .tensor.math import clip #DEFINE_ALIAS from .tensor.math import trace #DEFINE_ALIAS from .tensor.math import kron #DEFINE_ALIAS -# from .tensor.random import gaussin #DEFINE_ALIAS -# from .tensor.random import uniform #DEFINE_ALIAS +from .tensor.math import isfinite #DEFINE_ALIAS +from .tensor.math import isinf #DEFINE_ALIAS +from .tensor.math import isnan #DEFINE_ALIAS +from .tensor.math import prod #DEFINE_ALIAS +from .tensor.random import standard_normal +from .tensor.random import normal +from .tensor.random import uniform #DEFINE_ALIAS from .tensor.random import shuffle #DEFINE_ALIAS from .tensor.random import randn #DEFINE_ALIAS from .tensor.random import rand #DEFINE_ALIAS @@ -191,7 +210,7 @@ from .tensor.search import argmin #DEFINE_ALIAS from .tensor.search import argsort #DEFINE_ALIAS from .tensor.search import has_inf #DEFINE_ALIAS from .tensor.search import has_nan #DEFINE_ALIAS -# from .tensor.search import masked_select #DEFINE_ALIAS +from .tensor.search import masked_select #DEFINE_ALIAS from .tensor.search import topk #DEFINE_ALIAS from .tensor.search import where #DEFINE_ALIAS from .tensor.search import index_select #DEFINE_ALIAS @@ -223,6 +242,8 @@ from .framework import ExponentialDecay #DEFINE_ALIAS from .framework import InverseTimeDecay #DEFINE_ALIAS from .framework import PolynomialDecay #DEFINE_ALIAS from .framework import CosineDecay #DEFINE_ALIAS +from .framework import set_default_dtype #DEFINE_ALIAS +from .framework import get_default_dtype #DEFINE_ALIAS from .tensor.search import index_sample #DEFINE_ALIAS from .tensor.stat import mean #DEFINE_ALIAS @@ -230,6 +251,10 @@ from .tensor.stat import 
reduce_mean #DEFINE_ALIAS
 from .tensor.stat import std #DEFINE_ALIAS
 from .tensor.stat import var #DEFINE_ALIAS
 from .fluid.data import data
+from .tensor.stat import numel #DEFINE_ALIAS
+from .device import get_cudnn_version
+from .device import set_device
+from .device import get_device
 # from .tensor.tensor import Tensor #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensor #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS
@@ -239,6 +264,7 @@ from .incubate import hapi
 from .fluid.dygraph.base import enable_dygraph as disable_static #DEFINE_ALIAS
 from .fluid.dygraph.base import disable_dygraph as enable_static #DEFINE_ALIAS
 from .fluid.framework import in_dygraph_mode as in_dynamic_mode #DEFINE_ALIAS
+from .fluid.dygraph.base import no_grad #DEFINE_ALIAS
 from . import jit
 from . import static
diff --git a/python/paddle/device.py b/python/paddle/device.py
index 894ee5b9e8b1debb2c043de30314e8ebb94d3bc0..e2ef8e7092ad3f6af91c8d5d3c0b1deaed025514 100644
--- a/python/paddle/device.py
+++ b/python/paddle/device.py
@@ -13,10 +13,119 @@
 # limitations under the License.

 # TODO: define the functions to manipulate devices
-# __all__ = ['cpu_places',
-#            'CPUPlace',
-#            'cuda_pinned_places',
-#            'cuda_places',
-#            'CUDAPinnedPlace',
-#            'CUDAPlace',
-#            'is_compiled_with_cuda']
+from paddle.fluid import core
+from paddle.fluid import framework
+import re
+
+__all__ = [
+    'get_cudnn_version',
+    'set_device',
+    'get_device'
+    # 'cpu_places',
+    # 'CPUPlace',
+    # 'cuda_pinned_places',
+    # 'cuda_places',
+    # 'CUDAPinnedPlace',
+    # 'CUDAPlace',
+    # 'is_compiled_with_cuda'
+]
+
+_cudnn_version = None
+
+
+def get_cudnn_version():
+    """
+    This function returns the version of cuDNN. The return value is an int that encodes the
+    cuDNN version. For example, a return value of 7600 means cuDNN 7.6.
+
+    Returns:
+        int: An int value that represents the cuDNN version. If cuDNN is not installed, it returns None.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            cudnn_version = paddle.get_cudnn_version()
+
+    """
+    global _cudnn_version
+    if not core.is_compiled_with_cuda():
+        return None
+    if _cudnn_version is None:
+        cudnn_version = int(core.cudnn_version())
+        _cudnn_version = cudnn_version
+        if _cudnn_version < 0:
+            return None
+        else:
+            return cudnn_version
+    else:
+        return _cudnn_version
+
+
+def set_device(device):
+    """
+    Paddle supports running calculations on various types of devices, including CPU and GPU.
+    They are represented by string identifiers. This function specifies the global device
+    on which the OP will run.
+
+    Parameters:
+        device(str): This parameter determines the specific running device.
+            It can be ``cpu`` or ``gpu:0``. When ``device`` is ``cpu``, the
+            program runs on the CPU; when ``device`` is ``gpu:x``, the
+            program runs on the GPU.
+    Examples:
+
+        .. code-block:: python
+
+            import paddle
+            paddle.enable_imperative()
+            paddle.set_device("gpu:0")
+            x1 = paddle.ones(name='x1', shape=[1, 2], dtype='int32')
+            x2 = paddle.zeros(name='x2', shape=[1, 2], dtype='int32')
+            data = paddle.stack([x1, x2], axis=1)
+    """
+    lower_device = device.lower()
+    if lower_device == 'cpu':
+        place = core.CPUPlace()
+        framework._set_expected_place(place)
+    else:
+        available_device = ((lower_device == 'cpu') or
+                            re.match(r'gpu:\d+', lower_device))
+        if not available_device:
+            raise ValueError(
+                "The device must be a string which is like 'cpu' or 'gpu:0'")
+        device_info_list = device.split(':', 1)
+        device_id = device_info_list[1]
+        device_id = int(device_id)
+        place = core.CUDAPlace(device_id)
+        framework._set_expected_place(place)
+
+
+def get_device():
+    """
+    This function returns the current global device on which the program runs.
+    The result is a string such as 'cpu' or 'gpu:0'. If the global device is not
+    set, it returns 'gpu:0' when CUDA is available, otherwise 'cpu'.
+
+    Examples:
+
+        .. code-block:: python
+
+            import paddle
+            paddle.enable_imperative()
+            device = paddle.get_device()
+
+    """
+    device = ''
+    place = framework._current_expected_place()
+    if isinstance(place, core.CPUPlace):
+        device = 'cpu'
+    elif isinstance(place, core.CUDAPlace):
+        device_id = place.get_device_id()
+        device = 'gpu:' + str(device_id)
+
+    return device
diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py
index cc5ce0f2b74b6193652b7ec7ed4e03407df296c5..b080fb17553d4a93a545f4ae781d786d82e26576 100644
--- a/python/paddle/distributed/fleet/__init__.py
+++ b/python/paddle/distributed/fleet/__init__.py
@@ -12,16 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# TODO: define distributed api under this directory,
+# TODO: define distributed api under this directory,
+from .base.role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker
 from .base.distributed_strategy import DistributedStrategy
 from .base.fleet_base import Fleet
 from .base.util_factory import UtilBase
 from .dataset import *
-#from .base.role_maker import PaddleCloudRoleMaker

 __all__ = [
-    "DistributedStrategy", "UtilBase", "DatasetFactory", "DatasetBase",
-    "InMemoryDataset", "QueueDataset"
+    "DistributedStrategy",
+    "UtilBase",
+    "DatasetFactory",
+    "DatasetBase",
+    "InMemoryDataset",
+    "QueueDataset",
+    "UserDefinedRoleMaker",
+    "PaddleCloudRoleMaker",
 ]

 fleet = Fleet()
@@ -42,4 +48,6 @@ init_server = fleet.init_server
 run_server = fleet.run_server
 stop_worker = fleet.stop_worker
 distributed_optimizer = fleet.distributed_optimizer
+save_inference_model = fleet.save_inference_model
+save_persistables = fleet.save_persistables
 minimize = fleet.minimize
diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py
index 31bfd482766cb97d3c373c49774640ba8d7ba487..2971617aa705f55f193e512bf7ef75b609588c02 100755
--- a/python/paddle/distributed/fleet/base/distributed_strategy.py
+++ b/python/paddle/distributed/fleet/base/distributed_strategy.py
@@ -81,6 +81,8 @@ class DistributedJobInfo(object):

 class DistributedStrategy(object):
+    __lock_attr = False
+
     def __init__(self):
         """
         DistributedStrategy is the main configuration entry for distributed training of Paddle.
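To make the intent of these fleet additions concrete, here is a minimal usage sketch of the APIs touched in this patch; it is an illustration only (the SGD learning rate and the amp settings are arbitrary), not part of the change itself:

    .. code-block:: python

        import paddle.fluid as fluid
        import paddle.distributed.fleet as fleet

        # role_maker defaults to PaddleCloudRoleMaker when omitted
        fleet.init(is_collective=True)

        strategy = fleet.DistributedStrategy()
        strategy.amp = True
        strategy.amp_configs = {"init_loss_scaling": 32768}
        # once constructed, unknown attributes (e.g. a typo such as
        # strategy.am = True) raise TypeError because of the
        # __lock_attr / __setattr__ guard added just below

        optimizer = fluid.optimizer.SGD(learning_rate=0.01)
        optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
        # optimizer.minimize(loss) is then called inside the training program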
@@ -95,6 +97,13 @@ class DistributedStrategy(object): """ self.strategy = distributed_strategy_pb2.DistributedStrategy() + self.__lock_attr = True + + def __setattr__(self, key, value): + if self.__lock_attr and not hasattr(self, key): + raise TypeError("%s is not a attribute of %s" % + (key, self.__class__.__name__)) + object.__setattr__(self, key, value) def save_to_prototxt(self, output): """ @@ -298,6 +307,30 @@ class DistributedStrategy(object): @property def amp_configs(self): + """ + Set automatic mixed precision training configurations. In general, amp has serveral configurable + settings that can be configured through a dict. + + **Notes**: + **init_loss_scaling(float)**: The initial loss scaling factor. Default 32768. + **use_dynamic_loss_scaling(bool)**: Whether to use dynamic loss scaling. Default True. + **incr_every_n_steps(int)**: Increases loss scaling every n consecutive steps with finite gradients. Default 1000. + **decr_every_n_nan_or_inf(int)**: Decreases loss scaling every n accumulated steps with nan or inf gradients. Default 2. + **incr_ratio(float)**: The multiplier to use when increasing the loss scaling. Default 2.0. + **decr_ratio(float)**: The less-than-one-multiplier to use when decreasing the loss scaling. Default 0.5. + **custom_white_list(list[str])**: Users' custom white list which always execution fp16. + **custom_black_list(list[str])**: Users' custom black list which forbidden execution fp16. + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.amp = True + strategy.amp_configs = { + "init_loss_scaling": 32768, + "custom_white_list": ['conv2d']} + """ return get_msg_dict(self.strategy.amp_configs) @amp_configs.setter @@ -324,6 +357,17 @@ class DistributedStrategy(object): @property def sync_nccl_allreduce(self): + """ + Indicating whether we are using synchronized all reduce in each communication thread + We note that system overhead is usually lower when sync_nccl_allreduce = True + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.sync_nccl_allreduce = True + """ return self.strategy.sync_nccl_allreduce @sync_nccl_allreduce.setter @@ -335,6 +379,18 @@ class DistributedStrategy(object): @property def use_hierarchical_allreduce(self): + """ + Indicating whether we are using hierarchical allreduce in collective communication + Hierarchical allreduce often does allreduce within a certain node group and then do + allreduce among the leaders of each group + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.use_hierarchical_allreduce = True + """ return self.strategy.use_hierarchical_allreduce @use_hierarchical_allreduce.setter @@ -348,6 +404,17 @@ class DistributedStrategy(object): @property def hierarchical_allreduce_inter_nranks(self): + """ + Number of ranks for low level node groups in hierarchical allreduce + Default value: number of GPU cards on each single GPU machine + + Example: + .. 
code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.hierarchical_allreduce_inter_nranks = 8 + """ return self.strategy.hierarchical_allreduce_inter_nranks @hierarchical_allreduce_inter_nranks.setter @@ -361,6 +428,19 @@ class DistributedStrategy(object): @property def sync_batch_norm(self): + """ + Indicating whether we are using sync_batch_norm to do synchronous batch normalization among all training nodes. + + Default value: False + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.sync_batch_norm = True + """ + return self.strategy.sync_batch_norm @sync_batch_norm.setter @@ -372,6 +452,17 @@ class DistributedStrategy(object): @property def fuse_all_reduce_ops(self): + """ + Indicating whether we are using fuse_all_reduce_ops for gradient fusion during backward phase of training + Default value: True + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.fuse_all_reduce_ops = False + """ return self.strategy.fuse_all_reduce_ops @fuse_all_reduce_ops.setter @@ -383,6 +474,18 @@ class DistributedStrategy(object): @property def fuse_grad_size_in_MB(self): + """ + Specifying the size of gradient to fuse in Mega-Bytes + + Default value: 32 + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.fuse_grad_size_in_MB = 50 + """ return self.strategy.fuse_grad_size_in_MB @fuse_grad_size_in_MB.setter @@ -407,6 +510,19 @@ class DistributedStrategy(object): @property def nccl_comm_num(self): + """ + Specifying the number of NCCL communicator + + Default value: 1 + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.nccl_comm_num = 2 + """ + return self.strategy.nccl_comm_num @nccl_comm_num.setter @@ -528,6 +644,20 @@ class DistributedStrategy(object): @property def dgc(self): + """ + Indicating whether we are using Deep Gradient Compression training. For more details, please refer to + [Deep Gradient Compression](https://arxiv.org/abs/1712.01887). + + Default Value: False + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.dgc = True # by default this is false + + """ return self.strategy.dgc @dgc.setter @@ -539,6 +669,28 @@ class DistributedStrategy(object): @property def dgc_configs(self): + """ + Set Deep Gradient Compression training configurations. In general, dgc has serveral configurable + settings that can be configured through a dict. + + **Notes**: + **rampup_begin_step(int)**: The beginning step from which gradient compression is implemented. Default 0. + **rampup_step(int)**: Time steps used in sparsity warm-up periods. Default is 1. + For example, if the sparsity is [0.75, 0.9375, 0.984375, 0.996, 0.999], and the rampup_step is 100, + it will use 0.75 at 0~19 steps, and 0.9375 at 20~39 steps, and so on. And when reach sparsity array + ends, it will use 0.999 then and after. + **sparsity(list[float])**: Get top important element from gradient tensor, the ratio is (1 - sparsity). + Default is [0.999]. For example, if the sparsity is [0.99, 0.999], the top [1%, 0.1%] important + element will be transmitted. + + Examples: + .. 
code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.dgc = True + strategy.dgc_configs = {"rampup_begin_step": 1252} + """ return get_msg_dict(self.strategy.dgc_configs) @dgc_configs.setter diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index 695fd01909c0e6eecb37b34a120f156fb5fed090..c8ae8df52066ef9498996c1094cfa01c3f27b615 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -14,15 +14,32 @@ from __future__ import print_function import paddle +from .role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker, RoleMakerBase from .strategy_compiler import StrategyCompiler from .distributed_strategy import DistributedStrategy from .meta_optimizer_factory import MetaOptimizerFactory from .runtime_factory import RuntimeFactory from .util_factory import UtilFactory +from paddle.fluid.wrapped_decorator import wrap_decorator __all__ = ['Fleet'] +def _inited_runtime_handler_(func): + def __impl__(*args, **kwargs): + cls = args[0] + + if cls._runtime_handle is None: + raise ValueError("Fleet can not find suitable runtime handler") + + return func(*args, **kwargs) + + return __impl__ + + +inited_runtime_handler = wrap_decorator(_inited_runtime_handler_) + + class Fleet(object): """ Unified API for distributed training of PaddlePaddle @@ -58,10 +75,35 @@ class Fleet(object): def __init__(self): self._runtime_handle = None self._util = None + self._role_maker = None + self._is_collective = False - def init(self, role_maker): - self._role_maker = role_maker + def init(self, role_maker=None, is_collective=False): + """ + Initialize role_maker in Fleet. + + This function is responsible for the distributed architecture + what you want to run your code behind,such as Transpiler, + Collective in PaddleCloudRoleMaker or UserDefinedRoleMaker + + """ + if isinstance(role_maker, RoleMakerBase): + self._role_maker = role_maker + elif role_maker == None: + if isinstance(is_collective, bool): + self._is_collective = is_collective + self._role_maker = PaddleCloudRoleMaker( + is_collective=self._is_collective) + else: + raise ValueError( + "Something wrong occurred, please check whether is_collective is bool value" + ) + else: + raise ValueError( + "Something wrong occurred, please check whether rolemaker is instance of RoleMakerBase" + ) self.strategy_compiler = StrategyCompiler() + return None def is_first_worker(self): """ @@ -182,34 +224,48 @@ class Fleet(object): """ self._role_maker.barrier_worker() + @inited_runtime_handler def init_worker(self): """ init worker """ - assert self._runtime_handle is not None self._runtime_handle._init_worker() + @inited_runtime_handler def init_server(self, *args, **kwargs): """ init server """ - assert self._runtime_handle is not None self._runtime_handle._init_server(*args, **kwargs) + @inited_runtime_handler def run_server(self): """ run server """ - assert self._runtime_handle is not None self._runtime_handle._run_server() + @inited_runtime_handler def stop_worker(self): """ stop worker """ - assert self._runtime_handle is not None self._runtime_handle._stop_worker() + def save_inference_model(self, + executor, + dirname, + feeded_var_names, + target_vars, + main_program=None, + export_for_deployment=True): + self._runtime_handle._save_inference_model( + executor, dirname, feeded_var_names, target_vars, main_program, + export_for_deployment) + + def 
save_persistables(self, executor, dirname, main_program=None): + self._runtime_handle._save_persistables(executor, dirname, main_program) + def distributed_optimizer(self, optimizer, strategy=None): """ distirbuted_optimizer diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index 0cf909c98c057e330195135bf1b3b5b90facd2ca..6aeeb4a2896ea1d20390e463937aa07d3edd0204 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -110,6 +110,14 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") + def node_num(self): + """ + Get the training node number + Returns: + int: node num + """ + raise NotImplementedError("Please implement this method in child class") + def get_trainer_endpoints(self): """ return trainer endpoints @@ -157,10 +165,10 @@ class RoleMakerBase(object): class PaddleCloudRoleMaker(RoleMakerBase): - def __init__(self, is_collective=False, init_gloo=True, **kwargs): + def __init__(self, is_collective=False, **kwargs): super(PaddleCloudRoleMaker, self).__init__() self._is_collective = is_collective - self._init_gloo = init_gloo + self._init_gloo = False #default no init gloo self._kwargs = kwargs self._role_is_generated = False @@ -196,30 +204,35 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._prefix = os.getenv("SYS_JOB_ID", "") def _barrier(self, comm_world): - if comm_world: + if isinstance(comm_world, fluid.core.Gloo): comm_world.barrier() + else: + print("warning: must init Gloo before using _barrier() function") def _all_gather(self, comm_world, input): - if comm_world: + if isinstance(comm_world, fluid.core.Gloo): self._barrier(comm_world) output = comm_world.all_gather(input) return output else: + print("warning: must init Gloo before using _all_gather() function") return None def _all_reduce(self, comm_world, input, mode="sum"): - if not comm_world: - return None + if isinstance(comm_world, fluid.core.Gloo): - input = np.array(input) + input = np.array(input) - input_shape = input.shape - input_list = input.reshape(-1).tolist() + input_shape = input.shape + input_list = input.reshape(-1).tolist() - self._barrier(comm_world) - ans = comm_world.all_reduce(input_list, mode) - output = np.array(ans).reshape(input_shape) - return output + self._barrier(comm_world) + ans = comm_world.all_reduce(input_list, mode) + output = np.array(ans).reshape(input_shape) + return output + else: + print("warning: must init Gloo before using _all_reduce() function") + return None def is_worker(self): """ @@ -286,6 +299,14 @@ class PaddleCloudRoleMaker(RoleMakerBase): self.generate_role() return self._trainers_num + def node_num(self): + """ + return the training node number + """ + if not self._role_is_generated: + self.generate_role() + return self._node_num + def get_trainer_endpoints(self): """ get endpoint of all trainers @@ -353,6 +374,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._trainers_num = trainers_num self._role = role self._current_id = current_id + self._node_num = len( + set([x.split(':')[0] for x in self._worker_endpoints])) def _collective_env(self): self._current_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) @@ -363,6 +386,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): assert self._worker_endpoints is not None, "can't find PADDLE_TRAINER_ENDPOINTS" self._worker_endpoints = self._worker_endpoints.split(",") self._trainers_num = len(self._worker_endpoints) + self._node_num = len( + 
set([x.split(':')[0] for x in self._worker_endpoints])) def _init_gloo_env(self): def init_gloo_instance(role="trainer"): @@ -440,6 +465,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): if not self._role_is_generated: if not self._is_collective: self._ps_env() + if "PADDLE_WITH_GLOO" in os.environ: + self._init_gloo = bool(os.environ["PADDLE_WITH_GLOO"]) if self._init_gloo: self._init_gloo_env() else: @@ -513,12 +540,16 @@ class UserDefinedRoleMaker(PaddleCloudRoleMaker): self._cur_endpoint = self._worker_endpoints[self._current_id] elif self._role == Role.SERVER: self._cur_endpoint = self._server_endpoints[self._current_id] + self._node_num = len( + set([x.split(':')[0] for x in self._worker_endpoints])) def _user_defined_collective_env(self): self._worker_endpoints = self._kwargs.get("worker_endpoints") self._current_id = self._kwargs.get("current_id") self._trainers_num = len(self._worker_endpoints) self._training_role = Role.Worker + self._node_num = len( + set([x.split(':')[0] for x in self._worker_endpoints])) def generate_role(self): """ diff --git a/python/paddle/distributed/fleet/base/strategy_compiler.py b/python/paddle/distributed/fleet/base/strategy_compiler.py index f0e23713e4f3f98217280f2cbe071bf1e23c823e..4097fc1237f8d7616101810f994c243dffb2cd67 100644 --- a/python/paddle/distributed/fleet/base/strategy_compiler.py +++ b/python/paddle/distributed/fleet/base/strategy_compiler.py @@ -76,6 +76,18 @@ class StrategyCompiler(StrategyCompilerBase): opt._disable_strategy(valid_strategy) return valid_strategy + """ + Meta Optimizer Type A: rewrite forward, backward. e.g. recompute, async, sync, pipeline. + results will be splitted in async, sync, pipeline + Meta Optimizer Type B: rewrite forward, + e.g. AMP and the corresponding backward is generated by rewritten forward + Meta Opitmizer Type B: rewrite backward. e.g. gradient fusion + Meta Optimizer Type D: rewrite optimize. e.g. lars, lamb, localsgd, gradient merge, dgc + Meta Optimizer Type E: only transpile to Graph structure for runtime, + currently, grad fusion and kernel fusion, sync batch-norm included. 
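The classification above drives a simple compatibility rule implemented a few lines below: a graph-level optimizer is dropped as soon as any selected program-level meta optimizer black-lists it. A self-contained toy sketch of that rule (the class names here are invented for illustration):

    .. code-block:: python

        # toy stand-ins for meta optimizers; only the black list matters here
        class FakeAMPOptimizer(object):
            meta_optimizers_black_list = ["DGCOptimizer"]

        class FakeGraphExecutionOptimizer(object):
            meta_optimizers_black_list = []

        meta_optimizers = [FakeAMPOptimizer()]
        graph_optimizers = [FakeGraphExecutionOptimizer()]

        need_graph_opt = True
        for graph_opt in graph_optimizers:
            for program_opt in meta_optimizers:
                if graph_opt.__class__.__name__ in program_opt.meta_optimizers_black_list:
                    need_graph_opt = False

        # the toy graph optimizer is not black-listed, so it is kept
        assert need_graph_opt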
+ we will remove grad fusion and sync batch-norm + """ + def generate_optimizer(self, loss, role_maker, optimizer, user_defined_strategy, meta_optimizer_list, graph_optimizer_list): @@ -102,4 +114,18 @@ class StrategyCompiler(StrategyCompilerBase): 0] return_graph = None if graph_optimizers == None else graph_optimizers[ 0] + + if meta_optimizers == None or graph_optimizers == None: + return return_meta, return_graph + + # do heuristic filter here, if any meta optimizer in graph optimizers is in + # any meta optimizers' black list, set return_graph to None + need_graph_opt = True + for graph_opt in graph_optimizers: + for program_opt in meta_optimizers: + if graph_opt.__class__.__name__ in program_opt.meta_optimizers_black_list: + need_graph_opt = False + if not need_graph_opt: + return_graph = None + return return_meta, return_graph diff --git a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py index 8316d807fa87062a8e3fba0bcb3bd057d2231032..66db14209b4c57475c30c6dde083593e27f04ea0 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py @@ -23,7 +23,12 @@ class AMPOptimizer(MetaOptimizerBase): self.inner_opt = optimizer self.amp_opt = None # we do not allow meta optimizer to be inner optimizer currently - self.meta_optimizers_white_list = [] + self.meta_optimizers_white_list = [ + "LarsOptimizer", "LambOptimizer", "RecomputeOptimizer", + "LocalSGDOptimizer", "GradientMergeOptimizer", + "GraphExecutionOptimizer" + ] + self.meta_optimizers_black_list = ["DGCOptimizer"] def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): @@ -37,6 +42,7 @@ class AMPOptimizer(MetaOptimizerBase): def _disable_strategy(self, dist_strategy): dist_strategy.amp = False + dist_strategy.amp_configs = {} def minimize_impl(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/async_graph_execution_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/async_graph_execution_optimizer.py index 890eae2c14337777349b820986132ba1b7465034..c0dee220aafd07bf69a198c6b03e6c957c50d4ce 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/async_graph_execution_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/async_graph_execution_optimizer.py @@ -33,6 +33,9 @@ class AsyncGraphExecutionOptimizer(AsyncMetaOptimizer): return True + def _disable_strategy(self, dist_strategy): + dist_strategy.a_sync_configs = {} + def _is_graph_out(self): return True diff --git a/python/paddle/distributed/fleet/meta_optimizers/async_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/async_optimizer.py index b88e863d7bec53187c9ffc7fdfcd1d264b2d3dda..b65435497284d279ebdea026e7ac88883a724c7c 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/async_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/async_optimizer.py @@ -139,4 +139,4 @@ class AsyncMetaOptimizer(MetaOptimizerBase): return None, None def _disable_strategy(self, dist_strategy): - self.user_defined_strategy.a_sync_configs["k_steps"] = -1 + self.user_defined_strategy.a_sync_configs = {} diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py index c9a28fdaf11dd0d4d45cfd3fb1904b80dc136711..f34786f9dc309dd1f03319368bbc93ef1bfc03e3 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py +++ 
b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py @@ -25,6 +25,7 @@ class DGCOptimizer(MetaOptimizerBase): self.dgc_opt = None # we do not allow meta optimizer to be inner optimizer currently self.meta_optimizers_white_list = [] + self.meta_optimizers_black_list = [] def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): @@ -68,11 +69,7 @@ class DGCOptimizer(MetaOptimizerBase): def _disable_strategy(self, dist_strategy): dist_strategy.dgc = False - dist_strategy.dgc_configs = { - 'rampup_begin_step': 0, - 'rampup_step': 1, - 'sparsity': [0.999] - } + dist_strategy.dgc_configs = {} def backward(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py index 668cf605defaf5eb3f4e205c5a18548e45449a9c..bd52179a35862241768ad5bd01eedf16732ad3b6 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py @@ -16,13 +16,20 @@ from .meta_optimizer_base import MetaOptimizerBase __all__ = ["GradientMergeOptimizer"] +# amp + gradient merge + lamb + class GradientMergeOptimizer(MetaOptimizerBase): def __init__(self, optimizer): super(GradientMergeOptimizer, self).__init__(optimizer) self.inner_opt = optimizer self.wrapped_opt = GM(optimizer) - self.meta_optimizers_white_list = [] + self.meta_optimizers_white_list = [ + "LarsOptimizer", + "LambOptimizer", + "GraphExecutionOptimizer", + ] + self.meta_optimizers_black_list = [] def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): @@ -40,7 +47,7 @@ class GradientMergeOptimizer(MetaOptimizerBase): def _disable_strategy(self, dist_strategy): dist_strategy.gradient_merge = False - dist_strategy.gradient_merge_configs = {"k_steps": 1, "avg": True} + dist_strategy.gradient_merge_configs = {} def minimize_impl(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py index 78478b9691b2174612669a8dca3fc749f8d8a7b3..ace31687338f918ef260b3134b0bd429795542d0 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py @@ -25,6 +25,7 @@ class GraphExecutionOptimizer(MetaOptimizerBase): self.inner_opt = optimizer # we do not allow meta optimizer to be inner optimizer currently self.meta_optimizers_white_list = [] + self.meta_optimizers_black_list = [] def _is_graph_out(self): return True @@ -119,18 +120,26 @@ class GraphExecutionOptimizer(MetaOptimizerBase): local_build_strategy.nccl_comm_num = \ dist_strategy.nccl_comm_num + if self.user_defined_strategy.recompute == True: + logging.warn( + "set enable_sequential_execution=True since you have enable the recompute strategy" + ) + local_build_strategy.enable_sequential_execution = True + exe_strategy = self.user_defined_strategy.execution_strategy - node_num = self.role_maker.worker_num() + worker_num = self.role_maker.worker_num() + node_num = self.role_maker.node_num() if self.role_maker._is_collective: - assert node_num >= 1, "nccl2 node_num must >= 1, now:{}" % node_num + assert worker_num >= 1, "nccl2 worker_num must >= 1, now:{}" % worker_num - if node_num <= 1: + if worker_num <= 1: # local mode if local_build_strategy.nccl_comm_num > 1: logging.warn("set 
nccl_comm_num=1 since you only have 1 node.") local_build_strategy.nccl_comm_num = 1 + if node_num <= 1: if local_build_strategy.use_hierarchical_allreduce: logging.warn( "set hierachical_allreduce=False since you only have 1 node." diff --git a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py index cf4b479b52309ea2e26bdfdd55cf22efbe7d3140..7e08a02eb1dc2e14b1871fe7743bbee8ade3feb3 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py @@ -25,7 +25,8 @@ class LambOptimizer(MetaOptimizerBase): self.inner_opt = optimizer self.lamb_opt = None # we do not allow meta optimizer to be inner optimizer currently - self.meta_optimizers_white_list = [] + self.meta_optimizers_white_list = ["GraphExecutionOptimizer"] + self.meta_optimizers_black_list = [] def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): @@ -74,10 +75,7 @@ class LambOptimizer(MetaOptimizerBase): def _disable_strategy(self, dist_strategy): dist_strategy.lamb = False - dist_strategy.lamb_configs = { - 'lamb_weight_decay': 0.01, - 'exclude_from_weight_decay': [], - } + dist_strategy.lamb_configs = {} def backward(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py index ff535e3ebf259cf646cb9649ee45acc409a8d0d7..09c418fa79106d05cffae1e8bc18fac9c0cc8f34 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py @@ -24,7 +24,8 @@ class LarsOptimizer(MetaOptimizerBase): self.inner_opt = optimizer self.lars_opt = None # we do not allow meta optimizer to be inner optimizer currently - self.meta_optimizers_white_list = [] + self.meta_optimizers_white_list = ["GraphExecutionOptimizer"] + self.meta_optimizers_black_list = [] def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): @@ -58,10 +59,7 @@ class LarsOptimizer(MetaOptimizerBase): def _disable_strategy(self, dist_strategy): dist_strategy.lars = False - dist_strategy.lars_configs = { - 'lars_coeff': 0.001, - 'lars_weight_decay': 0.0005, - } + dist_strategy.lars_configs = {} def backward(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index 05a120f8163755ad0effeccfe729f88782cfeebe..e22127c13999bfde7aa753ad1a66536913ab04f9 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -14,7 +14,7 @@ from __future__ import print_function -from paddle.fluid import program_guard, layers +from paddle.fluid import program_guard, layers, default_main_program from paddle.fluid.optimizer import Momentum, SGD from .meta_optimizer_base import MetaOptimizerBase from .common import OpRole, OP_ROLE_KEY, CollectiveHelper, is_update_op @@ -25,6 +25,7 @@ class LocalSGDOptimizer(MetaOptimizerBase): super(LocalSGDOptimizer, self).__init__(optimizer) self.inner_opt = optimizer self.meta_optimizers_white_list = [] + self.meta_optimizers_black_list = ["GraphExecutionOptimizer"] self.snapshot_key = '@SNAPSHOT' def _can_apply(self): @@ -39,11 +40,35 @@ class LocalSGDOptimizer(MetaOptimizerBase): def _disable_strategy(self, dist_strategy): 
dist_strategy.localsgd = False - dist_strategy.localsgd_configs = {'k_steps': 1} + dist_strategy.localsgd_configs = {} def snapshot_name(self, param_name): return param_name + self.snapshot_key + def create_snapshot_vars(self, program): + block = program.global_block() + + non_dist_params = [] + for param in block.iter_parameters(): + if not param.is_distributed: + non_dist_params.append(param) + + p2s = [] + for param in non_dist_params: + snapshot = block.create_var( + name=self.snapshot_name(param.name), + shape=param.shape, + persistable=True, + stop_gradient=True, + dtype=param.dtype) + p2s.append([param, snapshot]) + return p2s + + def init_snapshot_vars(self, startup_program, param2snapshot): + with program_guard(startup_program): + for param, snapshot in param2snapshot: + layers.assign(param, snapshot) + def minimize_impl(self, loss, startup_program=None, @@ -62,8 +87,11 @@ class LocalSGDOptimizer(MetaOptimizerBase): self.nrings = 2 collective_helper = CollectiveHelper(self.role_maker, self.nrings) collective_helper.update_startup_program(startup_program) + p2s = self.create_snapshot_vars(startup_program) + self.init_snapshot_vars(startup_program, p2s) - with program_guard(main_block.program): + p2s = self.create_snapshot_vars(main_block.program) + with program_guard(main_block.program, startup_program): step = layers.autoincreased_step_counter(begin=0) k_steps = layers.create_global_var( name="k_steps", @@ -79,6 +107,9 @@ class LocalSGDOptimizer(MetaOptimizerBase): persistable=True) if auto_steps: + avg_loss = layers.collective._c_allreduce( + loss) / self.role_maker.worker_num() + lr_0 = layers.create_global_var( name="lr_0", shape=[1], @@ -101,49 +132,32 @@ class LocalSGDOptimizer(MetaOptimizerBase): layers.cond(step == 0, initialize) def communicate(): - ordered_param_snapshot = [] + sub_block = default_main_program().current_block() ring_id = -1 - for idx, op in reversed(list(enumerate(main_block.ops))): - if is_update_op(op): - param = main_block.vars[op.input('Param')[0]] - if param.is_distributed: - continue - - snapshot = main_block.create_var( - name=self.snapshot_name(param.name), - shape=param.shape, - persistable=True, - stop_gradient=True, - dtype=param.dtype) - - main_block._insert_op( - idx + 1, - type='elementwise_sub', - inputs={'X': [snapshot], - 'Y': [param]}, - outputs={'Out': [param]}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) - main_block._insert_op( - idx + 2, - type='c_sync_calc_stream', - inputs={'X': param}, - outputs={'Out': param}, - attrs={OP_ROLE_KEY: OpRole.Optimize}) - ring_id = (ring_id + 1) % self.nrings - main_block._insert_op( - idx + 3, - type='c_allreduce_sum', - inputs={'X': [param]}, - outputs={'Out': [param]}, - attrs={ - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Optimize - }) - - ordered_param_snapshot.append((param, snapshot)) + for param, snapshot in p2s: + sub_block.append_op( + type='elementwise_sub', + inputs={'X': [snapshot], + 'Y': [param]}, + outputs={'Out': [param]}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + sub_block.append_op( + type='c_sync_calc_stream', + inputs={'X': param}, + outputs={'Out': param}, + attrs={OP_ROLE_KEY: OpRole.Optimize}) + ring_id = (ring_id + 1) % self.nrings + sub_block.append_op( + type='c_allreduce_sum', + inputs={'X': [param]}, + outputs={'Out': [param]}, + attrs={ + 'ring_id': ring_id, + OP_ROLE_KEY: OpRole.Optimize + }) for ring_id in range(self.nrings): - main_block.append_op( + sub_block.append_op( type='c_sync_comm_stream', inputs={'X': param}, outputs={'Out': param}, @@ -152,10 +166,8 @@ class 
LocalSGDOptimizer(MetaOptimizerBase): OP_ROLE_KEY: OpRole.Optimize }) - for param_snapshot in reversed(ordered_param_snapshot): - param = param_snapshot[0] - snapshot = param_snapshot[1] - main_block.append_op( + for param, snapshot in p2s: + sub_block.append_op( type='scale', inputs={'X': [param]}, outputs={'Out': [param]}, @@ -163,13 +175,13 @@ class LocalSGDOptimizer(MetaOptimizerBase): 'scale': 1.0 / self.role_maker.worker_num(), OP_ROLE_KEY: OpRole.Optimize }) - main_block.append_op( + sub_block.append_op( type='elementwise_sub', inputs={'X': [snapshot], 'Y': [param]}, outputs={'Out': [param]}, attrs={OP_ROLE_KEY: OpRole.Optimize}) - main_block.append_op( + sub_block.append_op( type='assign', inputs={'X': [param]}, outputs={'Out': [snapshot]}, diff --git a/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py b/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py index 9ba184fb0089589a86d6444d12cf402b9687b041..12a4d904340337bf9a99968c7d82db117bf59ce8 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py +++ b/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py @@ -14,10 +14,16 @@ __all__ = ["MetaOptimizerBase"] +from paddle.fluid.optimizer import Optimizer -class MetaOptimizerBase(object): + +class MetaOptimizerBase(Optimizer): def __init__(self, optimizer): - pass + self.inner_opt = optimizer + self._learning_rate = self.inner_opt._learning_rate + self._learning_rate_map = self.inner_opt._learning_rate_map + self.meta_optimizers_white_list = [] + self.meta_optimizers_black_list = [] def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): @@ -26,7 +32,7 @@ class MetaOptimizerBase(object): self.user_defined_optimizer = user_defined_optimizer self.user_defined_strategy = user_defined_strategy - def _update_inner_optimier(self, optimizer): + def _update_inner_optimizer(self, optimizer): self.inner_opt = optimizer def _can_apply(self): @@ -38,17 +44,43 @@ class MetaOptimizerBase(object): def _can_update(self, optimizer): if str(optimizer.__class__.__name__) in self.meta_optimizers_white_list: return True + return False def _disable_strategy(self, dist_strategy): raise NotImplementedError("you should implement disable strategy in {}". 
format(type(self).__name__)) + def apply_gradients(self, params_grads): + return self.inner_opt.apply_gradients(params_grads=params_grads) + + def backward(self, + loss, + startup_program=None, + parameter_list=None, + no_grad_set=None, + callbacks=None): + return self.inner_opt.backward(loss, startup_program, parameter_list, + no_grad_set, callbacks) + + def apply_optimize(self, loss, startup_program, params_grads): + return self.inner_opt.apply_optimize( + loss, startup_program=startup_program, params_grads=params_grads) + def minimize_impl(self, loss, startup_program=None, parameter_list=None, no_grad_set=None): - raise NotImplementedError("meta optimizer not implemented") + params_grads = self.backward( + loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) + + optimize_ops = self.apply_optimize( + loss, startup_program=startup_program, params_grads=params_grads) + + return optimize_ops, params_grads def minimize(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py index a42c7e63cc62a573cbf0b3441e43300f61ea0014..fe9221307cbacfa1beaf030b70a4e4b9223769cc 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py @@ -95,6 +95,7 @@ class PipelineOptimizer(MetaOptimizerBase): self.inner_opt = optimizer # we do not allow meta optimizer to be inner optimizer currently self.meta_optimizers_white_list = [] + self.meta_optimizers_black_list = [] def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): @@ -110,7 +111,7 @@ class PipelineOptimizer(MetaOptimizerBase): def _disable_strategy(self, dist_strategy): dist_strategy.pipeline = False - dist_strategy.pipeline_configs = {"micro_batch": 1} + dist_strategy.pipeline_configs = {} def minimize_impl(self, loss, diff --git a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py index 73119d81094ac611c0d3545b59342b5dbd8b5d16..45130b447125f6ecbade2e4e5e3dad2f127fda52 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py @@ -24,13 +24,20 @@ class RecomputeOptimizer(MetaOptimizerBase): self.inner_opt = optimizer self.wrapped_opt = RO(optimizer) # we do not allow meta optimizer to be inner optimizer currently - self.meta_optimizers_white_list = [] + self.meta_optimizers_white_list = [ + "LarsOptimizer", + "LambOptimizer", + "GradientMergeOptimizer", + "GraphExecutionOptimizer", + ] + self.meta_optimizers_black_list = [] def _set_basic_info(self, loss, role_maker, user_defined_optimizer, user_defined_strategy): super(RecomputeOptimizer, self)._set_basic_info( loss, role_maker, user_defined_optimizer, user_defined_strategy) - self.wrapped_opt._set_checkpoints([]) + self.wrapped_opt._set_checkpoints( + list(user_defined_strategy.recompute_configs["checkpoints"])) def _can_apply(self): if self.user_defined_strategy.recompute == True: @@ -42,7 +49,7 @@ class RecomputeOptimizer(MetaOptimizerBase): def _disable_strategy(self, dist_strategy): dist_strategy.recompute = False - dist_strategy.recompute_configs = {"checkpoints": []} + dist_strategy.recompute_configs = {} def backward(self, loss, diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py 
b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 813649edbcba7000cfb259888b877dcbdab05f1f..c731ed08893348d0be604eb383905cd4a9d6e228 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -13,11 +13,14 @@ # limitations under the License. import os -import logging import warnings import paddle.fluid as fluid from paddle.fluid import core +from paddle.fluid.framework import Program +from paddle.fluid.compiler import CompiledProgram +from paddle.fluid.executor import Executor +from paddle.fluid.parallel_executor import ParallelExecutor from .runtime_base import RuntimeBase @@ -241,3 +244,312 @@ class ParameterServerRuntime(RuntimeBase): self._communicator.stop() executor = fluid.Executor(fluid.CPUPlace()) executor.close() + + def _get_optimizer_status(self, op, param_name): + supported_opts = [ + "sgd", "adam", "adagrad", "adamax", "momentum", "lars_momentum", + "rmsprop", "decayed_adagrad", "ftrl" + ] + + reshaped_val_map = {} + reshaped_val_map["sgd"] = [] + reshaped_val_map["adam"] = ["moment1_0", "moment2_0"] + reshaped_val_map["adagrad"] = ["moment_0"] + reshaped_val_map["adamax"] = ["moment_0", "inf_norm_0"] + reshaped_val_map["momentum"] = ["velocity_0"] + reshaped_val_map["lars_momentum"] = ["velocity_0"] + reshaped_val_map[ + "rmsprop"] = ["momentum_0", "mean_square_0", "mean_grad_0"] + reshaped_val_map["decayed_adagrad"] = ["moment_0"] + reshaped_val_map["ftrl"] = ["squared_0", "linear_0"] + + orishaped_val_map = {} + orishaped_val_map["adam"] = ["beta1_pow_acc_0", "beta2_pow_acc_0"] + orishaped_val_map["adamax"] = ["beta1_pow_acc_0"] + + if op not in supported_opts: + raise ValueError( + "fleet can not support optimizer: {}, only this can be supported: {}". 
+ format(op, supported_opts)) + + reshaped_names = [ + param_name + "_" + val for val in reshaped_val_map[op] + ] + + if op not in orishaped_val_map: + origin_names = [] + else: + origin_names = [ + param_name + "_" + val for val in orishaped_val_map[op] + ] + return reshaped_names, origin_names + + def _get_optimizer_op(self, param_name): + from paddle.fluid.incubate.fleet.parameter_server.ir.public import _get_optimize_ops + + opts = _get_optimize_ops(self.origin_main_program) + for op in opts: + if "Param" in op.input_names and \ + "LearningRate" in op.input_names and op.input("Param")[0] == param_name: + return op + + def _save_dense_params(self, executor, dirname, context, main_program): + self._communicator.recv() + + prog = Program() + block = prog.global_block() + local_vars = [] + + for name, var_ctx in context.items(): + if len(var_ctx.origin_varnames()) != 1: + raise ValueError("Dense can not support split now.") + + varname = var_ctx.origin_varnames()[0] + local_vars.append(varname) + + optimizer = self._get_optimizer_op(varname) + reshaped_varnames, origin_varnames = self._get_optimizer_status( + optimizer.type, varname) + + for var_name in [varname] + reshaped_varnames + origin_varnames: + var = self.origin_main_program.global_block().vars[var_name] + block.append_op( + type='recv_save', + attrs={ + "trainer_id": self.role_maker.worker_index(), + "shape": var.shape, + "slice_shapes": + [",".join([str(i) for i in var.shape])], + "slice_varnames": [var.name], + "remote_varnames": [var.name], + "is_sparse": False, + "endpoints": var_ctx.split_endpoints(), + "file_path": os.path.join(dirname, var.name) + }) + + executor.run(prog) + return local_vars + + def _save_sparse_params(self, executor, dirname, context, main_program): + prog = Program() + block = prog.global_block() + local_vars = [] + + for name, var_ctx in context.items(): + if len(var_ctx.origin_varnames()) != 1: + raise ValueError("Dense can not support split now.") + + varname = var_ctx.origin_varnames()[0] + local_vars.append(varname) + + optimizer = self._get_optimizer_op(varname) + reshaped_varnames, origin_varnames = self._get_optimizer_status( + optimizer.type, varname) + + var = self.origin_main_program.global_block().vars[varname] + slice_shapes = [] + dims1 = ",".join([str(i) for i in var.shape[1:]]) + + for section in var_ctx.sections(): + slice_shapes.append(str(section) + dims1) + + block.append_op( + type='recv_save', + attrs={ + "trainer_id": self.role_maker.worker_index(), + "shape": var.shape, + "slice_shapes": slice_shapes, + "slice_varnames": var_ctx.split_varnames(), + "remote_varnames": var_ctx.split_varnames(), + "is_sparse": True, + "endpoints": var_ctx.split_endpoints(), + "pserver_num": len(self.role_maker.get_pserver_endpoints()), + "file_path": os.path.join(dirname, var.name) + }) + + for reshaped_varname in reshaped_varnames: + var = self.origin_main_program.global_block().vars[ + reshaped_varname] + + slice_varnames = [] + remote_varnames = [] + for i in range(len(var_ctx.split_varnames())): + slice_varnames.append("{}.block{}".format(reshaped_varname, + i)) + remote_varnames.append(reshaped_varname) + + block.append_op( + type='recv_save', + attrs={ + "trainer_id": self.role_maker.worker_index(), + "shape": var.shape, + "slice_shapes": slice_shapes, + "slice_varnames": slice_varnames, + "remote_varnames": remote_varnames, + "is_sparse": True, + "endpoints": var_ctx.split_endpoints(), + "pserver_num": + len(self.role_maker.get_pserver_endpoints()), + "file_path": os.path.join(dirname, 
var.name) + }) + + for origin_varname in origin_varnames: + var = self.origin_main_program.global_block().vars[ + origin_varname] + + block.append_op( + type='recv_save', + attrs={ + "trainer_id": self.role_maker.worker_index(), + "shape": var.shape, + "slice_shapes": + [",".join([str(i) for i in var.shape])], + "slice_varnames": [origin_varname], + "remote_varnames": [origin_varname], + "is_sparse": False, + "endpoints": var_ctx.split_endpoints()[:1], + "file_path": os.path.join(dirname, var.name) + }) + executor.run(prog) + return context.keys() + + def _save_distributed_params(self, executor, dirname, context, + main_program): + prog = Program() + block = prog.global_block() + + for name, var_ctx in context.items(): + block.append_op( + type='checkpoint_notify', + attrs={ + "varname": name, + "is_slice": True, + "slice_varnames": var_ctx.split_varnames(), + "remote_varnames": var_ctx.split_varnames(), + "endpoints": var_ctx.split_endpoints(), + "dirname": dirname + }) + + executor.run(prog) + return context.keys() + + def _save_distributed_persistables(self, executor, dirname, main_program): + dense_ctx = self.compiled_strategy.get_communicator_recv_context( + recv_type=1) + + sparse_ctx = self.compiled_strategy.get_communicator_recv_context( + recv_type=2) + + distributed_ctx = self.compiled_strategy.get_communicator_recv_context( + recv_type=3) + + recv_dense_varnames = self._save_dense_params(executor, dirname, + dense_ctx, main_program) + + recv_sparse_varnames = self._save_sparse_params( + executor, dirname, sparse_ctx, main_program) + + recv_distributed_varnames = self._save_distributed_params( + executor, dirname, distributed_ctx, main_program) + + saved_varnames = recv_dense_varnames + list( + recv_sparse_varnames) + list(recv_distributed_varnames) + + remaining_vars = list( + filter( + ParameterServerRuntime.__exclude_vars(saved_varnames), + main_program.list_vars())) + + fluid.io.save_vars( + executor, + main_program=main_program, + dirname=dirname, + vars=remaining_vars) + + def _ps_inference_save_persistables(self, + executor, + dirname, + main_program=None, + **kwargs): + """ + This function filters out all variables with `persistable==True` from the + give `main_program` and then saves these variables to the folder `dirname` + or file `filename`. + + The `dirname` is used to specify the folder where persistable variables + are going to be saved. If you would like to save variables in separate + files, set `filename` None; if you would like to save all variables in a + single file, use `filename` to specify the file name. 
+ """ + + if isinstance(executor, ParallelExecutor): + raise TypeError( + "in fleet.save_persistables() function, executor must be as Executor type, ParallelExecutor is not allowed" + ) + + if not isinstance(executor, Executor): + raise TypeError( + "in fleet.save_persistables() function, executor must be as Executor type" + ) + + if main_program is None: + main_program = fluid.default_main_program() + + if isinstance(main_program, CompiledProgram): + raise TypeError( + "in fleet.save_persistables() function, main_program must be as Program type, CompiledProgram is not allowed" + ) + + self._save_distributed_persistables(executor, dirname, main_program) + + def _ps_inference_save_inference_model(self, + executor, + dirname, + feeded_var_names, + target_vars, + main_program=None, + export_for_deployment=True): + """ + Prune the given `main_program` to build a new program especially for inference, + and then save it and all related parameters to given `dirname` by the `executor`. + """ + + if isinstance(executor, ParallelExecutor): + raise TypeError( + "in fleet.save_inference_model() function, executor must be as Executor type, ParallelExecutor is not allowed" + ) + + if not isinstance(executor, Executor): + raise TypeError( + "in fleet.save_inference_model() function, executor must be as Executor type" + ) + + if main_program is not None: + if isinstance(main_program, CompiledProgram): + raise TypeError( + "in fleet.save_inference_model() function, main_program must be as Program type, CompiledProgram is not allowed" + ) + fluid.io.save_inference_model(dirname, feeded_var_names, + target_vars, executor, main_program, + None, None, export_for_deployment) + else: + fluid.io.save_inference_model(dirname, feeded_var_names, + target_vars, executor, + self.origin_main_program, None, None, + export_for_deployment, True) + + model_basename = "__model__" + model_filename = os.path.join(dirname, model_basename) + + with open(model_filename, "rb") as f: + program_desc_str = f.read() + + program = Program.parse_from_string(program_desc_str) + program._copy_dist_param_info_from(fluid.default_main_program()) + self._ps_inference_save_persistables(executor, dirname, program) + + def _save_inference_model(self, *args, **kwargs): + self._ps_inference_save_inference_model(*args, **kwargs) + + def _save_persistables(self, *args, **kwargs): + self._ps_inference_save_persistables(*args, **kwargs) diff --git a/python/paddle/distributed/fleet/runtime/runtime_base.py b/python/paddle/distributed/fleet/runtime/runtime_base.py index 38f9f882cb4876f60fa2fe1a660f2ac899c73405..2e8bacfbc3b1ded58e63e8d9e93764a0c0090b91 100644 --- a/python/paddle/distributed/fleet/runtime/runtime_base.py +++ b/python/paddle/distributed/fleet/runtime/runtime_base.py @@ -33,3 +33,9 @@ class RuntimeBase(object): def _stop_worker(self): pass + + def _save_inference_model(self, *args, **kwargs): + pass + + def _save_persistables(self, *args, **kwargs): + pass diff --git a/python/paddle/distribution.py b/python/paddle/distribution.py index fff10c5b2a9ee497cccff94346314db2c8011eb5..ba4bfa8708b2ae3d8ae6643393e0f87cc9c6b360 100644 --- a/python/paddle/distribution.py +++ b/python/paddle/distribution.py @@ -18,3 +18,517 @@ # 'Normal', # 'sampling_id', # 'Uniform'] + +from __future__ import print_function + +from .fluid.layers import control_flow +from .fluid.layers import tensor +from .fluid.layers import ops +from .fluid.layers import nn +from .fluid.framework import in_dygraph_mode +from .tensor.math import elementwise_mul, elementwise_div, 
elementwise_add, elementwise_sub +import math +import numpy as np +import warnings + +from .fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype + +__all__ = ['Distribution', 'Uniform', 'Normal'] + + +class Distribution(object): + """ + The abstract base class for probability distributions. Functions are + implemented in specific distributions. + """ + + def __init__(self): + super(Distribution, self).__init__() + + def sample(self): + """Sampling from the distribution.""" + raise NotImplementedError + + def entropy(self): + """The entropy of the distribution.""" + raise NotImplementedError + + def kl_divergence(self, other): + """The KL-divergence between self distributions and other.""" + raise NotImplementedError + + def log_prob(self, value): + """Log probability density/mass function.""" + raise NotImplementedError + + def probs(self, value): + """Probability density/mass function.""" + raise NotImplementedError + + def _validate_args(self, *args): + """ + Argument validation for distribution args + Args: + value (float, list, numpy.ndarray, Tensor) + Raises + ValueError: if one argument is Tensor, all arguments should be Tensor + """ + is_variable = False + is_number = False + for arg in args: + if isinstance(arg, tensor.Variable): + is_variable = True + else: + is_number = True + + if is_variable and is_number: + raise ValueError( + 'if one argument is Tensor, all arguments should be Tensor') + + return is_variable + + def _to_variable(self, *args): + """ + Argument convert args to Tensor + + Args: + value (float, list, numpy.ndarray, Tensor) + Returns: + Tensor of args. + """ + numpy_args = [] + variable_args = [] + tmp = 0. + + for arg in args: + valid_arg = False + for cls in [float, list, np.ndarray, tensor.Variable]: + if isinstance(arg, cls): + valid_arg = True + break + assert valid_arg, "type of input args must be float, list, numpy.ndarray or Tensor." + if isinstance(arg, float): + arg = np.zeros(1) + arg + arg_np = np.array(arg) + arg_dtype = arg_np.dtype + if str(arg_dtype) not in ['float32']: + warnings.warn( + "data type of argument only support float32, your argument will be convert to float32." + ) + arg_np = arg_np.astype('float32') + tmp = tmp + arg_np + numpy_args.append(arg_np) + + dtype = tmp.dtype + for arg in numpy_args: + arg_broadcasted, _ = np.broadcast_arrays(arg, tmp) + arg_variable = tensor.create_tensor(dtype=dtype) + tensor.assign(arg_broadcasted, arg_variable) + variable_args.append(arg_variable) + + return tuple(variable_args) + + +class Uniform(Distribution): + """Uniform distribution with `low` and `high` parameters. + + Mathematical Details + + The probability density function (pdf) is, + + .. 
math:: + + pdf(x; a, b) = \\frac{1}{Z}, \ a <=x self._quantized_var_abs_max[var_name]): - self._quantized_var_abs_max[var_name] = abs_max_value + self._sample_threshold_abs_max() elif self._algo == "min_max": - if self._quantized_var_min == {} and self._quantized_var_max == {}: - for var_name in self._quantized_weight_var_name: - var_tensor = _load_variable_data(self._scope, var_name) - min_per_channel = [] - max_per_channle = [] - for i in range(var_tensor.shape[0]): - min_per_channel.append(float(np.min(var_tensor[i]))) - max_per_channle.append(float(np.max(var_tensor[i]))) - self._quantized_var_min[var_name] = min_per_channel - self._quantized_var_max[var_name] = max_per_channle - for var_name in self._quantized_act_var_name: + self._sample_threshold_min_max() + + def _sample_threshold_abs_max(self): + assert self._algo == "abs_max", \ + "The algo should be abs_max for _sample_threshold_abs_max." + # Only calculate abs_max value for weight for once + if self._quantized_var_abs_max == {}: + for var_name in self._quantized_weight_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + if self._weight_quantize_type == "abs_max": + abs_max_value = float(np.max(np.abs(var_tensor))) + elif self._weight_quantize_type == "channel_wise_abs_max": + abs_max_value = [] + if self.weight_op_pairs[ + var_name] in _channelwise_quant_axis1_ops: + for i in range(var_tensor.shape[1]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[:, i])))) + else: + for i in range(var_tensor.shape[0]): + abs_max_value.append( + float(np.max(np.abs(var_tensor[i])))) + self._quantized_var_abs_max[var_name] = abs_max_value + + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + abs_max_value = float(np.max(np.abs(var_tensor))) + if (var_name not in self._quantized_var_abs_max) or \ + (abs_max_value > self._quantized_var_abs_max[var_name]): + self._quantized_var_abs_max[var_name] = abs_max_value + + def _sample_threshold_min_max(self): + assert self._algo == "min_max", \ + "The algo should be min_max for _sample_threshold_min_max." 
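The per-channel branch above loops over axis 0 for most weights and over axis 1 for ops listed in `_channelwise_quant_axis1_ops` (conv2d_transpose, mul). A minimal NumPy analogue of that abs-max threshold computation, independent of the pass itself:

```python
import numpy as np

def channel_wise_abs_max(weight, quant_axis=0):
    # Mirrors the per-channel loops in _sample_threshold_abs_max:
    # axis 0 for conv2d/depthwise_conv2d weights, axis 1 for
    # conv2d_transpose/mul weights.
    assert quant_axis in (0, 1)
    if quant_axis == 0:
        return [float(np.max(np.abs(weight[i]))) for i in range(weight.shape[0])]
    return [float(np.max(np.abs(weight[:, i]))) for i in range(weight.shape[1])]

w = np.random.randn(8, 4, 3, 3).astype("float32")
print(len(channel_wise_abs_max(w, 0)))                 # 8 thresholds, one per output channel
print(len(channel_wise_abs_max(w.reshape(8, -1), 1)))  # 36 thresholds along axis 1
```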
+ if self._quantized_var_min == {} and self._quantized_var_max == {}: + for var_name in self._quantized_weight_var_name: var_tensor = _load_variable_data(self._scope, var_name) - min_value = float(np.min(var_tensor)) - max_value = float(np.max(var_tensor)) - if (var_name not in self._quantized_var_min) or \ - (min_value < self._quantized_var_min[var_name]): - self._quantized_var_min[var_name] = min_value - if (var_name not in self._quantized_var_max) or \ - (max_value > self._quantized_var_max[var_name]): - self._quantized_var_max[var_name] = max_value + if self._weight_quantize_type == "abs_max": + min_value = float(np.min(var_tensor)) + max_value = float(np.max(var_tensor)) + elif self._weight_quantize_type == "channel_wise_abs_max": + min_value = [] + max_value = [] + if self.weight_op_pairs[ + var_name] in _channelwise_quant_axis1_ops: + for i in range(var_tensor.shape[1]): + min_value.append(float(np.min(var_tensor[:, i]))) + max_value.append(float(np.max(var_tensor[:, i]))) + else: + for i in range(var_tensor.shape[0]): + min_value.append(float(np.min(var_tensor[i]))) + max_value.append(float(np.max(var_tensor[i]))) + self._quantized_var_min[var_name] = min_value + self._quantized_var_max[var_name] = max_value + + for var_name in self._quantized_act_var_name: + var_tensor = _load_variable_data(self._scope, var_name) + min_value = float(np.min(var_tensor)) + max_value = float(np.max(var_tensor)) + if (var_name not in self._quantized_var_min) or \ + (min_value < self._quantized_var_min[var_name]): + self._quantized_var_min[var_name] = min_value + if (var_name not in self._quantized_var_max) or \ + (max_value > self._quantized_var_max[var_name]): + self._quantized_var_max[var_name] = max_value def _save_input_threhold(self): ''' @@ -554,11 +591,6 @@ class PostTrainingQuantization(object): applied in every iteration. ''' assert self._algo == "KL", "The algo should be KL to sample data." 
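For activations, `_sample_threshold_min_max` keeps a running range that each calibration batch may only widen. A self-contained NumPy sketch of that bookkeeping (the variable name is illustrative):

```python
import numpy as np

quant_var_min, quant_var_max = {}, {}

def update_min_max(var_name, tensor):
    # Each batch can only push the recorded min down or the max up,
    # matching the comparisons in _sample_threshold_min_max.
    lo, hi = float(np.min(tensor)), float(np.max(tensor))
    if var_name not in quant_var_min or lo < quant_var_min[var_name]:
        quant_var_min[var_name] = lo
    if var_name not in quant_var_max or hi > quant_var_max[var_name]:
        quant_var_max[var_name] = hi

for _ in range(5):  # pretend calibration batches
    update_min_max("relu_0.tmp_0", np.random.randn(2, 16).astype("float32"))
print(quant_var_min["relu_0.tmp_0"], quant_var_max["relu_0.tmp_0"])
```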
- for var_name in self._quantized_weight_var_name: - if var_name not in self._sampling_data: - var_tensor = _load_variable_data(self._scope, var_name) - self._sampling_data[var_name] = var_tensor - if self._is_use_cache_file: for var_name in self._quantized_act_var_name: var_tensor = _load_variable_data(self._scope, var_name) @@ -584,15 +616,20 @@ class PostTrainingQuantization(object): # Abs_max threshold for weights for var_name in self._quantized_weight_var_name: - weight_data = self._sampling_data[var_name] - weight_threshold = None + weight_data = _load_variable_data(self._scope, var_name) if self._weight_quantize_type == "abs_max": - weight_threshold = np.max(np.abs(weight_data)) + weight_threshold = float(np.max(np.abs(weight_data))) elif self._weight_quantize_type == "channel_wise_abs_max": weight_threshold = [] - for i in range(weight_data.shape[0]): - abs_max_value = np.max(np.abs(weight_data[i])) - weight_threshold.append(abs_max_value) + if self.weight_op_pairs[ + var_name] in _channelwise_quant_axis1_ops: + for i in range(weight_data.shape[1]): + weight_threshold.append( + float(np.max(np.abs(weight_data[:, i])))) + else: + for i in range(weight_data.shape[0]): + weight_threshold.append( + float(np.max(np.abs(weight_data[i])))) self._quantized_var_kl_threshold[var_name] = weight_threshold # KL threshold for activations diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 8851bcc6440d405f7484257b44760802feb0d8fb..14d1114a8f64a1238596fb1050e2cdb6e31ec6b0 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -66,6 +66,8 @@ _out_scale_op_list = [ "concat", "elementwise_mul", "scale", + "hard_swish", + "hard_sigmoid", ] # list op real input and output names, to avoid processing input such as AxisTensor. @@ -109,8 +111,14 @@ _op_real_in_out_name = { "sigmoid": [["X"], ["Out"]], "elementwise_mul": [["X", "Y"], ["Out"]], "scale": [["X"], ["Out"]], + "hard_swish": [["X"], ["Out"]], + "hard_sigmoid": [["X"], ["Out"]], } +_conv_ops = ['conv2d', 'depthwise_conv2d', 'conv2d_transpose'] + +_channelwise_quant_axis1_ops = ['conv2d_transpose', 'mul'] + def _get_op_input_var_names(op): """ """ @@ -185,10 +193,24 @@ def _is_input_all_not_persistable(graph, op_node): return is_input_all_not_persistable +def _check_grandchild_op_node(op_node, grandchild_op_name): + ''' + Check whether the fake_quant node has a grandchild op node named + grandchild_op_name. + ''' + for out1_var_node in op_node.outputs: + for out1_op_node in out1_var_node.outputs: + for out2_var_node in out1_op_node.outputs: + for out2_op_node in out2_var_node.outputs: + if out2_op_node.name() == grandchild_op_name: + return True + return False + + class QuantizationTransformPass(object): """ - Quantize the ops that have weights. Add quant and dequant ops for the quantized - ops's inputs. + Quantize the ops that have weights. Add quant and dequant ops for + the quantized ops's inputs. """ _supported_quantizable_op_type = [ 'conv2d', 'depthwise_conv2d', 'conv2d_transpose', 'mul', 'matmul' @@ -311,8 +333,8 @@ class QuantizationTransformPass(object): if weight_quantize_type not in quant_type: raise ValueError( "Unknown weight_quantize_type: '%s'. It can only be " - "'abs_max' or 'channel_wise_abs_max' or 'range_abs_max' or 'moving_average_abs_max'." 
- % (str(weight_quantize_type))) + "'abs_max' or 'channel_wise_abs_max' or 'range_abs_max' " + "or 'moving_average_abs_max'." % (str(weight_quantize_type))) self._activation_quantize_type = activation_quantize_type self._weight_quantize_type = weight_quantize_type @@ -323,7 +345,6 @@ class QuantizationTransformPass(object): for op in self._quantizable_ops: assert op in QuantizationTransformPass._supported_quantizable_op_type, \ op + " is not supported for quantization." - self._conv_ops = ['conv2d', 'depthwise_conv2d'] self._quantizable_grad_ops = [ '%s_grad' % (op) for op in self._quantizable_ops ] @@ -356,10 +377,12 @@ class QuantizationTransformPass(object): user_skipped = False if isinstance(self._skip_pattern, list): user_skipped = op_node.op().has_attr("op_namescope") and \ - any(pattern in op_node.op().attr("op_namescope") for pattern in self._skip_pattern) + any(pattern in op_node.op().attr("op_namescope") \ + for pattern in self._skip_pattern) elif isinstance(self._skip_pattern, str): user_skipped = op_node.op().has_attr("op_namescope") and \ - op_node.op().attr("op_namescope").find(self._skip_pattern) != -1 + op_node.op().attr("op_namescope").find( + self._skip_pattern) != -1 if user_skipped: op_node.op()._set_attr("skip_quant", True) @@ -373,15 +396,11 @@ class QuantizationTransformPass(object): if var_node.name() in dequantized_vars: dequant_var_node = dequantized_vars[var_node.name()] else: - name = var_node.name() if name in processed_vars: continue - - if var_node.name() in persistable_vars: - is_weight = True - else: - is_weight = False + is_weight = True if var_node.name() in persistable_vars \ + else False # if var node is weight and weight_preprocess_func is not None, # will insert weight preprocess func @@ -415,20 +434,14 @@ class QuantizationTransformPass(object): else self._activation_bits quant_type = self._weight_quantize_type if is_weight \ else self._activation_quantize_type - if quant_type == 'channel_wise_abs_max': - assert is_weight, "'channel_wise_abs_max' can only be applied on weights." 
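The fake quant/dequant pair the pass inserts implements symmetric abs-max quantization. A simplified NumPy round trip (not the actual CPU/CUDA kernels) shows the arithmetic, matching the `round(x / scale * ((1 << (bits - 1)) - 1))` mapping used later in `QuantizationFreezePass._quant`:

```python
import numpy as np

def fake_quant_dequant_abs_max(x, bits=8):
    # scale is the tensor-wise abs-max; values are mapped onto the
    # symmetric integer grid and then mapped straight back.
    scale = float(np.max(np.abs(x)))
    bnt = (1 << (bits - 1)) - 1          # 127 for 8 bits
    q = np.round(x / scale * bnt)
    return q * scale / bnt

x = np.random.randn(4, 4).astype("float32")
print(np.max(np.abs(x - fake_quant_dequant_abs_max(x))))  # small quantization error
```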
- if op.name() in self._conv_ops: - quant_var_node, scale_var_node = self._insert_channel_quant_op( - graph, var_node, name, quant_bits) - dequant_var_node = self._insert_channel_dequant_op( - graph, quant_var_node, [scale_var_node], - [quant_bits]) - else: - quant_var_node, scale_var_node = self._insert_quant_op( - graph, var_node, name, quant_bits, 'abs_max') - dequant_var_node = self._insert_dequant_op( - graph, quant_var_node, scale_var_node, - quant_bits) + if quant_type == 'channel_wise_abs_max': # Weight quantization + quant_axis = 1 if op.name() in \ + _channelwise_quant_axis1_ops else 0 + quant_var_node, scale_var_node = self._insert_channel_quant_op( + graph, var_node, name, quant_bits, quant_axis) + dequant_var_node = self._insert_channel_dequant_op( + graph, quant_var_node, [scale_var_node], + [quant_bits], quant_axis) else: quant_var_node, scale_var_node = self._insert_quant_op( graph, var_node, name, quant_bits, quant_type) @@ -529,11 +542,19 @@ class QuantizationTransformPass(object): var_type=var_node.type(), shape=var_node.shape(), var_dtype=var_node.dtype()) - scale_var_node = graph.create_var_node( + scale_var_node = graph.create_persistable_node( name=self._quantized_scale_name(name), var_type=var_node.type(), shape=[1], var_dtype=var_node.dtype()) + data_type = 'float64' if var_node.dtype( + ) == core.VarDesc.VarType.FP64 else 'float32' + _init_var_node( + scale_var_node, + np.zeros( + scale_var_node.shape(), dtype=data_type), + self._scope, + self._place) quant_op_node = graph.create_op_node( op_type='fake_quantize_abs_max', attrs={ @@ -706,7 +727,8 @@ class QuantizationTransformPass(object): return quant_var_node, scale_out_node - def _insert_channel_quant_op(self, graph, var_node, name, quant_bits): + def _insert_channel_quant_op(self, graph, var_node, name, quant_bits, + quant_axis): """ Insert fake_channel_wise_quantize_abs_max op in the graph. """ @@ -717,15 +739,24 @@ class QuantizationTransformPass(object): var_type=var_node.type(), shape=var_node.shape(), var_dtype=var_node.dtype()) - scale_var_node = graph.create_var_node( + scale_var_node = graph.create_persistable_node( name=self._quantized_scale_name(name), var_type=var_node.type(), - shape=[var_node.shape()[0]], + shape=[var_node.shape()[quant_axis]], var_dtype=var_node.dtype()) + data_type = 'float64' if var_node.dtype( + ) == core.VarDesc.VarType.FP64 else 'float32' + _init_var_node( + scale_var_node, + np.zeros( + scale_var_node.shape(), dtype=data_type), + self._scope, + self._place) quant_op_node = graph.create_op_node( op_type='fake_channel_wise_quantize_abs_max', attrs={ 'bit_length': quant_bits, + 'quant_axis': quant_axis, 'op_role': core.op_proto_and_checker_maker.OpRole.Forward }, inputs={'X': var_node}, @@ -763,7 +794,7 @@ class QuantizationTransformPass(object): return dequant_var_node def _insert_channel_dequant_op(self, graph, var_node, scale_var_nodes, - quant_bits): + quant_bits, quant_axis): """ Insert fake_channel_wise_dequantize_max_abs in the graph. 
""" @@ -778,6 +809,7 @@ class QuantizationTransformPass(object): op_type='fake_channel_wise_dequantize_max_abs', attrs={ 'quant_bits': quant_bits, + 'quant_axis': quant_axis, 'op_role': core.op_proto_and_checker_maker.OpRole.Forward }, inputs={'X': var_node, @@ -1036,7 +1068,6 @@ class QuantizationFreezePass(object): self._weight_bits = weight_bits self._activation_bits = activation_bits self._weight_quantize_type = weight_quantize_type - self._conv_ops = ['conv2d', 'depthwise_conv2d', 'conv2d_transpose'] self._fake_quant_op_names = _fake_quant_op_list self._fake_dequant_op_names = _fake_dequant_op_list self._op_input_rename_map = collections.OrderedDict() @@ -1063,34 +1094,37 @@ class QuantizationFreezePass(object): if input_arg_name in graph.out_node_mapping_table.keys(): input_arg_name = graph.out_node_mapping_table[ input_arg_name] - if input_arg_name in persistable_vars: - if self._weight_quantize_type == 'abs_max': - param = self._load_var(input_arg_name) - scale_v = np.max(np.abs(param)) - elif self._weight_quantize_type == 'channel_wise_abs_max': - param = self._load_var(input_arg_name) - if len(param.shape) == 4: # conv2d or depthwise_conv2d - scale_v = [] - for i in range(param.shape[0]): - scale_v.append(np.max(np.abs(param[i]))) - else: - scale_v = np.max(np.abs(param)) + if input_arg_name not in persistable_vars: + scale_v = graph._find_node_by_name( + op_node.outputs, op_node.output('OutScale')[0]) + self._quant_var_scale_map[input_arg_name] = scale_v + else: + # Obtain scale from OutScale var node + scale_v = self._load_var(op_node.output('OutScale')[0]) + assert scale_v.ndim in [ + 1, 2 + ], "the dim of scale_v should be 1 or 2" + if scale_v.ndim == 2: + scale_v = scale_v[0] + if scale_v.size == 1: + scale_v = scale_v[0] else: - scale_v = self._load_var( - op_node.output('OutScale')[0])[0] + scale_v = scale_v.tolist() self._quant_var_scale_map[input_arg_name] = scale_v - self._remove_fake_quant_and_dequant_op(graph, op_node) - # quantize weight and restore + # Quantize weight and restore param_v = self._load_var(input_arg_name) - quantized_param_v = self._quant(param_v, scale_v, - self._weight_bits) + if isinstance(scale_v, list) and \ + any(_check_grandchild_op_node(op_node, op) + for op in _channelwise_quant_axis1_ops): + quant_axis = 1 + else: + quant_axis = 0 + quantized_param_v = self._quant( + param_v, scale_v, self._weight_bits, quant_axis) self._restore_var(input_arg_name, quantized_param_v) - else: - scale_v = graph._find_node_by_name( - op_node.outputs, op_node.output('OutScale')[0]) - self._quant_var_scale_map[input_arg_name] = scale_v + self._remove_fake_quant_and_dequant_op(graph, op_node) - # Remove all fake dequant op +# Remove all fake dequant op ops = graph.all_op_nodes() for op_node in ops: op_name = op_node.name() @@ -1103,8 +1137,7 @@ class QuantizationFreezePass(object): op_node_desc = op_node.op() if op_node_desc.has_attr("quantization_type") and \ op_node_desc.attr("quantization_type") == "qat_with_weight": - if self._weight_quantize_type == 'channel_wise_abs_max' \ - and op_node.name() in self._conv_ops: + if self._weight_quantize_type == 'channel_wise_abs_max': self._insert_post_channel_dequant_op(graph, op_node) else: self._insert_post_dequant_op(graph, op_node) @@ -1295,10 +1328,15 @@ class QuantizationFreezePass(object): return isinstance(v, float) or isinstance(v, np.float32) \ or isinstance(v, np.float64) - def _quant(self, x, scale, num_bits): + def _quant(self, x, scale, num_bits, quant_axis): + assert quant_axis in [0, 1], 'quant_axis should 
be 0 or 1 for now.' if isinstance(scale, list): for i, s in enumerate(scale): - x[i] = np.round(x[i] / s * ((1 << (num_bits - 1)) - 1)) + if quant_axis == 0: + x[i] = np.round(x[i] / s * ((1 << (num_bits - 1)) - 1)) + else: + x[:, i] = np.round(x[:, i] / s * ( + (1 << (num_bits - 1)) - 1)) return x else: return np.round(x / scale * ((1 << (num_bits - 1)) - 1)) @@ -1468,6 +1506,10 @@ class OutScaleForTrainingPass(object): for op in target_ops: for output_var_name in _get_op_output_var_names(op): in_node = graph._find_node_by_name(op.outputs, output_var_name) + if in_node.dtype() not in \ + [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]: + continue + scale_node = graph.create_persistable_node( name=self._scale_name(in_node.name()), var_type=core.VarDesc.VarType.LOD_TENSOR, @@ -1570,17 +1612,26 @@ class OutScaleForInferencePass(object): if op_node.name() in self._teller_set: var_names = _get_op_output_var_names(op_node) for var_name in var_names: - # For compatibility, we save output threshold by two methods. + in_node = graph._find_node_by_name(op_node.outputs, + var_name) + if in_node.dtype() not in \ + [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]: + continue + scale_name = self._scale_name(var_name) - scale_v = np.array( - self._scope.find_var(scale_name).get_tensor())[0] - op_node.op()._set_attr("out_threshold", float(scale_v)) + scale_var = self._scope.find_var(scale_name) + assert scale_var is not None, \ + "Can not find {} variable in the scope".format(scale_name) + scale_value = np.array(scale_var.get_tensor())[0] + + # For compatibility, we save output threshold by two methods. + op_node.op()._set_attr("out_threshold", float(scale_value)) argname_index = _get_output_name_index(op_node, var_name) assert argname_index is not None, \ var_name + " is not the output of the op" op_node.op()._set_attr(argname_index[0] + str(argname_index[1]) \ - + "_threshold", float(scale_v)) + + "_threshold", float(scale_value)) graph.resolve_hazard() return graph diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt index df7e585d45f445067b3a700951418c06c9062ae7..a5f2f9421329cd97d03efa3507120203fcdc747b 100644 --- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt @@ -123,6 +123,7 @@ endfunction() if(WIN32) list(REMOVE_ITEM TEST_OPS test_light_nas) + list(REMOVE_ITEM TEST_OPS test_post_training_quantization_mnist) list(REMOVE_ITEM TEST_OPS test_post_training_quantization_mobilenetv1) list(REMOVE_ITEM TEST_OPS test_post_training_quantization_resnet50) list(REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1) @@ -263,6 +264,13 @@ list(REMOVE_ITEM TEST_OPS #TODO(wanghaoshuang): Fix this unitest failed on GCC8. 
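The new test_post_training_quantization_mnist.py added below drives the whole post-training flow end to end. Condensed from that test, a typical calibration call looks roughly like this (the model directory and output path are placeholders):

```python
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization

exe = fluid.Executor(fluid.CPUPlace())
ptq = PostTrainingQuantization(
    executor=exe,
    model_dir="path/to/fp32_model",                 # placeholder FP32 model
    sample_generator=paddle.dataset.mnist.train(),  # calibration reader
    batch_size=10,
    batch_nums=10,
    algo="KL",                                      # or "abs_max" / "min_max"
    quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
    is_full_quantize=False,
    optimize_model=True,
    is_use_cache_file=False)
ptq.quantize()
ptq.save_quantized_model("path/to/int8_model")      # placeholder output dir
```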
LIST(REMOVE_ITEM TEST_OPS test_auto_pruning) LIST(REMOVE_ITEM TEST_OPS test_filter_pruning) + foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() + +# setting timeout value for old unittests +if(NOT WIN32) + set_tests_properties(test_post_training_quantization_mobilenetv1 PROPERTIES TIMEOUT 200) + set_tests_properties(test_post_training_quantization_resnet50 PROPERTIES TIMEOUT 200) +endif() diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..3ac1590b8aa6eaefbccd3907b314fb438386ffc6 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py @@ -0,0 +1,226 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. +import unittest +import os +import time +import sys +import random +import math +import functools +import contextlib +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.dataset.common import download +from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization + +random.seed(0) +np.random.seed(0) + + +class TestPostTrainingQuantization(unittest.TestCase): + def setUp(self): + self.download_path = 'int8/download' + self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' + + self.download_path) + self.timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime()) + self.int8_model_path = os.path.join(os.getcwd(), + "post_training_" + self.timestamp) + try: + os.system("mkdir -p " + self.int8_model_path) + except Exception as e: + print("Failed to create {} due to {}".format(self.int8_model_path, + str(e))) + sys.exit(-1) + + def tearDown(self): + try: + os.system("rm -rf {}".format(self.int8_model_path)) + except Exception as e: + print("Failed to delete {} due to {}".format(self.int8_model_path, + str(e))) + + def cache_unzipping(self, target_folder, zip_path): + if not os.path.exists(target_folder): + cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, + zip_path) + os.system(cmd) + + def download_model(self, data_url, data_md5, folder_name): + download(data_url, self.download_path, data_md5) + file_name = data_url.split('/')[-1] + zip_path = os.path.join(self.cache_folder, file_name) + print('Data is downloaded at {0}'.format(zip_path)) + + data_cache_folder = os.path.join(self.cache_folder, folder_name) + self.cache_unzipping(data_cache_folder, zip_path) + return data_cache_folder + + def run_program(self, model_path, batch_size, infer_iterations): + print("test model path:" + model_path) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + [infer_program, feed_dict, fetch_targets] = \ + fluid.io.load_inference_model(model_path, exe) + val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size) + + img_shape = [1, 28, 28] + test_info = [] + cnt = 0 + periods = [] + for batch_id, data in 
enumerate(val_reader()): + image = np.array( + [x[0].reshape(img_shape) for x in data]).astype("float32") + input_label = np.array([x[1] for x in data]).astype("int64") + + t1 = time.time() + out = exe.run(infer_program, + feed={feed_dict[0]: image}, + fetch_list=fetch_targets) + t2 = time.time() + period = t2 - t1 + periods.append(period) + + out_label = np.argmax(np.array(out[0]), axis=1) + top1_num = sum(input_label == out_label) + test_info.append(top1_num) + cnt += len(data) + + if (batch_id + 1) == infer_iterations: + break + + throughput = cnt / np.sum(periods) + latency = np.average(periods) + acc1 = np.sum(test_info) / cnt + return (throughput, latency, acc1) + + def generate_quantized_model(self, + model_path, + algo="KL", + quantizable_op_type=["conv2d"], + is_full_quantize=False, + is_use_cache_file=False, + is_optimize_model=False, + batch_size=10, + batch_nums=10): + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + scope = fluid.global_scope() + val_reader = paddle.dataset.mnist.train() + + ptq = PostTrainingQuantization( + executor=exe, + model_dir=model_path, + sample_generator=val_reader, + batch_size=batch_size, + batch_nums=batch_nums, + algo=algo, + quantizable_op_type=quantizable_op_type, + is_full_quantize=is_full_quantize, + optimize_model=is_optimize_model, + is_use_cache_file=is_use_cache_file) + ptq.quantize() + ptq.save_quantized_model(self.int8_model_path) + + def run_test(self, + model_name, + data_url, + data_md5, + algo, + quantizable_op_type, + is_full_quantize, + is_use_cache_file, + is_optimize_model, + diff_threshold, + batch_size=10, + infer_iterations=10, + quant_iterations=5): + + origin_model_path = self.download_model(data_url, data_md5, model_name) + origin_model_path = os.path.join(origin_model_path, model_name) + + print("Start FP32 inference for {0} on {1} images ...".format( + model_name, infer_iterations * batch_size)) + (fp32_throughput, fp32_latency, fp32_acc1) = self.run_program( + origin_model_path, batch_size, infer_iterations) + + print("Start INT8 post training quantization for {0} on {1} images ...". + format(model_name, quant_iterations * batch_size)) + self.generate_quantized_model( + origin_model_path, algo, quantizable_op_type, is_full_quantize, + is_use_cache_file, is_optimize_model, batch_size, quant_iterations) + + print("Start INT8 inference for {0} on {1} images ...".format( + model_name, infer_iterations * batch_size)) + (int8_throughput, int8_latency, int8_acc1) = self.run_program( + self.int8_model_path, batch_size, infer_iterations) + + print("---Post training quantization of {} method---".format(algo)) + print( + "FP32 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.". + format(model_name, batch_size, fp32_throughput, fp32_latency, + fp32_acc1)) + print( + "INT8 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.\n". 
+ format(model_name, batch_size, int8_throughput, int8_latency, + int8_acc1)) + sys.stdout.flush() + + delta_value = fp32_acc1 - int8_acc1 + self.assertLess(delta_value, diff_threshold) + + +class TestPostTrainingKLForMnist(TestPostTrainingQuantization): + def test_post_training_kl(self): + model_name = "mnist_model" + data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" + data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" + algo = "KL" + quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"] + is_full_quantize = False + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.01 + batch_size = 10 + infer_iterations = 50 + quant_iterations = 5 + self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold, batch_size, infer_iterations, + quant_iterations) + + +class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization): + def test_post_training_abs_max(self): + model_name = "mnist_model" + data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz" + data_md5 = "be71d3997ec35ac2a65ae8a145e2887c" + algo = "abs_max" + quantizable_op_type = ["conv2d", "mul"] + is_full_quantize = True + is_use_cache_file = False + is_optimize_model = True + diff_threshold = 0.01 + batch_size = 10 + infer_iterations = 50 + quant_iterations = 10 + self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type, + is_full_quantize, is_use_cache_file, is_optimize_model, + diff_threshold, batch_size, infer_iterations, + quant_iterations) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py index c9ea15bf6cde9af16810920f53a7d5e045a852e3..32292c8a47b50bc5e7eb2d7833823e586eea8909 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py +++ b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py @@ -33,34 +33,29 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "0" os.environ["CPU_NUM"] = "1" -def residual_block(img, label, num=1): - def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - act='relu', - bias_attr=False): - tmp = fluid.layers.conv2d( - input=input, - filter_size=filter_size, - num_filters=ch_out, - stride=stride, - padding=padding, - use_cudnn=False, - act=None, - bias_attr=bias_attr) - return fluid.layers.batch_norm(input=tmp, act=act) - - hidden = img - for _ in six.moves.xrange(num): - conv = conv_bn_layer(hidden, 20, 3, 1, 1, act=None, bias_attr=True) - short = conv_bn_layer(hidden, 20, 1, 1, 0, act=None) - hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') - fc = fluid.layers.fc(input=hidden, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=fc, label=label) - loss = fluid.layers.mean(loss) - return loss +def conv_net(img, label): + conv_pool_1 = fluid.nets.simple_img_conv_pool( + input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + pool_type='max', + act="relu") + conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) + conv_pool_2 = fluid.nets.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + pool_type='avg', + act="relu") + hidden = fluid.layers.fc(input=conv_pool_2, size=100, act='relu') + prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, 
label=label) + avg_loss = fluid.layers.mean(loss) + return avg_loss def pact(x, name=None): @@ -102,7 +97,7 @@ class TestUserDefinedQuantization(unittest.TestCase): img.stop_gradient = False label = fluid.layers.data( name='label', shape=[1], dtype='int64') - loss = residual_block(img, label, 1) + loss = conv_net(img, label) if not is_test: opt = fluid.optimizer.SGD(learning_rate=0.0001) opt.minimize(loss) diff --git a/python/paddle/fluid/contrib/tests/test_distributed_reader.py b/python/paddle/fluid/contrib/tests/test_distributed_reader.py index 51e1455e71ecfe3f347977bea17a56e556c5ce0d..b964168eb3a2f14fa6dd55d189592daa6ec93d3c 100644 --- a/python/paddle/fluid/contrib/tests/test_distributed_reader.py +++ b/python/paddle/fluid/contrib/tests/test_distributed_reader.py @@ -36,8 +36,9 @@ class TestDistributedReader(unittest.TestCase): data = next(reader()) assert data == 1 - os.unsetenv('PADDLE_TRAINER_ID') - os.unsetenv('PADDLE_TRAINERS_NUM') + #Note: windows python3 don't have unsetenv + del os.environ['PADDLE_TRAINER_ID'] + del os.environ['PADDLE_TRAINERS_NUM'] if __name__ == '__main__': diff --git a/python/paddle/fluid/data.py b/python/paddle/fluid/data.py index 2c75c493cba02dc21a5e2518a8a5e52b6eb4fd81..dc57e9f71ed3d0de1a374bdf719b32a083198b31 100644 --- a/python/paddle/fluid/data.py +++ b/python/paddle/fluid/data.py @@ -18,17 +18,14 @@ import six from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.data_feeder import check_dtype, check_type +from ..utils import deprecated __all__ = ['data'] +@deprecated(since="2.0.0", update_to="paddle.static.data") def data(name, shape, dtype='float32', lod_level=0): """ - :api_attr: Static Graph - :alias_main: paddle.nn.data - :alias: paddle.nn.data,paddle.nn.input.data - :old_api: paddle.fluid.data - **Data Layer** This function creates a variable on the global block. The global variable @@ -52,7 +49,7 @@ def data(name, shape, dtype='float32', lod_level=0): The default :code:`stop_gradient` attribute of the Variable created by this API is true, which means the gradient won't be passed backward - through the data Varaible. Set :code:`var.stop_gradient = False` If + through the data Variable. Set :code:`var.stop_gradient = False` If user would like to pass backward gradient. 
Args: @@ -88,7 +85,7 @@ def data(name, shape, dtype='float32', lod_level=0): z = x + y - # In this example, we will feed x and y with np-ndarry "1" + # In this example, we will feed x and y with np-ndarray "1" # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32) diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index e8d708e04ce54bf6589ada0a55de13f06f0ba2a9..45aa85d4168a55e206460ce2e39292013caa9ce0 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -50,14 +50,15 @@ def convert_dtype(dtype): elif isinstance(dtype, type): if dtype in [ np.bool, np.float16, np.float32, np.float64, np.int8, np.int16, - np.int32, np.int64, np.uint8 + np.int32, np.int64, np.uint8, np.complex64, np.complex128 ]: return dtype.__name__ else: if dtype in [ 'bool', 'float16', 'float32', 'float64', 'int8', 'int16', - 'int32', 'int64', 'uint8', u'bool', u'float16', u'float32', - u'float64', u'int8', u'int16', u'int32', u'int64', u'uint8' + 'int32', 'int64', 'uint8', 'complex64', 'complex128', u'bool', + u'float16', u'float32', u'float64', u'int8', u'int16', u'int32', + u'int64', u'uint8', u'complex64', u'complex128' ]: # this code is a little bit dangerous, since error could happen # when casting no-ascii code to str in python2. @@ -68,7 +69,7 @@ def convert_dtype(dtype): raise TypeError( "dtype must be any of [bool, float16, float32, float64, int8, int16, " - "int32, int64, uint8], but received %s" % dtype) + "int32, int64, uint8, complex64, complex128], but received %s" % dtype) def check_variable_and_dtype(input, diff --git a/python/paddle/fluid/dataloader/__init__.py b/python/paddle/fluid/dataloader/__init__.py index 2f15811e4f360d25e4697ccd121cbcf82fea6a52..597f1f217483ccafe73d0b4fe337cb2b24b4b436 100644 --- a/python/paddle/fluid/dataloader/__init__.py +++ b/python/paddle/fluid/dataloader/__init__.py @@ -23,6 +23,10 @@ from .batch_sampler import * from . import dataloader_iter from .dataloader_iter import * +from . import sampler +from .sampler import * + __all__ = dataset.__all__ \ + batch_sampler.__all__ \ - + dataloader_iter.__all__ + + dataloader_iter.__all__ \ + + sampler.__all__ diff --git a/python/paddle/fluid/dataloader/batch_sampler.py b/python/paddle/fluid/dataloader/batch_sampler.py index 811468c523b2fbe01fbee265733d9754fbc93040..1d180329b72510de5e7e9362e4c002f4508ba1be 100644 --- a/python/paddle/fluid/dataloader/batch_sampler.py +++ b/python/paddle/fluid/dataloader/batch_sampler.py @@ -16,12 +16,13 @@ from __future__ import print_function from __future__ import division import numpy as np +from .sampler import Sampler, SequenceSampler, RandomSampler from .dataset import Dataset, IterableDataset __all__ = ["BatchSampler"] -class BatchSampler(object): +class BatchSampler(Sampler): """ A base implement of batch sampler used by `paddle.io.DataLoader` which yield mini-batch indices(a list/tuple with length as @@ -41,10 +42,11 @@ class BatchSampler(object): implement or other python object which implemented :code:`__len__` for BatchSampler to get indices as the range of :attr:`dataset` length. Default None. - indices (list|tuple): a substitution parameter for - :attr:`dataset` either :attr:`dataset` or - :attr:`indices` should be set, give the whole - indices to sampler from directly. Default None. + sampler (Sampler): this could be a :code:`paddle.io.Dataset` + instance which implemented :code:`__iter__` to yield + sample indices. 
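The reworked `BatchSampler.__iter__` in the hunk that follows simply groups indices drawn from the underlying Sampler into mini-batches and honours drop_last. A standalone sketch of that loop:

```python
# Pure-Python sketch of the batching loop BatchSampler now performs
# over whatever Sampler it wraps.
def batch_indices(sampler, batch_size, drop_last=False):
    batch = []
    for idx in sampler:
        batch.append(idx)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch and not drop_last:
        yield batch  # trailing partial batch is kept unless drop_last

print(list(batch_indices(range(10), 4)))                  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
print(list(batch_indices(range(10), 4, drop_last=True)))  # drops the tail
```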
:attr:`sampler` and :attr:`dataset` + can not be set in the same time. If :attr:`sampler` + is set, :attr:`shuffle` should not be set. Default None. shuffle(bool): whether to shuffle indices order before genrating batch indices. Default False. batch_size(int): sample indice number in a mini-batch indices. @@ -58,16 +60,7 @@ class BatchSampler(object): .. code-block:: python - from paddle.io import BatchSampler, Dataset - - # init with indices - bs = BatchSampler(indices=list(range(100)), - shuffle=True, - batch_size=8, - drop_last=True) - - for batch_indices in bs: - print(batch_indices) + from paddle.io import RandomSampler, BatchSampler, Dataset # init with dataset class RandomDataset(Dataset): @@ -90,55 +83,57 @@ class BatchSampler(object): for batch_indices in bs: print(batch_indices) + # init with sampler + sampler = RandomSampler(RandomDataset(100)) + bs = BatchSampler(sampler=sampler, + batch_size=8, + drop_last=True) + + for batch_indices in bs: + print(batch_indices) + + see `paddle.io.DataLoader` """ def __init__(self, dataset=None, - indices=None, + sampler=None, shuffle=False, batch_size=1, drop_last=False): if dataset is None: - assert indices is not None, \ - "either dataset or indices should be set" - assert isinstance(indices, list) or isinstance(indices, tuple), \ - "indices should be a list or tuple, but got {}".format(type(indices)) - self.indices = indices - self.sampler_iter = None + assert sampler is not None, \ + "either dataset or sampler should be set" + assert isinstance(sampler, Sampler), \ + "sampler should be a paddle.io.Sampler, but got {}".format(type(sampler)) + assert not shuffle, "shuffle should be False when sampler is set" + self.sampler = sampler else: - if isinstance(dataset, IterableDataset): - self.sampler_iter = iter( - _InfiniteIterableSampler(dataset, batch_size)) + assert isinstance(dataset, Dataset), \ + "dataset should be a paddle.io.Dataset" + assert not isinstance(dataset, IterableDataset), \ + "dataset should not be a paddle.io.IterableDataset" + assert sampler is None, \ + "should not set both dataset and sampler" + assert isinstance(shuffle, bool), \ + "shuffle should be a boolean value, but got {}".format(type(shuffle)) + if shuffle: + self.sampler = RandomSampler(dataset) else: - self.sampler_iter = None - assert isinstance(dataset, Dataset), \ - "dataset should be an instance of paddle.io.Dataset" - assert indices is None, \ - "should not set both dataset and indices" - self.indices = list(range(len(dataset))) + self.sampler = SequenceSampler(dataset) assert isinstance(batch_size, int) and batch_size > 0, \ "batch_size should be a positive integer, but got {}".format(batch_size) self.batch_size = batch_size - assert isinstance(shuffle, bool), \ - "shuffle should be a boolean value, but got {}".format(type(shuffle)) - self.shuffle = shuffle assert isinstance(drop_last, bool), \ "drop_last should be a boolean value, but got {}".format(type(drop_last)) self.drop_last = drop_last def __iter__(self): - if self.sampler_iter: - yield next(self.sampler_iter) - - if self.shuffle: - np.random.shuffle(self.indices) - _iter = iter(self.indices) - batch_indices = [] - for idx in _iter: + for idx in self.sampler: batch_indices.append(idx) if len(batch_indices) == self.batch_size: yield batch_indices @@ -147,10 +142,7 @@ class BatchSampler(object): yield batch_indices def __len__(self): - if self.sampler_iter: - raise RuntimeError("'{}' should not be called for IterableDataset". 
- format('__len__')) - num_samples = len(self.indices) + num_samples = len(self.sampler) num_samples += int(not self.drop_last) * (self.batch_size - 1) return num_samples // self.batch_size diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py index 5cb831eee3a4b0497419ae5eec2972b4cda9e60b..a81d73d7e9a621d2a02ed91541f32b827bdff38c 100644 --- a/python/paddle/fluid/dataloader/dataloader_iter.py +++ b/python/paddle/fluid/dataloader/dataloader_iter.py @@ -359,6 +359,9 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): self._outstanding_capacity = 2 * max(self._num_workers, len(self._places)) + # see _try_put_indices + self._thread_lock = threading.Lock() + # init workers and indices queues and put 2 indices in each indices queue self._init_workers() for _ in range(self._outstanding_capacity): @@ -660,22 +663,32 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase): def _try_put_indices(self): assert self._batches_outstanding <= self._outstanding_capacity, \ "too many indices have been put to queue" - try: - indices = next(self._sampler_iter) - except StopIteration: - return + # In multi-process mode for IterableDataset, _try_put_indices will + # be called both in main process(for our implement has blocking queue, + # and blocking queue read is in main process) and thread, which may + # cause error following error + # 1. "ValueError: generator already executing" in next(self._sampler_iter) + # 2. re-enter in increase _send_idx + # add a lock for threading save, for _try_put_indices is only a slight + # function which is not in data reading pipeline, this lock almost no + # influence on performance + with self._thread_lock: + try: + indices = next(self._sampler_iter) + except StopIteration: + return - for i in range(self._num_workers): - worker_idx = next(self._workers_idx_cycle) - if self._worker_status[worker_idx]: - break - else: - return + for i in range(self._num_workers): + worker_idx = next(self._workers_idx_cycle) + if self._worker_status[worker_idx]: + break + else: + return - self._indices_queues[worker_idx].put((self._send_idx, indices)) - self._task_infos[self._send_idx] = (worker_idx, ) - self._batches_outstanding += 1 - self._send_idx += 1 + self._indices_queues[worker_idx].put((self._send_idx, indices)) + self._task_infos[self._send_idx] = (worker_idx, ) + self._batches_outstanding += 1 + self._send_idx += 1 def __del__(self): self._try_shutdown_all() diff --git a/python/paddle/fluid/dataloader/sampler.py b/python/paddle/fluid/dataloader/sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..5c75fafe8b22380090ba6fb580777cdbe6570ad6 --- /dev/null +++ b/python/paddle/fluid/dataloader/sampler.py @@ -0,0 +1,236 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
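The `_thread_lock` added to `_try_put_indices` above guards a generator and a counter that can now be touched from two call sites. A minimal illustration of the pattern; the names are stand-ins, not the DataLoader internals:

```python
import threading

# Advancing one generator from two threads at once raises
# "ValueError: generator already executing", and the send-index
# bookkeeping must not interleave, hence the single lock.
sampler_iter = iter(range(100))
queued, send_idx = [], 0
lock = threading.Lock()

def try_put_indices():
    global send_idx
    with lock:                      # one producer at a time
        try:
            indices = next(sampler_iter)
        except StopIteration:
            return
        queued.append((send_idx, indices))
        send_idx += 1

threads = [threading.Thread(target=try_put_indices) for _ in range(8)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(send_idx, len(queued))        # 8 8
```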
+ +from __future__ import print_function +from __future__ import division + +import numpy as np + +__all__ = ["Sampler", "SequenceSampler", "RandomSampler"] + + +class Sampler(object): + """ + An abstract class to encapsulate methods and behaviors of samplers. + + All sampler used by :code:`paddle.io.BatchSampler` should be a subclass + of :code:`paddle.io.Sampler`, BatchSampler subclasses should + implement following methods: + + :code:`__iter__`: return sample index iterably, which iterate over indices + of dataset elements + + :code:`__len__`: the number of sample in :attr:`data_source` + + + Args: + data_source(Dataset, optional): this could be an instance of + :code:`paddle.io.Dataset` other Python object which + implemented :code:`__len__` for Sampler to get indices + as the range of :attr:`dataset` length. Default None. + + Returns: + Sampler: an iterable object for sample indices iterating + + Examples: + + .. code-block:: python + + from paddle.io import Dataset, Sampler + + class RandomDataset(Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples + + def __getitem__(self, idx): + image = np.random.random([784]).astype('float32') + label = np.random.randint(0, 9, (1, )).astype('int64') + return image, label + + def __len__(self): + return self.num_samples + + class MySampler(Sampler): + def __init__(self, data_source): + self.data_source = data_source + + def __iter__(self): + return iter(range(len(self.data_source))) + + def __len__(self): + return len(self.data_source) + + sampler = MySampler(data_source=RandomDataset(100)) + + for index in sampler: + print(index) + + see `paddle.io.BatchSampler` + see `paddle.io.DataLoader` + + """ + + def __init__(self, data_source=None): + self.data_source = data_source + + def __iter__(self): + raise NotImplementedError + + # Not define __len__ method in this base class here for __len__ + # is not needed in same sence, e.g. paddle.io.IterableDataset + + +class SequenceSampler(Sampler): + """ + Iterate samples sequentially, yield :code:`0, 1, 2, ..., len(data_source) -1` + generally, + + Args: + data_source(Dataset): dataset to sample, this could be an + instance of :code:`paddle.io.Dataset` other Python + object which implemented :code:`__len__`. + + Returns: + Sampler: a Sampler yield sample index sequentially + + Examples: + + .. code-block:: python + + from paddle.io import Dataset, SequenceSampler + + class RandomDataset(Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples + + def __getitem__(self, idx): + image = np.random.random([784]).astype('float32') + label = np.random.randint(0, 9, (1, )).astype('int64') + return image, label + + def __len__(self): + return self.num_samples + + sampler = SequenceSampler(data_source=RandomDataset(100)) + + for index in sampler: + print(index) + + see `paddle.io.Sampler` + """ + + def __init__(self, data_source): + self.data_source = data_source + + def __iter__(self): + return iter(range(len(self.data_source))) + + def __len__(self): + return len(self.data_source) + + +class RandomSampler(Sampler): + """ + Iterate samples randomly, yield shuffled indices, if :attr:`replacement=False`, + yield shuffled indices of the whole data souce, if :attr:`replacement=True`, + :attr:`num_samples` can set to specify the sample number to draw. + + Args: + data_source(Dataset): dataset to sample, this could be an + instance of :code:`paddle.io.Dataset` other Python + object which implemented :code:`__len__`. 
+        replacement(bool): If False, sample the whole dataset; if True,
+            set :attr:`num_samples` to specify how many samples to draw. Default False.
+        num_samples(int): the number of samples to draw if :attr:`replacement`
+            is True. Default None.
+        generator(Generator): specify a generator to sample the data source. Default None.
+
+    Returns:
+        Sampler: a Sampler that yields sample indices randomly
+
+    Examples:
+
+        .. code-block:: python
+
+            from paddle.io import Dataset, RandomSampler
+
+            class RandomDataset(Dataset):
+                def __init__(self, num_samples):
+                    self.num_samples = num_samples
+
+                def __getitem__(self, idx):
+                    image = np.random.random([784]).astype('float32')
+                    label = np.random.randint(0, 9, (1, )).astype('int64')
+                    return image, label
+
+                def __len__(self):
+                    return self.num_samples
+
+            sampler = RandomSampler(data_source=RandomDataset(100))
+
+            for index in sampler:
+                print(index)
+
+    see `paddle.io.Sampler`
+    """
+
+    def __init__(self,
+                 data_source,
+                 replacement=False,
+                 num_samples=None,
+                 generator=None):
+        self.data_source = data_source
+        self.replacement = replacement
+        self._num_samples = num_samples
+        self.generator = generator
+
+        if not isinstance(self.replacement, bool):
+            raise TypeError("expect boolean value for replacement, but got "
+                            "replacement={}".format(self.replacement))
+
+        if self._num_samples is not None and not replacement:
+            raise ValueError(
+                "num_samples should not be specified while replacement is False")
+
+        if not isinstance(self.num_samples, int) or self.num_samples <= 0:
+            raise ValueError("num_samples should be a positive integer, "
+                             "but got num_samples={}".format(self.num_samples))
+
+    @property
+    def num_samples(self):
+        if self._num_samples is None:
+            return len(self.data_source)
+        return self._num_samples
+
+    def __iter__(self):
+        n = len(self.data_source)
+        if self.generator:
+            for i in range(self.num_samples):
+                try:
+                    index = next(self.generator)
+                except StopIteration:
+                    return
+                yield index
+        else:
+            if self.replacement:
+                for index in np.random.choice(
+                        np.arange(n), self.num_samples, replace=True).tolist():
+                    yield index
+            else:
+                for index in np.random.choice(
+                        np.arange(n), n, replace=False).tolist():
+                    yield index
+
+    def __len__(self):
+        return self.num_samples
diff --git a/python/paddle/fluid/dygraph/__init__.py b/python/paddle/fluid/dygraph/__init__.py
index 20f48db0808b04d09ddd4537cbec2af939ad7692..fc14e9b390e6ae4d695252f064f1f0697aaee258 100644
--- a/python/paddle/fluid/dygraph/__init__.py
+++ b/python/paddle/fluid/dygraph/__init__.py
@@ -59,6 +59,8 @@ from .rnn import *
 from . import amp
 from .amp import *
 
+from .math_op_patch import monkey_patch_math_varbase
+
 __all__ = []
 __all__ += layers.__all__
 __all__ += base.__all__
diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py
index 826de0588efe97de53dbe2c6530dc724a935dcf9..d4f1ca333945d8933a7a9df7ca93ea825e5cf110 100644
--- a/python/paddle/fluid/dygraph/base.py
+++ b/python/paddle/fluid/dygraph/base.py
@@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and
 # limitations under the License.
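A short usage sketch for the `RandomSampler` defined in the new sampler.py above, showing both sampling modes. It assumes a Paddle build that contains this patch, so the class is importable from `paddle.io` as the docstrings suggest; a plain list stands in for any data source that implements `__len__`:

```python
from paddle.io import RandomSampler

data = list(range(10))  # anything with __len__ works as data_source

# replacement=False (default): a random permutation of all 10 indices.
print(list(RandomSampler(data)))

# replacement=True: draw exactly num_samples indices, repeats allowed.
print(list(RandomSampler(data, replacement=True, num_samples=5)))
```

Note that passing `num_samples` together with `replacement=False` raises a ValueError, matching the check in `__init__` above.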
from ..wrapped_decorator import signature_safe_contextmanager, wrap_decorator +import inspect import decorator import contextlib -import functools import sys import numpy as np from paddle.fluid import core @@ -26,13 +26,8 @@ import objgraph from ..data_feeder import convert_dtype __all__ = [ - 'no_grad', - 'grad', - 'guard', - 'enable_dygraph', - 'disable_dygraph', - 'enabled', - 'to_variable', + 'no_grad', 'grad', 'guard', 'enable_dygraph', 'disable_dygraph', 'enabled', + 'to_variable' ] @@ -96,8 +91,8 @@ def enabled(): """ This function checks whether the program runs in dynamic graph mode or not. You can enter dynamic graph mode with :ref:`api_fluid_dygraph_guard` api, - or enable and disable dynamic graph mode with :ref:`api_fluid_dygraph_enable` - and :ref:`api_fluid_dygraph_disable` api . + or enable and disable dynamic graph mode with :ref:`api_fluid_dygraph_enable_dygraph` + and :ref:`api_fluid_dygraph_disable_dygraph` api . **Note**: ``fluid.dygraph.enabled`` is the alias of ``fluid.in_dygraph_mode``, and @@ -172,77 +167,82 @@ def disable_dygraph(): _functional_dygraph_context_manager = None -@signature_safe_contextmanager -def _switch_tracer_mode_guard_(is_train=True): - tracer = framework._dygraph_tracer() - if tracer: - mode = tracer._train_mode - tracer._train_mode = is_train - try: - yield - finally: - tracer._train_mode = mode - else: - yield - - -def no_grad(func=None): +class no_grad: """ :api_attr: imperative Create a context which disables dygraph gradient calculation. - In this mode, the result of every computation will have `stop_gradient=True`. + In this mode, the result of every computation will have `stop_gradient` set + to `True`. - Also functions as a decorator. (Make sure to instantiate without parenthesis.) + Also functions as a decorator. (Make sure to use an instance.) Examples: .. 
code-block:: python import numpy as np - import paddle.fluid as fluid + import paddle + + paddle.disable_static() # use as generator data = np.array([[2, 3], [4, 5]]).astype('float32') - with fluid.dygraph.guard(): - l0 = fluid.Linear(2, 2) # l0.weight.gradient() is None - l1 = fluid.Linear(2, 2) - with fluid.dygraph.no_grad(): - # l1.weight.stop_gradient is False - tmp = l1.weight * 2 # tmp.stop_gradient is True - x = fluid.dygraph.to_variable(data) - y = l0(x) + tmp - o = l1(y) - o.backward() - print(tmp.gradient() is None) # True - print(l0.weight.gradient() is None) # False + l0 = paddle.nn.Linear(2, 2) # l0.weight.gradient() is None + l1 = paddle.nn.Linear(2, 2) + with paddle.no_grad(): + # l1.weight.stop_gradient is False + tmp = l1.weight * 2 # tmp.stop_gradient is True + x = paddle.to_tensor(data) + y = l0(x) + tmp + o = l1(y) + o.backward() + print(tmp.gradient() is None) # True + print(l0.weight.gradient() is None) # False # use as decorator - @fluid.dygraph.no_grad + @paddle.no_grad() def test_layer(): - with fluid.dygraph.guard(): - inp = np.ones([3, 1024], dtype='float32') - t = fluid.dygraph.base.to_variable(inp) - linear1 = fluid.Linear(1024, 4, bias_attr=False) - linear2 = fluid.Linear(4, 4) - ret = linear1(t) - dy_ret = linear2(ret) + inp = np.ones([3, 1024], dtype='float32') + t = paddle.to_tensor(inp) + linear1 = paddle.nn.Linear(1024, 4, bias_attr=False) + linear2 = paddle.nn.Linear(4, 4) + ret = linear1(t) + dy_ret = linear2(ret) test_layer() - """ - if func is None: - return _switch_tracer_mode_guard_(is_train=False) - else: + def __call__(self, func): @decorator.decorator - def __impl__(func, *args, **kwargs): - with _switch_tracer_mode_guard_(is_train=False): + def _decorate_function(func, *args, **kwargs): + with self: return func(*args, **kwargs) - return __impl__(func) + @decorator.decorator + def _decorate_generator(func, *args, **kwargs): + gen = func(*args, **kwargs) + with self: + for x in gen: + yield x + + if inspect.isgeneratorfunction(func): + return _decorate_generator(func) + else: + return _decorate_function(func) + + def __enter__(self): + tracer = framework._dygraph_tracer() + if tracer: + self.orig = tracer._train_mode + tracer._train_mode = False + + def __exit__(self, *args): + tracer = framework._dygraph_tracer() + if tracer: + tracer._train_mode = self.orig @signature_safe_contextmanager @@ -280,12 +280,11 @@ def guard(place=None): tracer = Tracer() VarBase = core.VarBase - if place is None: - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - tracer._expected_place = place + if place is not None: + expected_place = place + else: + expected_place = framework._current_expected_place() + tracer._expected_place = expected_place with framework.program_guard(train, startup): with framework.unique_name.guard(): @@ -376,47 +375,46 @@ def grad(outputs, Examples 1: .. code-block:: python - import paddle.fluid as fluid + import paddle + paddle.disable_static() def test_dygraph_grad(create_graph): - with fluid.dygraph.guard(): - x = fluid.layers.ones(shape=[1], dtype='float32') - x.stop_gradient = False - y = x * x - - # Since y = x * x, dx = 2 * x - dx = fluid.dygraph.grad( - outputs=[y], - inputs=[x], - create_graph=create_graph, - retain_graph=True)[0] - - z = y + dx - - # If create_graph = False, the gradient of dx - # would not be backpropagated. Therefore, - # z = x * x + dx, and x.gradient() = 2 * x = 2.0 - - # If create_graph = True, the gradient of dx - # would be backpropagated. 
Therefore, - # z = x * x + dx = x * x + 2 * x, and - # x.gradient() = 2 * x + 2 = 4.0 - - z.backward() - return x.gradient() - - print(test_dygraph_grad(create_graph=False)) # [2.] + x = paddle.ones(shape=[1], dtype='float32') + x.stop_gradient = False + y = x * x + + # Since y = x * x, dx = 2 * x + dx = paddle.grad( + outputs=[y], + inputs=[x], + create_graph=create_graph, + retain_graph=True)[0] + + z = y + dx + + # If create_graph = False, the gradient of dx + # would not be backpropagated. Therefore, + # z = x * x + dx, and x.gradient() = 2 * x = 2.0 + + # If create_graph = True, the gradient of dx + # would be backpropagated. Therefore, + # z = x * x + dx = x * x + 2 * x, and + # x.gradient() = 2 * x + 2 = 4.0 + + z.backward() + return x.gradient() + + print(test_dygraph_grad(create_graph=False)) # [2.] print(test_dygraph_grad(create_graph=True)) # [4.] Examples 2: .. code-block:: python - import paddle.fluid as fluid - - fluid.enable_dygraph() + import paddle + paddle.disable_static() def test_dygraph_grad(grad_outputs=None): - x = fluid.layers.fill_constant(shape=[1], value=2.0, dtype='float32') + x = paddle.fill_constant(shape=[1], value=2.0, dtype='float32') x.stop_gradient = False y1 = x * x @@ -432,27 +430,27 @@ def grad(outputs, # Therefore, the final result would be: # dx = 2 * x * dy1 + 3 * dy2 = 4 * dy1 + 3 * dy2. - dx = fluid.dygraph.grad( + dx = paddle.grad( outputs=[y1, y2], inputs=[x], grad_outputs=grad_outputs)[0] return dx.numpy() - THREE = fluid.layers.fill_constant(shape=[1], value=3.0, dtype='float32') - FOUR = fluid.layers.fill_constant(shape=[1], value=4.0, dtype='float32') + grad_value = paddle.fill_constant(shape=[1], value=4.0, dtype='float32') # dy1 = [1], dy2 = [1] print(test_dygraph_grad(None)) # [7.] # dy1 = [1], dy2 = [4] - print(test_dygraph_grad([None, FOUR])) # [16.] + print(test_dygraph_grad([None, grad_value])) # [16.] # dy1 = [4], dy2 = [1] - print(test_dygraph_grad([FOUR, None])) # [19.] + print(test_dygraph_grad([grad_value, None])) # [19.] # dy1 = [3], dy2 = [4] - print(test_dygraph_grad([THREE, FOUR])) # [24.] + grad_y1 = paddle.fill_constant(shape=[1], value=3.0, dtype='float32') + print(test_dygraph_grad([grad_y1, grad_value])) # [24.] ''' def check_in_out(in_out_list, name): diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py b/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py index aeece9513b57710b767322c2a7986eec087b4f8d..13f38b0726c27566ff0eda41d6c365e6a7e4aa4b 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py @@ -18,8 +18,8 @@ import collections import inspect import gast - from paddle.fluid import core +from paddle.fluid.dygraph.dygraph_to_static.utils import unwrap from paddle.fluid.framework import Program # NOTE(liym27): Please use `getattr(ast_node, ORIGI_INFO)` instead of . operation to get the original information of ast node. @@ -197,18 +197,6 @@ def attach_origin_info(ast_node, func): return ast_node -# NOTE: inspect.unwrap() exits in PY3 but not in PY2. -def unwrap(func): - def _is_wrapped(f): - return hasattr(f, '__wrapped__') - - unwrapped_f = func - while (_is_wrapped(unwrapped_f)): - unwrapped_f = unwrapped_f.__wrapped__ - - return unwrapped_f - - def ast_walk(transformed_node, static_node): """ Recursively yield all descendant nodes in the trees starting at transformed_node and static_node (including itself) in parallel. 
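The `unwrap` helper removed from origin_info.py here is re-added to `dygraph_to_static/utils.py` later in this patch; it simply walks the `__wrapped__` chain that `functools.wraps` records. A standalone sketch of that behavior, with a made-up `trace` decorator and `forward` function for illustration:

```python
import functools

def trace(fn):
    @functools.wraps(fn)              # records fn as wrapper.__wrapped__
    def wrapper(*args, **kwargs):
        return fn(*args, **kwargs)
    return wrapper

def unwrap(func):
    # Follow __wrapped__ until the innermost, undecorated function is reached.
    while hasattr(func, '__wrapped__'):
        func = func.__wrapped__
    return func

@trace
@trace
def forward(x):
    return x + 1

original = unwrap(forward)
print(original is forward.__wrapped__.__wrapped__)  # True
print(original(1))                                  # 2
```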
diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py index 88562dd40a63b3da50b34bd1cb5c1094aef1ae42..ceacba25375c64552e1e85d046ca494b078ee66d 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py @@ -13,32 +13,38 @@ # limitations under the License. from __future__ import print_function -import gast + +import collections import inspect -import warnings import textwrap import threading -import collections +import warnings + +import gast import numpy as np -from paddle.fluid import core, scope_guard -from paddle.fluid import framework +from paddle.fluid import core from paddle.fluid import executor +from paddle.fluid import framework +from paddle.fluid import scope_guard from paddle.fluid import unique_name +from paddle.fluid.data_feeder import check_type from paddle.fluid.dygraph import layers -from paddle.fluid.layers.utils import flatten -from paddle.fluid.layers.utils import pack_sequence_as +from paddle.fluid.dygraph.base import param_guard from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.dygraph.dygraph_to_static.ast_transformer import DygraphToStaticAst +from paddle.fluid.dygraph.dygraph_to_static.error import ERROR_DATA +from paddle.fluid.dygraph.dygraph_to_static.error import attach_error_data +from paddle.fluid.dygraph.dygraph_to_static.origin_info import attach_origin_info +from paddle.fluid.dygraph.dygraph_to_static.origin_info import create_and_update_origin_info_map +from paddle.fluid.dygraph.dygraph_to_static.origin_info import update_op_callstack_with_origin_info +from paddle.fluid.dygraph.dygraph_to_static.partial_program import partial_program_from +from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_func from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code from paddle.fluid.dygraph.dygraph_to_static.utils import func_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_func +from paddle.fluid.dygraph.dygraph_to_static.utils import unwrap +from paddle.fluid.layers.utils import flatten +from paddle.fluid.layers.utils import pack_sequence_as from paddle.fluid.wrapped_decorator import signature_safe_contextmanager -from paddle.fluid.dygraph.base import param_guard -from paddle.fluid.data_feeder import check_type -from paddle.fluid.dygraph.dygraph_to_static.partial_program import partial_program_from -from paddle.fluid.dygraph.dygraph_to_static.origin_info import attach_origin_info, create_and_update_origin_info_map -from paddle.fluid.dygraph.dygraph_to_static.origin_info import update_op_callstack_with_origin_info -from paddle.fluid.dygraph.dygraph_to_static.error import attach_error_data, ERROR_DATA __all__ = ['ProgramTranslator', 'convert_to_static'] @@ -89,7 +95,7 @@ class FunctionCache(object): """ # Note: In Python2, it will raise OSError when inspect function # with decorator directly and function.__wrapped__ holds the actual function. 
- func = getattr(func, '__wrapped__', func) + func = unwrap(func) source_code = func_to_source_code(func) # TODO(liym27): @@ -669,7 +675,9 @@ class ProgramTranslator(object): dygraph_func ), "Input dygraph_func is not a callable in ProgramTranslator.get_code" # Gets AST from dygraph function - raw_code = inspect.getsource(dygraph_func) + + unwrap_func = unwrap(dygraph_func) + raw_code = inspect.getsource(unwrap_func) code = textwrap.dedent(raw_code) root = gast.parse(code) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py index def201cedc242c744116f96efa6c7c1a5b0c0eb2..21e05bc6faf10110fae385b525e72e38a04da925 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py @@ -19,7 +19,6 @@ import astor import atexit import copy import gast -import imp import inspect import os import six @@ -28,6 +27,12 @@ import textwrap from paddle.fluid import unique_name +# imp is deprecated in python3 +if six.PY2: + import imp +else: + from importlib.machinery import SourceFileLoader + dygraph_class_to_static_api = { "CosineDecay": "cosine_decay", "ExponentialDecay": "exponential_decay", @@ -369,13 +374,14 @@ def ast_to_func(ast_root, dyfunc, delete_on_exit=True): function, the other inner functions are invisible for the decorated function. """ - def remove_file(filepath): + def remove_if_exit(filepath): if os.path.exists(filepath): os.remove(filepath) source = ast_to_source_code(ast_root) import_fluid = "import paddle.fluid as fluid\n" source = import_fluid + source + if six.PY2: source = source.encode('utf-8') f = tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) @@ -387,10 +393,13 @@ def ast_to_func(ast_root, dyfunc, delete_on_exit=True): f.write(source) if delete_on_exit: - atexit.register(lambda: remove_file(f.name)) - atexit.register(lambda: remove_file(f.name[:-3] + ".pyc")) + atexit.register(lambda: remove_if_exit(f.name)) + atexit.register(lambda: remove_if_exit(f.name[:-3] + ".pyc")) - module = imp.load_source(module_name, f.name) + if six.PY2: + module = imp.load_source(module_name, f.name) + else: + module = SourceFileLoader(module_name, f.name).load_module() func_name = dyfunc.__name__ if not hasattr(module, func_name): raise ValueError( @@ -1052,3 +1061,19 @@ class SplitAssignTransformer(gast.NodeTransformer): value_node = target return new_nodes + + +# NOTE: inspect.unwrap() exits in PY3 but not in PY2. +def unwrap(func): + """ + Returns the object wrapped by decorators. 
+ """ + + def _is_wrapped(f): + return hasattr(f, '__wrapped__') + + unwrapped_f = func + while (_is_wrapped(unwrapped_f)): + unwrapped_f = unwrapped_f.__wrapped__ + + return unwrapped_f diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index 0da5c57f1bc92f680335872c68ddb96079757614..ba27b2d1c631428c07a5832125cdc18634e9a4b5 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -437,8 +437,16 @@ def _load_persistable_vars(model_path, value: key for key, value in program_holder._suffix_varname_dict.items() } - # NOTE: some var may not be Parameter - for name in sorted(extra_var_info): + + # NOTE(chenweihang): we need load persistable vars based the program, + # because the program may be pruned when `save_inference_model`, some + # var in `extra_var_info` may have been pruned + for name in sorted(inv_suffix_varname_dict): + if name not in extra_var_info: + raise RuntimeError( + "The model to be loaded is not complete." + "The variable `%s` of program cannot be found in loaded model.", + name) # get suffix var name, see [why need to append suffix to persistable vars] new_name = inv_suffix_varname_dict[name] # create output varbase @@ -641,19 +649,21 @@ class TranslatedLayer(layers.Layer): # name contains `.` originally, such as `linear_0.w_0`, so here # need to generate new var name for each var self._persistable_var_name_dict = dict() - for name, var in persistable_vars.items(): - if isinstance(var, framework.ParamBase): - dy_name = _generate_unique_var_name(PARAMETER_NAME_PREFIX) - self._persistable_var_name_dict[name] = dy_name - self.add_parameter(dy_name, var) - elif isinstance(var, core.VarBase): - dy_name = _generate_unique_var_name(BUFFER_NAME_PREFIX) - self._persistable_var_name_dict[name] = dy_name - self.register_buffer(dy_name, var) - else: - raise TypeError( - "Adding persistent variable which to layer is not supported now" - ) + # the TranslatedLayer object holded var names count started from 0 + with unique_name.guard(): + for name, var in persistable_vars.items(): + if isinstance(var, framework.ParamBase): + dy_name = _generate_unique_var_name(PARAMETER_NAME_PREFIX) + self._persistable_var_name_dict[name] = dy_name + self.add_parameter(dy_name, var) + elif isinstance(var, core.VarBase): + dy_name = _generate_unique_var_name(BUFFER_NAME_PREFIX) + self._persistable_var_name_dict[name] = dy_name + self.register_buffer(dy_name, var) + else: + raise TypeError( + "Adding persistent variable which to layer is not supported now" + ) self._is_test = True diff --git a/python/paddle/fluid/dygraph/layer_object_helper.py b/python/paddle/fluid/dygraph/layer_object_helper.py index f2e914a2137d0be0606556471696fd3d255b3c12..a904f80639752a7538289a1ce7c2abf378ccc634 100644 --- a/python/paddle/fluid/dygraph/layer_object_helper.py +++ b/python/paddle/fluid/dygraph/layer_object_helper.py @@ -136,18 +136,13 @@ class LayerObjectHelper(LayerHelperBase): return param # TODO: this should not be called anymore after all activation func move to Layers - def append_activation(self, - input_var, - act=None, - use_cudnn=None, - use_mkl_dnn=None): + def append_activation(self, input_var, act=None, use_cudnn=None): """Append activation Args: input_var: the input variable. The len(input_var.shape) is larger or equal than 2. 
act: activation type - use_mkl_dnn: if use mkldnn use_cudnn: if use cudnn Return the Variable of after append activation @@ -163,8 +158,9 @@ class LayerObjectHelper(LayerHelperBase): if (use_cudnn is not None) and use_cudnn: act['use_cudnn'] = use_cudnn - if (use_mkl_dnn is not None) and use_mkl_dnn: - act['use_mkldnn'] = use_mkl_dnn + use_mkldnn = core.globals()["FLAGS_use_mkldnn"] + if (use_mkldnn is not None) and use_mkldnn: + act['use_mkldnn'] = use_mkldnn act_type = act.pop('type') tmp = self.create_variable_for_type_inference(dtype=input_var.dtype) diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 250e2b3b3882ccccd3a6582eabd565ea9cba5cc8..1ef719b9da187be659d9c898ec996b5ad0c0d8a6 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -161,7 +161,7 @@ class Layer(core.Layer): print(net.state_dict()) """ - for layer in self.sublayers(): + for layer in self.children(): layer.apply(fn) fn(self) @@ -283,7 +283,7 @@ class Layer(core.Layer): def create_parameter(self, shape, attr=None, - dtype='float32', + dtype=None, is_bias=False, default_initializer=None): """Create parameters for this layer. @@ -353,6 +353,56 @@ class Layer(core.Layer): ] return ret + def children(self): + """Returns an iterator over immediate children layers. + + Yields: + Layer: a child layer + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + with fluid.dygraph.guard(): + fc1 = fluid.Linear(10, 3) + fc2 = fluid.Linear(3, 10, bias_attr=False) + model = fluid.dygraph.Sequential(fc1, fc2) + + layer_list = list(model.children()) + + print(layer_list) + + """ + for _, layer in self.named_children(): + yield layer + + def named_children(self): + """Returns an iterator over immediate children layers, yielding both + the name of the layer as well as the layer itself. + + Yields: + (string, Layer): Tuple containing a name and child layer + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + with fluid.dygraph.guard(): + fc1 = fluid.Linear(10, 3) + fc2 = fluid.Linear(3, 10, bias_attr=False) + model = fluid.dygraph.Sequential(fc1, fc2) + for prefix, layer in model.named_children(): + print(prefix, layer) + + """ + memo = set() + for name, layer in self._sub_layers.items(): + if layer is not None and layer not in memo: + memo.add(layer) + yield name, layer + def sublayers(self, include_sublayers=True): """Returns a list of sub layers. @@ -503,7 +553,10 @@ class Layer(core.Layer): "The name of buffer should be a string, but received {}.". format(type(name).__name__)) elif '.' in name: - raise KeyError("The name of buffer can not contain \".\"") + raise KeyError( + "The name of buffer can not contain `.`, " + "because when you access the newly added buffer in the " + "form of `self.**.**`, it will cause AttributeError.") elif name == '': raise KeyError("The name of buffer can not be empty.") elif hasattr(self, name) and name not in self._buffers: @@ -686,20 +739,38 @@ class Layer(core.Layer): Returns: Parameter: the parameter passed in. """ - if parameter is None: - self._parameters[name] = None - elif not isinstance(parameter, framework.Parameter): + if '_parameters' not in self.__dict__: + raise RuntimeError( + "super(YourLayer, self).__init__() should be called firstly.") + elif not isinstance(name, six.string_types): + raise TypeError( + "The name of parameter should be a string, but received {}.". + format(type(name).__name__)) + elif '.' 
in name: + raise KeyError( + "The name of parameter can not contain `.`, " + "because when you access the newly added parameter in the " + "form of `self.**.**`, it will cause AttributeError.") + elif name == '': + raise KeyError("The name of parameter can not be empty.") + elif hasattr(self, name) and name not in self._parameters: + raise KeyError("The parameter '{}' already exists.".format(name)) + elif parameter is not None and not isinstance(parameter, + framework.Parameter): raise TypeError( - "parameter assignment requires Parameter or None, but got '{}'" - .format(type(parameter).__name__)) + "The parameter to be added should be a Parameter, but received {}.". + format(type(parameter).__name__)) + else: + if parameter is None: + self._parameters[name] = None - if len(self._loaddict_holder) > 0: - assert parameter.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format( - parameter.name) + if len(self._loaddict_holder) > 0: + assert parameter.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in state_dict".format( + parameter.name) - parameter.set_value(self._loaddict_holder[parameter.name]) + parameter.set_value(self._loaddict_holder[parameter.name]) - self._parameters[name] = parameter + self._parameters[name] = parameter return parameter def __getattr__(self, name): diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index d2c779a85497917179736777dac25efa7cfba228..bb55c6725e6a62f2cef393fd34b249c217be0c54 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -17,7 +17,9 @@ from __future__ import print_function from .. import core from ..framework import Variable, convert_np_dtype_to_dtype_, _varbase_creator from ..layers.layer_function_generator import OpProtoHolder +from ..layers import common_methods from . import to_variable, no_grad +import paddle import numpy as np import six @@ -30,6 +32,8 @@ _supported_int_dtype_ = [ core.VarDesc.VarType.INT64, ] +_already_patch_varbase = False + def monkey_patch_math_varbase(): """ @@ -37,7 +41,7 @@ def monkey_patch_math_varbase(): The difference is, in dygraph mode, use auto-generated op functions for better performance. 
""" - @no_grad + @no_grad() def create_tensor(value, dtype, shape): out = _varbase_creator(dtype=dtype) out = core.ops.fill_constant(out, 'dtype', dtype, 'shape', shape, @@ -140,25 +144,50 @@ def monkey_patch_math_varbase(): else: return int(var.numpy().flatten()[0]) - def _scalar_elementwise_add_(var, value): + @property + def _ndim_(var): + return len(var.shape) + + def _scalar_add_(var, value): return _scalar_elementwise_op_(var, 1.0, value) - def _scalar_elementwise_sub_(var, value): + def _scalar_sub_(var, value): return _scalar_elementwise_op_(var, 1.0, -value) - def _scalar_elementwise_rsub_(var, value): + def _scalar_rsub_(var, value): return _scalar_elementwise_op_(var, -1.0, value) - def _scalar_elementwise_mul_(var, value): + def _scalar_mul_(var, value): return _scalar_elementwise_op_(var, value, 0.0) - def _scalar_elementwise_div_(var, value): + def _scalar_div_(var, value): return _scalar_elementwise_op_(var, 1.0 / value, 0.0) - def _elemwise_method_creator_(method_name, - op_type, - reverse=False, - scalar_method=None): + # TODO(shenliang03): currently, it supports divide, floor_divide, remainder + # for binary operator by using the api to achieve the type promotion + def _binary_method_creator_(op_type, reverse=False): + import paddle + + def __impl__(self, other_var): + import paddle + op = getattr(paddle, op_type) + if reverse: + return op(other_var, self) + else: + return op(self, other_var) + + __impl__.__doc__ = """ + + See paddle.{}""".format(op_type) + __impl__.__name__ = op_type + + return __impl__ + + # for binary operator such as elementwise, compare + def _binary_creator_(method_name, + op_type, + reverse=False, + scalar_method=None): def __impl__(self, other_var): # FIXME(zjl): elementwise_div between integers cannot be converted to scale, # which may lose accuracy. This is a hot fix for release 1.6. @@ -200,60 +229,117 @@ def monkey_patch_math_varbase(): __impl__.__doc__ = """ {0} Args: - self(Variable): left hand variable - other_var(Variable|float|int): right hand variable + self(Tensor): left hand Tensor + other_var(Tensor|float|int): right hand Tensor Returns: - Variable + Tensor """.format(comment) __impl__.__name__ = method_name return __impl__ - # inject methods - for method_name, op_type, reverse, scalar_method in ( - ("__add__", "elementwise_add", False, _scalar_elementwise_add_), - # a+b == b+a. Do not need to reverse explicitly - ("__radd__", "elementwise_add", False, _scalar_elementwise_add_), - ("__sub__", "elementwise_sub", False, _scalar_elementwise_sub_), - ("__rsub__", "elementwise_sub", True, _scalar_elementwise_rsub_), - ("__mul__", "elementwise_mul", False, _scalar_elementwise_mul_), - # a*b == b*a. 
Do not need to reverse explicitly - ("__rmul__", "elementwise_mul", False, _scalar_elementwise_mul_), - ("__div__", "elementwise_div", False, _scalar_elementwise_div_), - ("__truediv__", "elementwise_div", False, _scalar_elementwise_div_), - ("__rdiv__", "elementwise_div", True, None), - ("__rtruediv__", "elementwise_div", True, None), - ("__pow__", "elementwise_pow", False, None), - ("__rpow__", "elementwise_pow", True, None), - ("__floordiv__", "elementwise_floordiv", False, None), - ("__mod__", "elementwise_mod", False, None), - # for logical compare - ("__eq__", "equal", False, None), - ("__ne__", "not_equal", False, None), - ("__lt__", "less_than", False, None), - ("__le__", "less_equal", False, None), - ("__gt__", "greater_than", False, None), - ("__ge__", "greater_equal", False, None)): - - setattr(core.VarBase, method_name, - _elemwise_method_creator_(method_name, op_type, reverse, - scalar_method)) - - # b = -a - core.VarBase.__neg__ = _neg_ - core.VarBase.__float__ = _float_ - core.VarBase.__long__ = _long_ - core.VarBase.__int__ = _int_ - core.VarBase.__len__ = _len_ - core.VarBase.__index__ = _index_ - core.VarBase.astype = astype - """ - When code is written like this - y = np.pi * var - ndarray.__mul__(self, var) is called, var will be traced as an array(by using __len__, __getitem__), which is not right. - when var.__array_ufunc__ is set to None, var.__rmul__(self, np) will be called. + # Todo(zhouwei): implement dygraph template to adapt to any function, receive('op_type', 'arg_template') + # Such as _method_creator_('addmm', 'x, y, alpha=1.0, beta=1.0, name=None'). It can reduce call time. + def _method_creator_(op_type, arg_template=None): + def __impl__(self): + op = getattr(core.ops, op_type) + return op(self) - The details can be seen bellow: - https://docs.scipy.org/doc/numpy-1.13.0/neps/ufunc-overrides.html#behavior-in-combination-with-python-s-binary-operations - """ - core.VarBase.__array_ufunc__ = None + __impl__.__doc__ = """ + + See paddle.{}""".format(op_type) + __impl__.__name__ = op_type + + return __impl__ + + varbase_methods = [ + # Type1: From custom fun or lambda + ## b=-a + ('__neg__', _neg_), + ('__float__', _float_), + ('__long__', _long_), + ('__int__', _int_), + ('__len__', _len_), + ('__index__', _index_), + ('astype', astype), + ('dim', lambda x: len(x.shape)), + ('ndimension', lambda x: len(x.shape)), + ('ndim', _ndim_), + ('size', lambda x: x.shape), + # Type2: From Template that create core.ops automatically. It's recommended. + ('__add__', + _binary_creator_('__add__', 'elementwise_add', False, _scalar_add_)), + ## a+b == b+a. Do not need to reverse explicitly + ('__radd__', + _binary_creator_('__radd__', 'elementwise_add', False, _scalar_add_)), + ('__sub__', _binary_creator_('__sub__', 'elementwise_sub', False, + _scalar_sub_)), + ('__rsub__', _binary_creator_('__rsub__', 'elementwise_sub', True, + _scalar_rsub_)), + ('__mul__', _binary_creator_('__mul__', 'elementwise_mul', False, + _scalar_mul_)), + ## a*b == b*a. 
Do not need to reverse explicitly + ('__rmul__', + _binary_creator_('__rmul__', 'elementwise_mul', False, _scalar_mul_)), + ('__rtruediv__', _binary_creator_('rtruediv__', 'elementwise_div', True, + None)), + ('__pow__', _binary_creator_('__pow__', 'elementwise_pow', False, + None)), + ('__rpow__', _binary_creator_('__rpow__', 'elementwise_pow', True, + None)), + # These binary use paddle.optype + ('__div__', _binary_method_creator_('divide', False)), + ('__truediv__', _binary_method_creator_('divide', False)), + ('__rtruediv__', _binary_method_creator_('divide', True)), + ('__rdiv__', _binary_method_creator_('divide', True)), + ('__floordiv__', _binary_method_creator_('floor_divide', False)), + ('__rfloordiv__', _binary_method_creator_('floor_divide', True)), + ('__mod__', _binary_method_creator_('remainder', False)), + ## for logical compare + ('__eq__', _binary_creator_('__eq__', 'equal', False, None)), + ('__ne__', _binary_creator_('__ne__', 'not_equal', False, None)), + ('__lt__', _binary_creator_('__lt__', 'less_than', False, None)), + ('__le__', _binary_creator_('__le__', 'less_equal', False, None)), + ('__gt__', _binary_creator_('__gt__', 'greater_than', False, None)), + ('__ge__', _binary_creator_('__ge__', 'greater_equal', False, None)), + ('__array_ufunc__', None), + ('sigmoid', _method_creator_('sigmoid', 'name=None')), + ('logsigmoid', _method_creator_('logsigmoid', 'name=None')), + ('exp', _method_creator_('exp', 'name=None')), + ('tanh', _method_creator_('tanh', 'name=None')), + ('atan', _method_creator_('atan', 'name=None')), + ('tanh_shrink', _method_creator_('tanh_shrink', 'name=None')), + ('sqrt', _method_creator_('sqrt', 'name=None')), + ('rsqrt', _method_creator_('rsqrt', 'name=None')), + ('abs', _method_creator_('abs', 'name=None')), + ('ceil', _method_creator_('ceil', 'name=None')), + ('floor', _method_creator_('floor', 'name=None')), + ('cos', _method_creator_('cos', 'name=None')), + ('acos', _method_creator_('acos', 'name=None')), + ('asin', _method_creator_('asin', 'name=None')), + ('sin', _method_creator_('sin', 'name=None')), + ('sinh', _method_creator_('sinh', 'name=None')), + ('cosh', _method_creator_('cosh', 'name=None')), + ('round', _method_creator_('round', 'name=None')), + ('reciprocal', _method_creator_('reciprocal', 'name=None')), + ('square', _method_creator_('square', 'name=None')), + ('softplus', _method_creator_('softplus', 'name=None')), + ('softsign', _method_creator_('softsign', 'name=None')), + # Type3: Form module 'paddle.tensor' defaultly. + # It's not a goodway, because it will increase call time. + ] + + global _already_patch_varbase + if not _already_patch_varbase: + for method in varbase_methods: + method_name = method[0] + method_impl = method[1] + setattr(core.VarBase, method_name, method_impl) + else: + import paddle.tensor + for method_name in common_methods: + if hasattr(core.VarBase, method_name): continue + method_impl = getattr(paddle.tensor, method_name, None) + if method_impl: setattr(core.VarBase, method_name, method_impl) + + _already_patch_varbase = True diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index e56f26f1b1b9493928b106bd9e5d16afad0d94ce..dc3403358b6af25d5da001282fffe53be8bfd3d9 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -14,6 +14,7 @@ from __future__ import print_function +import paddle from six.moves import reduce from .. 
import core from ..layers import utils @@ -180,6 +181,7 @@ class Conv2D(layers.Layer): if not isinstance(use_cudnn, bool): raise ValueError("use_cudnn should be True or False") self._use_cudnn = use_cudnn + self._use_mkldnn = core.globals()["FLAGS_use_mkldnn"] self._filter_size = filter_size self._num_filters = num_filters self._param_attr = param_attr @@ -187,7 +189,8 @@ class Conv2D(layers.Layer): self._dtype = dtype if (self._num_channels == self._groups and - num_filters % self._num_channels == 0 and not self._use_cudnn): + num_filters % self._num_channels == 0 and + not self._use_cudnn and not self._use_mkldnn): self._l_type = 'depthwise_conv2d' else: self._l_type = 'conv2d' @@ -224,14 +227,15 @@ class Conv2D(layers.Layer): if in_dygraph_mode() and self._l_type == 'conv2d': attrs = ('strides', self._stride, 'paddings', self._padding, 'dilations', self._dilation, 'groups', self._groups - if self._groups else 1, 'use_cudnn', self._use_cudnn) + if self._groups else 1, 'use_cudnn', self._use_cudnn, + 'use_mkldnn', self._use_mkldnn) out = core.ops.conv2d(input, self.weight, *attrs) pre_bias = out - pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, self.bias, - 1) - return dygraph_utils._append_activation_in_dygraph(pre_act, - self._act) + pre_act = dygraph_utils._append_bias_in_dygraph( + pre_bias, self.bias, 1, use_mkldnn=self._use_mkldnn) + return dygraph_utils._append_activation_in_dygraph( + pre_act, self._act, use_mkldnn=self._use_mkldnn) inputs = { 'Input': [input], 'Filter': [self.weight], @@ -242,7 +246,7 @@ class Conv2D(layers.Layer): 'dilations': self._dilation, 'groups': self._groups if self._groups else 1, 'use_cudnn': self._use_cudnn, - 'use_mkldnn': False, + 'use_mkldnn': self._use_mkldnn, } check_variable_and_dtype(input, 'input', @@ -267,7 +271,8 @@ class Conv2D(layers.Layer): inputs={'X': [pre_bias], 'Y': [self.bias]}, outputs={'Out': [pre_act]}, - attrs={'axis': 1}) + attrs={'axis': 1, + 'use_mkldnn': self._use_mkldnn}) else: pre_act = pre_bias @@ -828,6 +833,8 @@ class Pool2D(layers.Layer): if not isinstance(use_cudnn, bool): raise ValueError("use_cudnn should be True or False") + self._use_mkldnn = core.globals()["FLAGS_use_mkldnn"] + if data_format not in ["NCHW", "NHWC"]: raise ValueError( "Attr(data_format) should be 'NCHW' or 'NHWC'. 
Received " @@ -853,8 +860,8 @@ class Pool2D(layers.Layer): 'global_pooling', self._global_pooling, 'strides', self._pool_stride, 'paddings', self._pool_padding, 'use_cudnn', self._use_cudnn, 'ceil_mode', self._ceil_mode, - 'use_mkldnn', False, 'exclusive', self._exclusive, - 'data_format', self._data_format) + 'use_mkldnn', self._use_mkldnn, 'exclusive', + self._exclusive, 'data_format', self._data_format) return core.ops.pool2d(input, *attrs) check_variable_and_dtype( @@ -869,7 +876,7 @@ class Pool2D(layers.Layer): "paddings": self._pool_padding, "use_cudnn": self._use_cudnn, "ceil_mode": self._ceil_mode, - "use_mkldnn": False, + "use_mkldnn": self._use_mkldnn, "exclusive": self._exclusive, "data_format": self._data_format, } @@ -958,16 +965,22 @@ class Linear(layers.Layer): self.bias = self.create_parameter( shape=[output_dim], attr=bias_attr, dtype=dtype, is_bias=True) + self._use_mkldnn = core.globals()["FLAGS_use_mkldnn"] + def forward(self, input): if in_dygraph_mode(): pre_bias = _varbase_creator(dtype=input.dtype) core.ops.matmul(input, self.weight, pre_bias, 'transpose_X', False, - 'transpose_Y', False, "alpha", 1) + 'transpose_Y', False, "alpha", 1, "use_mkldnn", + self._use_mkldnn) pre_act = dygraph_utils._append_bias_in_dygraph( - pre_bias, self.bias, axis=len(input.shape) - 1) + pre_bias, + self.bias, + axis=len(input.shape) - 1, + use_mkldnn=self._use_mkldnn) - return dygraph_utils._append_activation_in_dygraph(pre_act, - self._act) + return dygraph_utils._append_activation_in_dygraph( + pre_act, self._act, use_mkldnn=self._use_mkldnn) check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'], "Linear") @@ -976,6 +989,7 @@ class Linear(layers.Layer): "transpose_X": False, "transpose_Y": False, "alpha": 1, + "use_mkldnn": self._use_mkldnn, } inputs = {"X": [input], "Y": [self.weight]} @@ -990,7 +1004,10 @@ class Linear(layers.Layer): inputs={'X': [tmp], 'Y': [self.bias]}, outputs={'Out': [pre_activation]}, - attrs={'axis': len(input.shape) - 1}) + attrs={ + 'axis': len(input.shape) - 1, + 'use_mkldnn': self._use_mkldnn + }) else: pre_activation = tmp return self._helper.append_activation(pre_activation, act=self._act) @@ -1250,6 +1267,7 @@ class BatchNorm(layers.Layer): self._param_attr = param_attr self._bias_attr = bias_attr self._act = act + self._use_mkldnn = core.globals()["FLAGS_use_mkldnn"] assert bias_attr is not False, "bias_attr should not be False in batch_norm." 
@@ -1314,8 +1332,8 @@ class BatchNorm(layers.Layer): if in_dygraph_mode(): attrs = ("momentum", self._momentum, "epsilon", self._epsilon, "is_test", not self.training, "data_layout", - self._data_layout, "use_mkldnn", False, "fuse_with_relu", - self._fuse_with_relu, "use_global_stats", + self._data_layout, "use_mkldnn", self._use_mkldnn, + "fuse_with_relu", self._fuse_with_relu, "use_global_stats", self._use_global_stats, 'trainable_statistics', self._trainable_statistics) batch_norm_out, _, _, _, _, _ = core.ops.batch_norm( @@ -1323,7 +1341,7 @@ class BatchNorm(layers.Layer): mean_out, variance_out, *attrs) return dygraph_utils._append_activation_in_dygraph( - batch_norm_out, act=self._act) + batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn) check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm') @@ -3226,19 +3244,6 @@ class Flatten(layers.Layer): self.stop_axis = stop_axis def forward(self, input): - out = self._helper.create_variable_for_type_inference(input.dtype) - x_shape = self._helper.create_variable_for_type_inference(input.dtype) - - if in_dygraph_mode(): - dy_out, _ = core.ops.flatten_contiguous_range( - input, 'start_axis', self.start_axis, 'stop_axis', - self.stop_axis) - return dy_out - self._helper.append_op( - type="flatten_contiguous_range", - inputs={"X": input}, - outputs={"Out": out, - "XShape": x_shape}, - attrs={"start_axis": self.start_axis, - "stop_axis": self.stop_axis}) + out = paddle.tensor.manipulation.flatten( + input, start_axis=self.start_axis, stop_axis=self.stop_axis) return out diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 804076f608e714b4c2623bfb580bfe09e42c8db2..54d2cda4ca6858c46140e1fbf6ac8860c3a7c78d 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -242,41 +242,38 @@ class DataParallel(layers.Layer): Examples: .. code-block:: python - import numpy as np - import paddle.fluid as fluid - import paddle.fluid.dygraph as dygraph - from paddle.fluid.optimizer import AdamOptimizer - from paddle.fluid.dygraph.nn import Linear - from paddle.fluid.dygraph.base import to_variable + import numpy as np + import paddle.fluid as fluid - place = place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id) - with fluid.dygraph.guard(place=place): + place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id) + with fluid.dygraph.guard(place): - # prepare the data parallel context - strategy=dygraph.prepare_context() + # prepare the data parallel context + strategy = fluid.dygraph.prepare_context() - linear = Linear(1, 10, act="softmax") - adam = fluid.optimizer.AdamOptimizer() + linear = fluid.dygraph.Linear(1, 10, act="softmax") + adam = fluid.optimizer.AdamOptimizer( + learning_rate=0.001, parameter_list=linear.parameters()) - # make the module become the data parallelism module - linear = dygraph.DataParallel(linear, strategy) + # make the module become the data parallelism module + linear = fluid.dygraph.DataParallel(linear, strategy) - x_data = np.random.random(size=[10, 1]).astype(np.float32) - data = to_variable(x_data) + x_data = np.random.random(size=[10, 1]).astype(np.float32) + data = fluid.dygraph.to_variable(x_data) - hidden = linear(data) - avg_loss = fluid.layers.mean(hidden) + hidden = linear(data) + avg_loss = fluid.layers.mean(hidden) - # scale the loss according to the number of trainers. - avg_loss = linear.scale_loss(avg_loss) + # scale the loss according to the number of trainers. 
+ avg_loss = linear.scale_loss(avg_loss) - avg_loss.backward() + avg_loss.backward() - # collect the gradients of trainers. - linear.apply_collective_grads() + # collect the gradients of trainers. + linear.apply_collective_grads() - adam.minimize(avg_loss) - linear.clear_gradients() + adam.minimize(avg_loss) + linear.clear_gradients() """ def __init__(self, layers, strategy): @@ -306,20 +303,23 @@ class DataParallel(layers.Layer): import numpy as np import paddle.fluid as fluid - import paddle.fluid.dygraph as dygraph - from paddle.fluid.optimizer import AdamOptimizer - from paddle.fluid.dygraph.nn import Linear - from paddle.fluid.dygraph.base import to_variable - - place = place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id) - with fluid.dygraph.guard(place=place): - strategy=dygraph.prepare_context() - linear = Linear(1, 10, act="softmax") - adam = fluid.optimizer.AdamOptimizer() - linear = dygraph.DataParallel(linear, strategy) + + place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id) + with fluid.dygraph.guard(place): + + # prepare the data parallel context + strategy = fluid.dygraph.prepare_context() + + linear = fluid.dygraph.Linear(1, 10, act="softmax") + adam = fluid.optimizer.AdamOptimizer( + learning_rate=0.001, parameter_list=linear.parameters()) + + # make the module become the data parallelism module + linear = fluid.dygraph.DataParallel(linear, strategy) x_data = np.random.random(size=[10, 1]).astype(np.float32) - data = to_variable(x_data) + data = fluid.dygraph.to_variable(x_data) + hidden = linear(data) avg_loss = fluid.layers.mean(hidden) @@ -327,6 +327,8 @@ class DataParallel(layers.Layer): avg_loss = linear.scale_loss(avg_loss) avg_loss.backward() + + # collect the gradients of trainers. linear.apply_collective_grads() adam.minimize(avg_loss) @@ -380,7 +382,7 @@ class DataParallel(layers.Layer): self._reshape_inplace(x=g_var, shape=g_shape) assert g_var.shape == g_shape - @no_grad + @no_grad() def apply_collective_grads(self): """ AllReduce the Parameters' gradient. @@ -390,23 +392,29 @@ class DataParallel(layers.Layer): import numpy as np import paddle.fluid as fluid - import paddle.fluid.dygraph as dygraph - from paddle.fluid.optimizer import AdamOptimizer - from paddle.fluid.dygraph.nn import Linear - from paddle.fluid.dygraph.base import to_variable - - place = place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id) - with fluid.dygraph.guard(place=place): - strategy=dygraph.prepare_context() - linear = Linear(1, 10, act="softmax") - adam = fluid.optimizer.AdamOptimizer() - linear = dygraph.DataParallel(linear, strategy) + + place = fluid.CUDAPlace(fluid.dygraph.ParallelEnv().dev_id) + with fluid.dygraph.guard(place): + + # prepare the data parallel context + strategy = fluid.dygraph.prepare_context() + + linear = fluid.dygraph.Linear(1, 10, act="softmax") + adam = fluid.optimizer.AdamOptimizer( + learning_rate=0.001, parameter_list=linear.parameters()) + + # make the module become the data parallelism module + linear = fluid.dygraph.DataParallel(linear, strategy) x_data = np.random.random(size=[10, 1]).astype(np.float32) - data = to_variable(x_data) + data = fluid.dygraph.to_variable(x_data) + hidden = linear(data) avg_loss = fluid.layers.mean(hidden) + + # scale the loss according to the number of trainers. avg_loss = linear.scale_loss(avg_loss) + avg_loss.backward() # collect the gradients of trainers. 
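The `scale_loss` / `apply_collective_grads` pair shown in the DataParallel docstrings above implements gradient averaging: each trainer divides its loss by the trainer count, so the all-reduce sum of gradients equals the plain average across trainers. A small numpy-only sketch of that arithmetic (conceptual only, not the collective implementation):

```python
import numpy as np

nranks = 4
local_grads = [np.array([1.0, 2.0]) * (r + 1) for r in range(nranks)]

# scale_loss(): dividing each trainer's loss by nranks scales its gradients too.
scaled = [g / nranks for g in local_grads]

# apply_collective_grads(): an all-reduce sum across trainers.
allreduced = np.sum(scaled, axis=0)

# The result equals the plain average of the unscaled per-trainer gradients.
print(np.allclose(allreduced, np.mean(local_grads, axis=0)))  # True
```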
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 7b4390c7a7b4e32fcb7937d47bedd875f1236006..9dbaab2580d21397fa7a4e03b03a5f1c4ac887f2 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -50,14 +50,19 @@ def monkey_patch_varbase(): static_var = var_base._to_static_var() """ + + # Note: getattr(self, attr, None) will call x.grad=x.gradient(), but gradient() only available in dygraph. + # It will fail. So, for propery in dygraph only, should not let it getattr(self, attr, None). + attr_not_need_keys = ['grad'] if isinstance(self, ParamBase): attr_kwargs = self.__dict__.copy() else: - attr_names = [ - name for name in dir(self) - if not (inspect.ismethod(getattr(self, name)) or - name.startswith('_')) - ] + attr_names = [] + for name in dir(self): + if name not in attr_not_need_keys and not ( + inspect.ismethod(getattr(self, name)) or + name.startswith('_')): + attr_names.append(name) attr_kwargs = {name: getattr(self, name) for name in attr_names} attr_keys = ['block', 'shape', 'dtype', 'type', 'name', 'persistable'] @@ -216,6 +221,14 @@ def monkey_patch_varbase(): else: return np.array(new_ivar.value().get_tensor()) + @property + def grad(self): + """ + The alias of gradient(). + """ + + return self.gradient() + def __str__(self): """ Convert a VarBase object to a readable string. @@ -239,9 +252,9 @@ def monkey_patch_varbase(): """ tensor = self.value().get_tensor() if tensor._is_initialized(): - return 'Variable: %s\n%s' % (self.name, str(tensor)) + return 'Tensor: %s\n%s' % (self.name, str(tensor)) else: - return 'Variable: %s, not initialized' % (self.name) + return 'Tensor: %s, not initialized' % (self.name) @property def block(self): @@ -260,8 +273,9 @@ def monkey_patch_varbase(): for method_name, method in ( ("__bool__", __bool__), ("__nonzero__", __nonzero__), ("_to_static_var", _to_static_var), ("set_value", set_value), - ("block", block), ("backward", backward), ("gradient", gradient), - ("__str__", __str__)): + ("block", block), ("backward", backward), ("grad", grad), + ("gradient", gradient), ("__str__", __str__), ("__repr__", __str__), + ("__module__", "paddle"), ("__name__", "Tensor")): setattr(core.VarBase, method_name, method) # patch math methods for varbase diff --git a/python/paddle/fluid/dygraph_utils.py b/python/paddle/fluid/dygraph_utils.py index 7b559494e6c3b779983e54f5f9675170ef985f63..a2338b874f51a209cf941d8c08d5995db4054968 100644 --- a/python/paddle/fluid/dygraph_utils.py +++ b/python/paddle/fluid/dygraph_utils.py @@ -45,17 +45,19 @@ def _append_activation_in_dygraph(input, @dygraph_only -def _append_bias_in_dygraph(input, bias=None, axis=1): +def _append_bias_in_dygraph(input, bias=None, axis=1, use_mkldnn=False): """Append bias operation in dygraph mode. Args: input: the input variable. bias: the bias to be appended axis: the axis to perform operation + use_mkldnn: whether to use mkldnn Return the Variable after bias operation """ if bias is None: return input - return core.ops.elementwise_add(input, bias, 'axis', axis) + return core.ops.elementwise_add(input, bias, 'axis', axis, 'use_mkldnn', + use_mkldnn) diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 27a59e76593ec2c456bb63cb1defa4e1d1f3e77c..52cfd9bf0a3e2d11b7e3b892d0a8d0d973b33ce4 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -31,6 +31,7 @@ from .. 
import compat as cpt from .trainer_factory import TrainerFactory from .trainer_factory import FetchHandlerMonitor import copy +from . import framework from .incubate.checkpoint import auto_checkpoint as acp __all__ = ['Executor', 'global_scope', 'scope_guard'] @@ -544,10 +545,8 @@ class Executor(object): def __init__(self, place=None): if place is None: - if core.is_compiled_with_cuda(): - self.place = core.CUDAPlace(0) - else: - self.place = core.CPUPlace() + expected_place = framework._current_expected_place() + self.place = expected_place else: self.place = place self.program_caches = dict() @@ -851,6 +850,7 @@ class Executor(object): def _run_parallel(self, program, scope, feed, fetch_list, fetch_var_name, return_numpy, return_merged): + from paddle.optimizer.lr_scheduler import _LRScheduler exe = program._executor # TODO(zhenghuihuang): quantization uses Graph in CompiledProgram # instead of program. We will add support for checking Vars in Graph @@ -894,6 +894,16 @@ class Executor(object): res.append(res_dict) exe.feed_tensors_into_local_scopes(res) + if hasattr(program._program, 'lr_sheduler'): + lr_sheduler = program._program.lr_sheduler + assert isinstance(lr_sheduler, _LRScheduler), "must be _LRScheduler" + lr_value = lr_sheduler() + lr_var = program._program.global_block().vars[lr_sheduler._var_name] + lr_tensor = _as_lodtensor(lr_value, core.CPUPlace(), lr_var.dtype) + exe.feed_and_split_tensor_into_local_scopes({ + lr_sheduler._var_name: lr_tensor + }) + fetch_var_names = list(map(_to_name_str, fetch_list)) tensors = exe.run(fetch_var_names, return_merged)._move_to_list() return as_numpy(tensors) if return_numpy else tensors @@ -1157,6 +1167,26 @@ class Executor(object): compiled = isinstance(program, compiler.CompiledProgram) + # Check if fluid.data() variable no feed data + if use_prune: + if compiled: + global_block = program._program.global_block() + else: + global_block = program.global_block() + for varname in global_block.vars: + vardesc = global_block.desc.find_var(cpt.to_bytes(varname)) + varobj = global_block.vars[varname] + + # Can not check var build by fluid.layers.data(), bucause fluid.layers.data() had not set need_check_feed + if vardesc.persistable() == False and \ + vardesc.type() == core.VarDesc.VarType.LOD_TENSOR and \ + vardesc.need_check_feed() == True and \ + varobj._stop_gradient == True and \ + varobj.is_data == True and \ + varobj.belong_to_optimizer == False and \ + varname not in feed: + raise ValueError('Need feed data for variable %s' % varname) + acp._auto_checkpoint(self, program) # For backward compatibility, run directly. 
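The `lr_sheduler` hook added to `_run_parallel` above (and mirrored in `_run_program` below) evaluates the scheduler on every run and writes the resulting value into the learning-rate variable's tensor before the program executes. A minimal sketch of that control flow; `StepDecay` is a hypothetical scheduler standing in for `paddle.optimizer.lr_scheduler._LRScheduler`, and a dict stands in for the executor scope:

```python
import numpy as np

class StepDecay:
    # Hypothetical scheduler: calling it returns the LR for the current epoch.
    def __init__(self, base_lr=0.1, gamma=0.5, step_size=2):
        self.base_lr, self.gamma, self.step_size = base_lr, gamma, step_size
        self.epoch = 0
        self._var_name = "learning_rate_0"

    def __call__(self):
        return self.base_lr * self.gamma ** (self.epoch // self.step_size)

    def step(self):
        self.epoch += 1

scope = {}                  # stands in for the executor scope
scheduler = StepDecay()

for _ in range(4):
    # What Executor.run() does before each launch: evaluate and feed the LR.
    lr_value = scheduler()
    scope[scheduler._var_name] = np.array([lr_value], dtype="float32")
    # ... the program would run here, reading scope["learning_rate_0"] ...
    scheduler.step()        # the user advances the schedule between runs

print(scope["learning_rate_0"])  # [0.05], the value fed for the 4th run (epoch 3)
```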
@@ -1203,7 +1233,7 @@ class Executor(object): def _run_program(self, program, feed, fetch_list, feed_var_name, fetch_var_name, scope, return_numpy, use_program_cache): - + from paddle.optimizer.lr_scheduler import _LRScheduler if feed is None: feed = {} elif isinstance(feed, (list, tuple)): @@ -1259,6 +1289,16 @@ class Executor(object): fetch_var_name=fetch_var_name) self._feed_data(program, feed, feed_var_name, scope) + if hasattr(program, 'lr_sheduler'): + assert isinstance(program.lr_sheduler, + _LRScheduler), "must be _LRScheduler" + lr_sheduler = program.lr_sheduler + lr_value = lr_sheduler() + lr_var = program.global_block().vars[lr_sheduler._var_name] + data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype)) + tensor = core.get_variable_tensor(scope, lr_sheduler._var_name) + tensor.set(data, self.place) + if not use_program_cache: self._default_executor.run(program.desc, scope, 0, True, True, fetch_var_name) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 8fe22024e6f12238e1b5bdb5adab052aff811b04..ef50294b8e762ae84f9b37f2571458e6588c4bc6 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -48,6 +48,7 @@ __all__ = [ 'cuda_pinned_places', 'in_dygraph_mode', 'is_compiled_with_cuda', + 'is_compiled_with_xpu', 'Variable', 'ComplexVariable', 'load_op_library', @@ -64,7 +65,7 @@ ZERO_VAR_SUFFIX = core.kZeroVarSuffix() CONTROL_DEP_VAR_PREFIX = core.kControlDepVarName() _dygraph_tracer_ = None -_dygraph_current_expected_place_ = None +_global_expected_place_ = None _current_device = None global_prog_seed = 0 @@ -247,7 +248,26 @@ def _dygraph_tracer(): def _current_expected_place(): - return _dygraph_current_expected_place_ + global _global_expected_place_ + if _global_expected_place_ is None: + if core.is_compiled_with_cuda(): + _global_expected_place_ = core.CUDAPlace(0) + else: + _global_expected_place_ = core.CPUPlace() + + return _global_expected_place_ + + +def _set_dygraph_tracer_expected_place(place): + global _dygraph_tracer_ + if _dygraph_tracer_ is not None: + _dygraph_tracer_._expected_place = place + + +def _set_expected_place(place): + global _global_expected_place_ + _global_expected_place_ = place + _set_dygraph_tracer_expected_place(place) # TODO(zhiqiu): remove this function. @@ -291,6 +311,21 @@ def _cuda_ids(): return device_ids +def is_compiled_with_xpu(): + """ + Whether this whl package can be used to run the model on XPU. + + Returns (bool): support xpu or not. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + support_xpu = fluid.is_compiled_with_xpu() + """ + return core.is_compiled_with_xpu() + + def is_compiled_with_cuda(): """ Whether this whl package can be used to run the model on GPU. @@ -1689,34 +1724,40 @@ def get_all_op_protos(): class ComplexVariable(object): """ - The Variable defined on the complex number domain. It contains two common - real number Variables as its members, :attr:`real` and :attr:`imag` + The ComplexTensor defined on the complex number domain. It contains two common + real number Tensor as its members, :attr:`real` and :attr:`imag` holding the real part and imaginary part of complex numbers respectively. **Notes**: - **The constructor of ComplexVariable should not be invoked directly.** + **The constructor of ComplexTensor should not be invoked directly.** - **Only support dygraph mode at present. 
Please use** :ref:`api_fluid_dygraph_to_variable` **to create a dygraph ComplexVariable with complex number data.** + **Only support dygraph mode at present. Please use** :ref:`api_fluid_dygraph_to_variable` **to create a dygraph ComplexTensor with complex number data.** Args: - real (Variable): The Variable holding real-part data. - imag (Variable): The Variable holding imaginery-part data. + real (Tensor): The Tensor holding real-part data. + imag (Tensor): The Tensor holding imaginery-part data. Examples: .. code-block:: python - import paddle.fluid as fluid + import paddle import numpy as np - a = np.array([1.0+2.0j, 0.2]) - with fluid.dygraph.guard(): - var = fluid.dygraph.to_variable(a, name="new_var") - print(var.name, var.dtype, var.shape) - # ({'real': u'new_var.real', 'imag': u'new_var.imag'}, 'complex128', [2L]) - print(var.numpy()) - # [1. +2.j 0.2+0.j] + paddle.enable_imperative() + x = paddle.to_tensor([1.0+2.0j, 0.2]) + print(x.name, x.dtype, x.shape) + # ({'real': 'generated_tensor_0.real', 'imag': 'generated_tensor_0.imag'}, 'complex128', [2L]) + print(x.numpy()) + # [1. +2.j 0.2+0.j] + print(type(x)) + # """ + def __new__(cls, *arg, **kwargs): + cls.__module__ = "paddle" + cls.__name__ = "ComplexTensor" + return super(ComplexVariable, cls).__new__(cls) + def __init__(self, real, imag): assert real.shape == imag.shape, "The real part and imaginary part " \ "of a ComplexVariable should have the same shape!" @@ -1763,7 +1804,9 @@ class ComplexVariable(object): return self.real.numpy() + 1j * self.imag.numpy() def __str__(self): - return "REAL: " + self.real.__str__() + "IMAG: " + self.imag.__str__() + return "ComplexTensor[real]: %s\n%s\nComplexTensor[imag]: %s\n%s" % ( + self.real.name, str(self.real.value().get_tensor()), self.imag.name, + str(self.imag.value().get_tensor())) __repr__ = __str__ @@ -4407,6 +4450,8 @@ class Program(object): p._current_role = self._current_role p.__op_role_var = self.__op_role_var p._appending_grad_times = self._appending_grad_times + if hasattr(self, 'lr_sheduler'): + p.lr_sheduler = self.lr_sheduler #NOTE(zhiqiu): we sync the cloned program, to update its program by # its desc. @@ -5092,12 +5137,13 @@ class Parameter(Variable): class ParamBase(core.VarBase): """ - ParamBase is derived from VarBase( Which is the Variable in Dygraph Mode ). A ParamBase is a persistable - VarBase, and will be updated by optimizers after each iteration. + ParamBase is derived from Tensor( Which is the concept in Dygraph Mode). + A ParamBase is a persistable Tensor, and will be updated by optimizers + after each iteration. The training of a neural network is essentially the updating of its ParamBase. - Relative to a general Variable, a ParamBase has several its own + Relative to a general Tensor, a ParamBase has several its own member variables: Args: @@ -5186,11 +5232,8 @@ class ParamBase(core.VarBase): # - data: [...] 
paddle.enable_static() """ - tensor = self.value().get_tensor() - if tensor._is_initialized(): - return 'Parameter: %s\n%s' % (self.name, str(tensor)) - else: - return 'Parameter: %s, not initialized' % (self.name) + return "Parameter containing:\n {}\n - stop_gradient: {}".format( + super(ParamBase, self).__str__(), self.stop_gradient) __repr__ = __str__ @@ -5411,14 +5454,14 @@ def _dygraph_guard(tracer): @signature_safe_contextmanager def _dygraph_place_guard(place): - global _dygraph_current_expected_place_ - tmp_place = _dygraph_current_expected_place_ - _dygraph_current_expected_place_ = place + global _global_expected_place_ + tmp_place = _global_expected_place_ + _global_expected_place_ = place try: yield finally: - _dygraph_current_expected_place_ = tmp_place + _global_expected_place_ = tmp_place def load_op_library(lib_filename): diff --git a/python/paddle/fluid/generator.py b/python/paddle/fluid/generator.py new file mode 100644 index 0000000000000000000000000000000000000000..e11b2e484dce1dd4260b3052d0f0a58f3cfc420a --- /dev/null +++ b/python/paddle/fluid/generator.py @@ -0,0 +1,60 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This is definition of generator class, which is for managing the state of the algorithm that produces pseudo random numbers.""" + +from . 
import core + +__all__ = ['Generator'] + +default_rng_seed_val = 34342423252 + + +class Generator(object): + """Generator class""" + + def __init__(self, device="CPU"): + """init""" + self.device = device + seed_in = default_rng_seed_val + if self.device == "CPU": + self.generator = core.Generator() + # self.generator.manual_seed(seed_in) + else: + raise ValueError( + "generator class with device %s does not exist, currently only support generator with device 'CPU' " + % device) + + def get_state(self): + return self.generator.get_state() + + def set_state(self, state): + self.generator.set_state(state) + + def manual_seed(self, seed): + self.generator.manual_seed(seed) + + def seed(self): + return self.generator.seed() + + def initial_seed(self): + return self.generator.initial_seed() + + def random(self): + return self.generator.random() + + def get_cpu_engine(self): + return self.generator.get_cpu_engine() + + def set_cpu_engine(self, cpu_engine): + self.generator.set_cpu_engine(cpu_engine) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py index d2c7397c85f8df155444d9272c7b75596f0fe169..1a7a82fbfac19b41e8b96c231ca74398f6b2214c 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py @@ -38,6 +38,7 @@ from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker from paddle.fluid.incubate.fleet.parameter_server import version from paddle.fluid.incubate.fleet.parameter_server.ir.public import get_sparse_tablenames from paddle.fluid.incubate.fleet.parameter_server.ir.public import _get_lr_ops +from paddle.fluid.incubate.fleet.parameter_server.ir.public import _has_global_step from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import TrainerRuntimeConfig, DistributedStrategy, \ SyncStrategy, AsyncStrategy, HalfAsyncStrategy, GeoStrategy, StrategyFactory @@ -161,9 +162,9 @@ class FleetTranspiler(Fleet): print(trainer_config) - lrs = _get_lr_ops(self._origin_main_program) + lrs = _has_global_step(_get_lr_ops(self._origin_main_program)) - if len(lrs) > 0: + if lrs > 0: kwargs = {"need_global_step": "1"} else: kwargs = {"need_global_step": "0"} @@ -186,14 +187,6 @@ class FleetTranspiler(Fleet): recv_ctx = fleet.compiled_config.get_communicator_recv_context( recv_type=1) - for name, ctx in send_ctx.items(): - print("name: {}, ctx: {}".format(name, ctx)) - - print("==== = ==== =============== ====") - - for name, ctx in recv_ctx.items(): - print("name: {}, ctx: {}".format(name, ctx)) - from paddle.fluid.communicator import Communicator self._communicator = Communicator( trainer_config.mode, kwargs, @@ -393,6 +386,12 @@ class FleetTranspiler(Fleet): "in fleet.save_inference_model() function, executor must be as Executor type" ) + # Todo(MrChengmo): support recv&save GPU-Kernel for ps-gpu model save + if not isinstance(executor.place, fluid.CPUPlace): + save_executor = Executor(fluid.CPUPlace()) + else: + save_executor = executor + if main_program is not None: if isinstance(main_program, CompiledProgram): raise TypeError( @@ -670,6 +669,11 @@ if you would like to save all variables in a raise TypeError( "in fleet.save_persistables() function, executor must be as Executor type" ) + # Todo(MrChengmo): support recv&save GPU-Kernel for ps-gpu model save + if not 
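The new `generator.py` module wraps `core.Generator`; a hedged usage sketch, assuming the pybind methods listed above (`manual_seed`, `get_state`, `set_state`) are exposed as shown:

.. code-block:: python

    from paddle.fluid.generator import Generator

    gen = Generator()            # only device="CPU" is supported; others raise ValueError
    gen.manual_seed(1234)        # reseed the underlying core.Generator
    state = gen.get_state()      # snapshot the engine state
    # ... run some random ops here ...
    gen.set_state(state)         # restore the snapshot to replay the same stream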
isinstance(executor.place, fluid.CPUPlace): + save_executor = Executor(fluid.CPUPlace()) + else: + save_executor = executor if main_program is None: main_program = self.main_program @@ -679,7 +683,8 @@ if you would like to save all variables in a "in fleet.save_persistables() function, main_program must be as Program type, CompiledProgram is not allowed" ) - self._save_distributed_persistables(executor, dirname, main_program) + self._save_distributed_persistables(save_executor, dirname, + main_program) @staticmethod def __exclude_vars(exclude_var_names=[]): diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py index b96eff19e9b9c5d8e78b85e61b9a69afee106546..f9889997d9e38c98c4a736a62dbc72da7029f337 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py @@ -43,6 +43,8 @@ from paddle.fluid.incubate.fleet.parameter_server.ir.ps_dispatcher import RoundR OP_NAME_SCOPE = "op_namescope" CLIP_OP_NAME_SCOPE = "@CLIP" STEP_COUNTER = "@PS_STEP_COUNTER@" +LEARNING_RATE_DECAY_COUNTER = "@LR_DECAY_COUNTER@" + OP_ROLE_VAR_ATTR_NAME = core.op_proto_and_checker_maker.kOpRoleVarAttrName() RPC_OP_ROLE_ATTR_NAME = core.op_proto_and_checker_maker.kOpRoleAttrName() RPC_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.RPC @@ -62,6 +64,17 @@ def _get_lr_ops(program): return lr_ops +def _has_global_step(lr_ops): + if len(lr_ops) > 0: + for idx, op in enumerate(lr_ops): + if op.type != 'increment': + continue + counter = op.input("X")[0] + if counter == LEARNING_RATE_DECAY_COUNTER: + return True + return False + + def is_sparse_op(op): if op.type == "lookup_table" and op.attr('is_sparse') is True and op.attr( 'is_distributed') is False: diff --git a/python/paddle/fluid/input.py b/python/paddle/fluid/input.py index 347927509e6d539555bd4d1b7a594febbc68f57b..15a3022f932f4a702bf7f94ed936468b6a06e94e 100644 --- a/python/paddle/fluid/input.py +++ b/python/paddle/fluid/input.py @@ -17,10 +17,12 @@ import warnings from .framework import Variable, in_dygraph_mode from .layer_helper import LayerHelper from .data_feeder import check_variable_and_dtype, check_dtype +from ..utils import deprecated __all__ = ['one_hot', 'embedding'] +@deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot') def one_hot(input, depth, allow_out_of_range=False): """ :alias_main: paddle.nn.functional.one_hot diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index 0e813e21ea3c0677fff8e9ac06af654ca52c02c4..ef469377acfbc0c2c521de61f8eacc0f7c9f0854 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -45,10 +45,23 @@ class SimpleLayer(Layer): def run_check(): - ''' install check to verify if install is success - + """To check whether install is successful This func should not be called only if you need to verify installation - ''' + + Examples: + .. code-block: python + + import paddle.fluid as fluid + fluid.install_check.run_check() + + # If installed successfully, output may be + # Running Verify Fluid Program ... + # W0805 04:24:59.496919 35357 device_context.cc:268] Please NOTE: device: 0, CUDA Capability: 70, Driver API Version: 10.2, Runtime API Version: 10.1 + # W0805 04:24:59.505594 35357 device_context.cc:276] device: 0, cuDNN Version: 7.6. + # Your Paddle Fluid works well on SINGLE GPU or CPU. + # Your Paddle Fluid works well on MUTIPLE GPU or CPU. 
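Regarding the `_has_global_step` helper introduced in `ir/public.py`: the transpiler now asks whether any learning-rate op increments `@LR_DECAY_COUNTER@` rather than just counting LR ops. A minimal sketch, with the default main program standing in for a real trainer program:

.. code-block:: python

    import paddle.fluid as fluid
    from paddle.fluid.incubate.fleet.parameter_server.ir import public

    main_program = fluid.default_main_program()   # placeholder for the trainer program
    lr_ops = public._get_lr_ops(main_program)
    # True only when some LR op increments the @LR_DECAY_COUNTER@ variable
    need_global_step = public._has_global_step(lr_ops)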
+ # Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now + """ print("Running Verify Fluid Program ... ") device_list = [] diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index 0b57b3fefd414c483c537957ed6ca3cfdd58fa65..6e38c855562809fa38cddbf6e58eb4eee6b899f3 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -23,8 +23,13 @@ from .param_attr import ParamAttr, WeightNormParamAttr from . import core from .initializer import _global_weight_initializer, _global_bias_initializer +__all__ = ['LayerHelperBase'] + class LayerHelperBase(object): + # global dtype + __dtype = "float32" + def __init__(self, name, layer_type): self._layer_type = layer_type self._name = name @@ -45,6 +50,14 @@ class LayerHelperBase(object): def startup_program(self): return default_startup_program() + @classmethod + def set_default_dtype(cls, dtype): + cls.__dtype = dtype + + @classmethod + def get_default_dtype(cls): + return cls.__dtype + def to_variable(self, value, name=None): """ The API will create a ``Variable`` object from numpy\.ndarray or Variable object. @@ -277,7 +290,7 @@ class LayerHelperBase(object): def create_parameter(self, attr, shape, - dtype, + dtype=None, is_bias=False, default_initializer=None, stop_gradient=False, @@ -299,6 +312,9 @@ class LayerHelperBase(object): if not attr: return None assert isinstance(attr, ParamAttr) + # set global dtype + if not dtype: + dtype = self.__dtype if is_bias: suffix = 'b' default_initializer = _global_bias_initializer( @@ -372,6 +388,9 @@ class LayerHelperBase(object): based on operator's `VarTypeInference` implementation in infer_var_type. """ + # set global dtype + if not dtype: + dtype = self.__dtype return self.main_program.current_block().create_var( name=unique_name.generate_with_ignorable_key(".".join( [self.name, 'tmp'])), diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py index 4217a98798ebbb46cb5b84e4c15fea4b4f0840ac..f468815c99ea2751913c5535c721ee9a6a5c5052 100644 --- a/python/paddle/fluid/layers/loss.py +++ b/python/paddle/fluid/layers/loss.py @@ -16,6 +16,7 @@ from __future__ import print_function import numpy as np from functools import partial, reduce +from paddle.utils import deprecated from . import nn from .layer_function_generator import templatedoc from ..layer_helper import LayerHelper @@ -1619,6 +1620,7 @@ def huber_loss(input, label, delta): return out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.kl_div") @templatedoc() def kldiv_loss(x, target, reduction='mean', name=None): """ diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py index fd1e7f800b928cfcecb9e09877f08c42c81defa6..38fc34472c8bc64338e2468bdf3f4b0bab1370ce 100644 --- a/python/paddle/fluid/layers/math_op_patch.py +++ b/python/paddle/fluid/layers/math_op_patch.py @@ -16,6 +16,7 @@ from __future__ import print_function import warnings import inspect +import paddle from .. 
import core from ..framework import Variable, unique_name @@ -45,6 +46,7 @@ EXPRESSION_MAP = { "__pow__": "A ** B", "__rpow__": "A **= B", "__floordiv__": "A //B", + "__rfloordiv__": "A //= B", "__mod__": "A % B", "__eq__": "A == B", "__ne__": "A != B", @@ -54,6 +56,31 @@ EXPRESSION_MAP = { "__ge__": "A >= B" } +# method for Tensor from paddle.tensor +# edit it when paddle.tensor has new method about Tensor operation +common_methods = [ + 'exp', 'tanh', 'atan', 'sqrt', 'rsqrt', 'abs', 'ceil', 'floor', 'cos', + 'acos', 'asin', 'sin', 'sinh', 'cosh', 'round', 'reciprocal', 'square', + 'rank', 'matmul', 'dot', 'norm', 'transpose', 'dist', 't', 'cross', + 'cholesky', 'bmm', 'histogram', 'equal', 'greater_equal', 'greater_than', + 'is_empty', 'isfinite', 'less_equal', 'less_than', 'logical_and', + 'logical_not', 'logical_or', 'logical_xor', 'not_equal', 'reduce_all', + 'reduce_any', 'allclose', 'equal_all', 'cast', 'expand', 'expand_as', + 'tile', 'flatten', 'gather', 'gather_nd', 'reshape', 'reverse', 'scatter', + 'scatter_nd_add', 'scatter_nd', 'shard_index', 'slice', 'split', 'squeeze', + 'strided_slice', 'unique', 'unique_with_counts', 'unsqueeze', 'flip', + 'unbind', 'roll', 'cumsum', 'increment', 'log', 'pow', 'reciprocal', + 'round', 'rsqrt', 'scale', 'sign', 'stanh', 'sum', 'reduce_prod', 'max', + 'min', 'mm', 'div', 'multiply', 'add', 'logsumexp', 'log1p', 'erf', + 'addcmul', 'addmm', 'clamp', 'trace', 'kron', 'argmax', 'argmin', 'argsort', + 'has_inf', 'has_nan', 'topk', 'index_select', 'nonzero', 'sort', + 'index_sample', 'mean', 'std', 'var', 'elementwise_add', 'elementwise_div', + 'elementwise_floordiv', 'elementwise_mod', 'elementwise_pow', + 'elementwise_sub' +] + +_already_patch_variable = False + def monkey_patch_variable(): def unique_tmp_name(): @@ -179,7 +206,7 @@ def monkey_patch_variable(): "out_dtype": out.dtype}) return out - def _scalar_elementwise_op_(var, scale, bias): + def _scalar_op_(var, scale, bias): block = current_block(var) out = create_new_tmp_var(block, var.dtype) block.append_op( @@ -191,27 +218,46 @@ def monkey_patch_variable(): return out def _neg_(var): - return _scalar_elementwise_op_(var, -1.0, 0.0) + return _scalar_op_(var, -1.0, 0.0) + + def _scalar_add_(var, value): + return _scalar_op_(var, 1.0, value) - def _scalar_elementwise_add_(var, value): - return _scalar_elementwise_op_(var, 1.0, value) + def _scalar_sub_(var, value): + return _scalar_op_(var, 1.0, -value) - def _scalar_elementwise_sub_(var, value): - return _scalar_elementwise_op_(var, 1.0, -value) + def _scalar_rsub_(var, value): + return _scalar_op_(var, -1.0, value) - def _scalar_elementwise_rsub_(var, value): - return _scalar_elementwise_op_(var, -1.0, value) + def _scalar_mul_(var, value): + return _scalar_op_(var, value, 0.0) - def _scalar_elementwise_mul_(var, value): - return _scalar_elementwise_op_(var, value, 0.0) + def _scalar_div_(var, value): + return _scalar_op_(var, 1.0 / value, 0.0) - def _scalar_elementwise_div_(var, value): - return _scalar_elementwise_op_(var, 1.0 / value, 0.0) + # TODO(shenliang03): currently, it supports divide, floor_divide, remainder + # for binary operator by using the api to achieve the type promotion + def _binary_method_creator_(op_type, reverse=False): + import paddle + + def __impl__(self, other_var): + op = getattr(paddle, op_type) + if reverse: + return op(other_var, self) + else: + return op(self, other_var) + + __impl__.__doc__ = """ + + See paddle.{}""".format(op_type) + __impl__.__name__ = op_type + + return __impl__ - def 
_elemwise_method_creator_(method_name, - op_type, - reverse=False, - scalar_method=None): + def _binary_creator_(method_name, + op_type, + reverse=False, + scalar_method=None): def __impl__(self, other_var): # FIXME(zjl): elementwise_div between integers cannot be converted to scale, # which may lose accuracy. This is a hot fix for release 1.6. @@ -296,35 +342,56 @@ def monkey_patch_variable(): __impl__.__name__ = method_name return __impl__ - # inject methods - for method_name, op_type, reverse, scalar_method in ( - ("__add__", "elementwise_add", False, _scalar_elementwise_add_), - # a+b == b+a. Do not need to reverse explicitly - ("__radd__", "elementwise_add", False, _scalar_elementwise_add_), - ("__sub__", "elementwise_sub", False, _scalar_elementwise_sub_), - ("__rsub__", "elementwise_sub", True, _scalar_elementwise_rsub_), - ("__mul__", "elementwise_mul", False, _scalar_elementwise_mul_), - # a*b == b*a. Do not need to reverse explicitly - ("__rmul__", "elementwise_mul", False, _scalar_elementwise_mul_), - ("__div__", "elementwise_div", False, _scalar_elementwise_div_), - ("__truediv__", "elementwise_div", False, _scalar_elementwise_div_), - ("__rdiv__", "elementwise_div", True, None), - ("__rtruediv__", "elementwise_div", True, None), - ("__pow__", "elementwise_pow", False, None), - ("__rpow__", "elementwise_pow", True, None), - ("__floordiv__", "elementwise_floordiv", False, None), - ("__mod__", "elementwise_mod", False, None), - # for logical compare - ("__eq__", "equal", False, None), - ("__ne__", "not_equal", False, None), - ("__lt__", "less_than", False, None), - ("__le__", "less_equal", False, None), - ("__gt__", "greater_than", False, None), - ("__ge__", "greater_equal", False, None)): - setattr(Variable, method_name, - _elemwise_method_creator_(method_name, op_type, reverse, - scalar_method)) - - # b = -a - Variable.__neg__ = _neg_ - Variable.astype = astype + variable_methods = [ + # b=-a + ('__neg__', _neg_), + ('astype', astype), + ('__add__', _binary_creator_('__add__', 'elementwise_add', False, + _scalar_add_)), + # a+b == b+a. Do not need to reverse explicitly + ('__radd__', + _binary_creator_('__radd__', 'elementwise_add', False, _scalar_add_)), + ('__sub__', _binary_creator_('__sub__', 'elementwise_sub', False, + _scalar_sub_)), + ('__rsub__', _binary_creator_('__rsub__', 'elementwise_sub', True, + _scalar_rsub_)), + ('__mul__', _binary_creator_('__mul__', 'elementwise_mul', False, + _scalar_mul_)), + # a*b == b*a. 
Do not need to reverse explicitly + ('__rmul__', + _binary_creator_('__rmul__', 'elementwise_mul', False, _scalar_mul_)), + ('__pow__', _binary_creator_('__pow__', 'elementwise_pow', False, + None)), + ('__rpow__', _binary_creator_('__rpow__', 'elementwise_pow', True, + None)), + # These binary use paddle.optype + ('__div__', _binary_method_creator_('divide', False)), + ('__rdiv__', _binary_method_creator_('divide', True)), + ('__truediv__', _binary_method_creator_('divide', False)), + ('__rtruediv__', _binary_method_creator_('divide', True)), + ('__floordiv__', _binary_method_creator_('floor_divide', False)), + ('__rfloordiv__', _binary_method_creator_('floor_divide', True)), + ('__mod__', _binary_method_creator_('remainder', False)), + # for logical compare + ('__eq__', _binary_creator_('__eq__', 'equal', False, None)), + ('__ne__', _binary_creator_('__ne__', 'not_equal', False, None)), + ('__lt__', _binary_creator_('__lt__', 'less_than', False, None)), + ('__le__', _binary_creator_('__le__', 'less_equal', False, None)), + ('__gt__', _binary_creator_('__gt__', 'greater_than', False, None)), + ('__ge__', _binary_creator_('__ge__', 'greater_equal', False, None)) + ] + + global _already_patch_variable + if not _already_patch_variable: + for method in variable_methods: + method_name = method[0] + method_impl = method[1] + setattr(Variable, method_name, method_impl) + else: + import paddle.tensor + for method_name in common_methods: + if hasattr(Variable, method_name): continue + method_impl = getattr(paddle.tensor, method_name, None) + if method_impl: setattr(Variable, method_name, method_impl) + + _already_patch_variable = True diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py old mode 100644 new mode 100755 index 2fb518221e855d2242ace9844f461463ca38931e..da66503ceee37e30fafa0d5402edd2a188578a0b --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -26,7 +26,7 @@ import six import paddle from ..layer_helper import LayerHelper from ..initializer import Normal, Constant, NumpyArrayInitializer -from ..framework import Variable, OpProtoHolder, in_dygraph_mode, dygraph_only, _dygraph_tracer, default_main_program +from ..framework import Variable, OpProtoHolder, in_dygraph_mode, dygraph_only, _dygraph_tracer, default_main_program, _varbase_creator from .. import dygraph_utils from ..param_attr import ParamAttr from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_ @@ -35,6 +35,7 @@ from . import utils from .. import unique_name from functools import reduce from .. import core +from ...utils import deprecated from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype import paddle from paddle.utils import deprecated @@ -931,6 +932,7 @@ def cos_sim(X, Y): return out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.dropout") def dropout(x, dropout_prob, is_test=False, @@ -938,9 +940,6 @@ def dropout(x, name=None, dropout_implementation="downgrade_in_infer"): """ - :alias_main: paddle.nn.functional.dropout - :alias: paddle.nn.functional.dropout,paddle.nn.functional.common.dropout - :old_api: paddle.fluid.layers.dropout Computes dropout. @@ -1188,6 +1187,7 @@ def chunk_eval(input, num_correct_chunks) +@deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax") def softmax(input, use_cudnn=False, name=None, axis=-1): """ This operator implements the softmax layer. 
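To illustrate the `math_op_patch.py` rework above: `/`, `//` and `%` on static `Variable`s are now routed through the `paddle.*` binary APIs (`divide`, `floor_divide`, `remainder`), while the scalar fast paths keep using a single `scale` op. A rough sketch under those assumptions:

.. code-block:: python

    import paddle.fluid as fluid

    a = fluid.data(name="a", shape=[2, 3], dtype="float32")
    b = fluid.data(name="b", shape=[2, 3], dtype="float32")
    c = a / b        # dispatched to paddle.divide via _binary_method_creator_
    d = a + 1.0      # scalar path: still a single scale op

    i = fluid.data(name="i", shape=[4], dtype="int64")
    j = fluid.data(name="j", shape=[4], dtype="int64")
    q = i // j       # paddle.floor_divide
    r = i % j        # paddle.remainder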
The calculation process is as follows: @@ -3362,6 +3362,15 @@ def data_norm(input, "BatchSum": batch_sum, "BatchSquareSum": batch_square_sum } + attrs = { + "epsilon": epsilon, + "sync_stats": sync_stats, + "summary_decay_rate": summary_decay_rate, + } + if slot_dim > 0: + attrs["slot_dim"] = slot_dim + if enable_scale_and_shift: + attrs["enable_scale_and_shift"] = enable_scale_and_shift if enable_scale_and_shift: inputs["scale_w"] = scale_w inputs["bias"] = bias @@ -3376,13 +3385,7 @@ def data_norm(input, "BatchSum": batch_sum, "BatchSquareSum": batch_square_sum }, - attrs={ - "epsilon": epsilon, - "slot_dim": slot_dim, - "sync_stats": sync_stats, - "summary_decay_rate": summary_decay_rate, - "enable_scale_and_shift": enable_scale_and_shift - }) + attrs=attrs) return helper.append_activation(data_norm_out) @@ -4594,7 +4597,7 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None): Args: input (Variable): The input variable which is a Tensor, the data type is float32, float64, int32, int64. - dim (list|int, optional): The dimensions along which the product is performed. If + dim (int|list|tuple, optional): The dimensions along which the product is performed. If :attr:`None`, multiply all elements of :attr:`input` and return a Tensor variable with a single element, otherwise must be in the range :math:`[-rank(input), rank(input))`. If :math:`dim[i] < 0`, @@ -4634,9 +4637,18 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None): fluid.layers.reduce_prod(y, dim=[0, 1]) # [105.0, 384.0] """ helper = LayerHelper('reduce_prod', **locals()) - out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) if dim is not None and not isinstance(dim, list): - dim = [dim] + if isinstance(dim, tuple): + dim = list(dim) + elif isinstance(dim, int): + dim = [dim] + else: + raise TypeError( + "The type of axis must be int, list or tuple, but received {}". + format(type(dim))) + check_variable_and_dtype( + input, 'input', ['float32', 'float64', 'int32', 'int64'], 'reduce_prod') + out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) helper.append_op( type='reduce_prod', inputs={'X': input}, @@ -4866,7 +4878,7 @@ def split(input, num_or_sections, dim=-1, name=None): check_variable_and_dtype( input, 'input', - ['bool', 'float16', 'float32', 'float64', 'int32', 'in64'], 'split') + ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'], 'split') check_type(num_or_sections, 'num_or_sections', (list, int, tuple), 'split') check_type(dim, 'dim', (int, Variable), 'split') if isinstance(dim, Variable): @@ -5024,6 +5036,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): return out +@deprecated(since="2.0.0", update_to="paddle.matmul") def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): """ Applies matrix multiplication to two tensors. 
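For the `reduce_prod` change above (tuple support for `dim` plus up-front dtype validation), a brief example:

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.data(name="x_prod", shape=[2, 3, 4], dtype="float32")
    # dim now accepts an int, list or tuple; other types raise TypeError
    y = fluid.layers.reduce_prod(x, dim=(0, 1))   # result shape: [4]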
@@ -5095,7 +5108,65 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): y = fluid.layers.data(name='y', shape=[3, 2], dtype='float32') out = fluid.layers.matmul(x, y, True, True) """ - return paddle.matmul(x, y, transpose_x, transpose_y, alpha, name) + attrs = { + 'transpose_X': transpose_x, + 'transpose_Y': transpose_y, + 'alpha': float(alpha), + } + + if in_dygraph_mode(): + out = _varbase_creator(dtype=x.dtype) + core.ops.matmul(x, y, out, 'transpose_X', transpose_x, 'transpose_Y', + transpose_y, 'alpha', float(alpha)) + return out + + def __check_input(x, y): + var_names = {'x': x, 'y': y} + for name, val in var_names.items(): + check_variable_and_dtype( + val, name, ['float16', 'float32', 'float64'], 'matmul') + x_shape = list(x.shape) + y_shape = list(y.shape) + if len(x_shape) == 1: + x_shape = [1] + x_shape + if len(y_shape) == 1: + y_shape = y_shape + [1] + + # check the inner 2 dimensions + if transpose_x: + x_shape[-2], x_shape[-1] = x_shape[-1], x_shape[-2] + if transpose_y: + y_shape[-2], y_shape[-1] = y_shape[-1], y_shape[-2] + if x_shape[-1] != y_shape[-2]: + assert (x_shape[-1] == -1) or (y_shape[-2] == -1), \ + "After performing an optional transpose, Input X's width should be " \ + "equal to Y's width for multiplication " \ + "prerequisites. But received X's shape: %s, Y's shape: %s\n" % \ + (x_shape, y_shape) + + if len(y_shape) > 2 and len(x_shape) > 2: + for i, dim_x in enumerate(x_shape[:-2]): + # don't check neg shape + if dim_x < 0 or y_shape[i] < 0: + continue + if dim_x != y_shape[i]: + raise ValueError( + "When the matrix is larger than 2 dimensions, the higher " + "dimensional values of the two matrices need to be equal. " + "But received x_shape[%d] != y_shape[%d]. X's shape: %s, " + "Y's shape: %s.\n" % (i, i, x_shape, y_shape)) + + __check_input(x, y) + + helper = LayerHelper('matmul', **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='matmul', + inputs={'X': x, + 'Y': y}, + outputs={'Out': out}, + attrs=attrs) + return out def topk(input, k, name=None): @@ -5800,6 +5871,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): return loss +@deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot') def one_hot(input, depth, allow_out_of_range=False): """ @@ -5963,7 +6035,6 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): """ :alias_main: paddle.reshape :alias: paddle.reshape,paddle.tensor.reshape,paddle.tensor.manipulation.reshape - :old_api: paddle.fluid.layers.reshape This operator changes the shape of ``x`` without changing its data. @@ -6006,14 +6077,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): The parameter ``actual_shape`` will be deprecated in the future and only use ``shape`` instead to represent the target shape. Args: - x(Variable): A ``Tensor`` or ``LoDTensor`` . The data type is ``float32``, ``float64``, ``int32`` or ``int64``. - shape(list|tuple|Variable): Define the target shape. At most one dimension of the target shape can be -1. + x(Tensor): An N-D Tensor. The data type is ``float32``, ``float64``, ``int32`` or ``int64``. + shape(list|tuple|Tensor): Define the target shape. At most one dimension of the target shape can be -1. The data type is ``int32`` . If ``shape`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. - If ``shape`` is an Variable, it should be an 1-D Tensor . 
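The matmul hunk above makes `fluid.layers.matmul` append the legacy `matmul` op itself (keeping `alpha` and the `transpose_X`/`transpose_Y` attributes) instead of forwarding to `paddle.matmul`; the call site is unchanged:

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.data(name="mat_x", shape=[2, 3], dtype="float32")
    y = fluid.data(name="mat_y", shape=[3, 4], dtype="float32")
    out = fluid.layers.matmul(x, y)   # shape [2, 4]; builds the legacy matmul op directly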
+ If ``shape`` is an Tensor, it should be an 1-D Tensor . actual_shape(variable, optional): An 1-D ``Tensor`` or ``LoDTensor`` . The data type is ``int32`` . If provided, reshape according to this given shape rather than ``shape`` specifying shape. That is to say ``actual_shape`` has a higher priority - than ``shape(list|tuple)`` but not ``shape(Variable)``. \ + than ``shape(list|tuple)`` but not ``shape(Tensor)``. \ This argument ``actual_shape`` will be removed in a future version. \ Instructions for updating: ``actual_shape`` will be removed in future versions and replaced by ``shape``. act (str, optional): The non-linear activation to be applied to the reshaped input. Default None. @@ -6025,10 +6096,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): For more information, please refer to :ref:`api_guide_Name` . Returns: - Variable: A ``Tensor`` or ``LoDTensor``. The data type is same as ``x``. It is a new tensor variable if ``inplace`` is ``False``, otherwise it is ``x``. If ``act`` is None, return the reshaped tensor variable, otherwise return the activated tensor variable. + Tensor: A reshaped Tensor with the same data type as ``x``. It is a new tensor variable if ``inplace`` is ``False``, otherwise it is ``x``. If ``act`` is None, return the reshaped tensor variable, otherwise return the activated tensor variable. Raises: - TypeError: If actual_shape is neither Variable nor None. + TypeError: If actual_shape is neither Tensor nor None. ValueError: If more than one elements of ``shape`` is -1. ValueError: If the element of ``shape`` is 0, the corresponding dimension should be less than or equal to the dimension of ``x``. ValueError: If the elements in ``shape`` is negative except -1. @@ -6039,7 +6110,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): import paddle.fluid as fluid # example 1: - # attr shape is a list which doesn't contain tensor Variable. + # attr shape is a list which doesn't contain Tensors. data_1 = fluid.data( name='data_1', shape=[2, 4, 6], dtype='float32') reshaped_1 = fluid.layers.reshape( @@ -6047,7 +6118,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): # the shape of reshaped_1 is [2,4,3,2]. # example 2: - # attr shape is a list which contains tensor Variable. + # attr shape is a list which contains Tensors. data_2 = fluid.layers.fill_constant([2,25], "int32", 3) dim = fluid.layers.fill_constant([1], "int32", 5) reshaped_2 = fluid.layers.reshape(data_2, shape=[dim, 10]) @@ -8139,9 +8210,9 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'): return image_resize(input=input, out_shape=out_shape, resample=resample) +@deprecated(since="2.0.0", update_to="paddle.gather") def gather(input, index, overwrite=True): """ - **Gather Layer** Output is obtained by gathering entries of the outer-most dimension of X indexed by `index` and concatenate them together. @@ -8168,19 +8239,21 @@ def gather(input, index, overwrite=True): [5, 6]] Args: - input (Variable): The source input tensor with rank>=1. Supported data type is + input (Tensor): The source input tensor with rank>=1. Supported data type is int32, int64, float32, float64 and uint8 (only for CPU), float16 (only for GPU). - index (Variable): The index input tensor with rank=1. Data type is int32 or int64. + index (Tensor): The index input tensor with rank=1. Data type is int32 or int64. overwrite (bool, optional): The mode that updating the grad when has same index. 
If True, use the overwrite mode to update the grad of the same index, if False, use the accumulate mode to update the grad of the same index. Default value is True. - - Returns: - output (Variable): The output is a tensor with the same rank as input. + output (Tensor): The output is a tensor with the same rank as input. + + Raises: + TypeError: ``x`` must be a Tensor and the data type of ``x`` must to be one of float16, float32, float64, int32, int64, uint8. + TypeError: ``index`` must be a Tensor and the data type of ``index`` must be int32 or int64. Examples: @@ -8191,6 +8264,13 @@ def gather(input, index, overwrite=True): index = fluid.data(name='index', shape=[-1, 1], dtype='int32') output = fluid.layers.gather(x, index) """ + if in_dygraph_mode(): + return core.ops.gather(input, index, None) + + check_variable_and_dtype( + input, 'x', + ['float16', 'float32', 'float64', 'int32', 'int64', 'uint8'], 'gather') + check_variable_and_dtype(index, 'index', ['int32', 'int64'], 'gather') helper = LayerHelper('gather', **locals()) dtype = helper.input_dtype() out = helper.create_variable_for_type_inference(dtype) @@ -8203,6 +8283,7 @@ def gather(input, index, overwrite=True): return out +@deprecated(since="2.0.0", update_to="paddle.gather_nd") def gather_nd(input, index, name=None): """ **Gather Nd Layer** @@ -8255,14 +8336,18 @@ def gather_nd(input, index, name=None): = [23] Args: - input (Variable): The source input. Its dtype should be int32, int64, float32, float64. - index (Variable): The index input with rank > 1, index.shape[-1] <= input.rank. - Its dtype should be int32, int64. - name (str|None): A name for this layer(optional). If set None, the - layer will be named automatically. + input (Tensor): The input Tensor which it's data type should be bool, float32, float64, int32, int64. + index (Tensor): The index input with rank > 1, index.shape[-1] <= input.rank. + Its dtype should be int32, int64. + name(str, optional): The default value is None. Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name` . Returns: - output (Variable): A tensor with the shape index.shape[:-1] + input.shape[index.shape[-1]:] + output (Tensor): A tensor with the shape index.shape[:-1] + input.shape[index.shape[-1]:] + + Raises: + TypeError: ``input`` must be a Tensor and the data type of ``input`` must be one of float32, float64, int32 and int64. + TypeError: ``index`` must be a Tensor and the data type of ``index`` must be one of int32 and int64. 
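For the `gather` changes above (a dygraph fast path plus explicit dtype checks), a short dygraph sketch:

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    paddle.disable_static()
    x = paddle.to_tensor(np.array([[1, 2], [3, 4], [5, 6]], dtype="float32"))
    index = paddle.to_tensor(np.array([0, 2], dtype="int32"))
    out = fluid.layers.gather(x, index)   # dygraph now calls core.ops.gather directly
    # out.numpy() -> [[1., 2.], [5., 6.]]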
Examples: @@ -8274,6 +8359,12 @@ def gather_nd(input, index, name=None): output = fluid.layers.gather_nd(x, index) """ + if in_dygraph_mode(): + return core.ops.gather_nd(input, index) + check_variable_and_dtype(input, 'input', + ['bool', 'float32', 'float64', 'int32', 'int64'], + 'gather_np') + check_variable_and_dtype(index, 'index', ['int32', 'int64'], 'gather_np') helper = LayerHelper('gather_nd', **locals()) dtype = helper.input_dtype() output = helper.create_variable_for_type_inference(dtype) @@ -8285,6 +8376,7 @@ def gather_nd(input, index, name=None): return output +@deprecated(since="2.0.0", update_to="paddle.scatter") def scatter(input, index, updates, name=None, overwrite=True): """ :alias_main: paddle.scatter @@ -8600,7 +8692,7 @@ def log(x, name=None): return out -@templatedoc() +@deprecated(since="2.0.0", update_to="paddle.nn.functional.relu") def relu(x, name=None): """ ${comment} @@ -8642,11 +8734,9 @@ def relu(x, name=None): return out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.selu") def selu(x, scale=None, alpha=None, name=None): """ - :alias_main: paddle.nn.functional.selu - :alias: paddle.nn.functional.selu,paddle.nn.functional.activation.selu - :old_api: paddle.fluid.layers.selu Selu Operator. @@ -9261,7 +9351,7 @@ def pad2d(input, return out -@templatedoc() +@deprecated(since="2.0.0", update_to="paddle.nn.functional.elu") def elu(x, alpha=1.0, name=None): """ :alias_main: paddle.nn.functional.elu @@ -9303,12 +9393,9 @@ def elu(x, alpha=1.0, name=None): return out -@templatedoc() +@deprecated(since="2.0.0", update_to="paddle.nn.functional.relu6") def relu6(x, threshold=6.0, name=None): """ - :alias_main: paddle.nn.functional.relu6 - :alias: paddle.nn.functional.relu6,paddle.nn.functional.activation.relu6 - :old_api: paddle.fluid.layers.relu6 ${comment} @@ -9580,6 +9667,7 @@ def swish(x, beta=1.0, name=None): return out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.prelu") def prelu(x, mode, param_attr=None, name=None): """ :api_attr: Static Graph @@ -9708,13 +9796,10 @@ def brelu(x, t_min=0.0, t_max=24.0, name=None): return out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.leaky_relu") @templatedoc() def leaky_relu(x, alpha=0.02, name=None): """ - :alias_main: paddle.nn.functional.leaky_relu - :alias: paddle.nn.functional.leaky_relu,paddle.nn.functional.activation.leaky_relu - :old_api: paddle.fluid.layers.leaky_relu - ${comment} Args: x(${x_type}): ${x_comment} @@ -9743,19 +9828,7 @@ def leaky_relu(x, alpha=0.02, name=None): res_val, = exe.run(fluid.default_main_program(), feed={'x':x_i}, fetch_list=[res]) print(res_val) # [[-0.1, 2], [3, -0.4]] """ - if in_dygraph_mode(): - return core.ops.leaky_relu(x, 'alpha', alpha) - - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], - 'leaky_relu') - - inputs = {'X': [x]} - attrs = {'alpha': alpha} - helper = LayerHelper('leaky_relu', **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='leaky_relu', inputs=inputs, outputs={'Out': out}, attrs=attrs) - return out + return paddle.nn.functional.leaky_relu(x, alpha, name) def soft_relu(x, threshold=40.0, name=None): @@ -10094,12 +10167,12 @@ def unstack(x, axis=0, num=None): raised. Args: - x (Variable): Input Tensor. It is a N-D Tensors of data types float32, float64, int32, int64. + x (Tensor): Input Tensor. It is a N-D Tensors of data types float32, float64, int32, int64. axis (int): The axis along which the input is unstacked. 
num (int|None): The number of output variables. Returns: - list(Variable): The unstacked Tensors list. The list elements are N-D Tensors of data types float32, float64, int32, int64. + list(Tensor): The unstacked Tensors list. The list elements are N-D Tensors of data types float32, float64, int32, int64. Raises: ValueError: If x.shape[axis] <= 0 or axis is not in range [-D, D). @@ -10108,7 +10181,7 @@ def unstack(x, axis=0, num=None): .. code-block:: python import paddle.fluid as fluid - x = fluid.layers.data(name='x', shape=[2, 3, 5], dtype='float32') # create a tensor with shape=[2, 3, 5] + x = fluid.data(name='x', shape=[2, 3, 5], dtype='float32') # create a tensor with shape=[2, 3, 5] y = fluid.layers.unstack(x, axis=1) # unstack with second axis, which results 3 tensors with shape=[2, 5] """ @@ -10320,6 +10393,7 @@ def expand_as(x, target_tensor, name=None): from paddle.fluid.framework import convert_np_dtype_to_dtype_ +@deprecated(since='1.8.0', update_to="paddle.uniform") @templatedoc() def uniform_random_batch_size_like(input, shape, @@ -10415,6 +10489,7 @@ def uniform_random_batch_size_like(input, return out +@deprecated(since="2.0.0", update_to="paddle.normal") @templatedoc() def gaussian_random(shape, mean=0.0, @@ -10589,6 +10664,7 @@ def sampling_id(x, min=0.0, max=1.0, seed=0, dtype='float32'): return out +@deprecated(since='1.8.0', update_to="paddle.normal") @templatedoc() def gaussian_random_batch_size_like(input, shape, @@ -11177,6 +11253,7 @@ def rank(input): return out +@deprecated(since="2.0.0", update_to="paddle.numel") def size(input): """ **Size Layer** @@ -11184,11 +11261,14 @@ def size(input): Returns the number of elements for a tensor, which is a int64 Tensor with shape [1]. Args: - input (Variable): The input variable. + input (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64. Returns: - Variable: The number of elements for the input variable. + Tensor: The number of elements for the input Tensor. + Raises: + TypeError: ``input`` must be a Tensor and the data type of ``input`` must be one of bool, float16, float32, float64, int32, int64. + Examples: .. 
code-block:: python @@ -11199,6 +11279,11 @@ def size(input): rank = layers.size(input) # 300 """ + if in_dygraph_mode(): + return core.ops.size(x) + check_variable_and_dtype( + x, 'x', ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'], + "size") helper = LayerHelper('size', **locals()) out = helper.create_variable_for_type_inference(dtype='int64') helper.append_op(type='size', inputs={'Input': input}, outputs={'Out': out}) @@ -11414,11 +11499,17 @@ Examples: """ if in_dygraph_mode(): return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name='elementwise_add') + x, + y, + axis=axis, + act=act, + op_name='elementwise_add', + use_mkldnn=core.globals()["FLAGS_use_mkldnn"]) return _elementwise_op(LayerHelper('elementwise_add', **locals())) +@deprecated(since="2.0.0", update_to="paddle.divide") def elementwise_div(x, y, axis=-1, act=None, name=None): """ :alias_main: paddle.elementwise_div @@ -11842,6 +11933,7 @@ Examples: return _elementwise_op(LayerHelper('elementwise_pow', **locals())) +@deprecated(since="2.0.0", update_to="paddle.remainder") def elementwise_mod(x, y, axis=-1, act=None, name=None): """ :alias_main: paddle.elementwise_mod @@ -11879,6 +11971,7 @@ Examples: return _elementwise_op(LayerHelper('elementwise_mod', **locals())) +@deprecated(since="2.0.0", update_to="paddle.floor_divide") def elementwise_floordiv(x, y, axis=-1, act=None, name=None): """ :alias_main: paddle.elementwise_floordiv @@ -12049,12 +12142,15 @@ def logical_and(x, y, out=None, name=None): paddle.disable_static() x_data = np.array([True, True, False, False], dtype=np.bool) y_data = np.array([True, False, True, False], dtype=np.bool) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) res = paddle.logical_and(x, y) print(res.numpy()) # [True False False False] """ - + if x.shape != y.shape: + raise TypeError( + 'Input tensors must be same shape, but received x \'s shape: %s, y \'s shape: %s ' + % (x.shape, y.shape)) return _logical_op( op_name="logical_and", x=x, y=y, name=name, out=out, binary_op=True) @@ -12096,7 +12192,10 @@ def logical_or(x, y, out=None, name=None): res = paddle.logical_or(x, y) print(res.numpy()) # [True True True False] """ - + if x.shape != y.shape: + raise TypeError( + 'Input tensors must be same shape, but received x \'s shape: %s, y \'s shape: %s ' + % (x.shape, y.shape)) return _logical_op( op_name="logical_or", x=x, y=y, name=name, out=out, binary_op=True) @@ -12138,7 +12237,10 @@ def logical_xor(x, y, out=None, name=None): res = paddle.logical_xor(x, y) print(res.numpy()) # [False True True False] """ - + if x.shape != y.shape: + raise TypeError( + 'Input tensors must be same shape, but received x \'s shape: %s, y \'s shape: %s ' + % (x.shape, y.shape)) return _logical_op( op_name="logical_xor", x=x, y=y, name=name, out=out, binary_op=True) @@ -12184,8 +12286,6 @@ def logical_not(x, out=None, name=None): @templatedoc() def clip(x, min, max, name=None): """ - :alias_main: paddle.nn.clip - :alias: paddle.nn.clip,paddle.nn.clip.clip :old_api: paddle.fluid.layers.clip ${comment} @@ -12280,13 +12380,10 @@ def clip_by_norm(x, max_norm, name=None): return out +@deprecated(since="2.0.0", update_to="paddle.mean") @templatedoc() def mean(x, name=None): """ - :alias_main: paddle.mean - :alias: paddle.mean,paddle.tensor.mean,paddle.tensor.stat.mean - :old_api: paddle.fluid.layers.mean - ${comment} Args: @@ -13965,12 +14062,9 @@ def where(condition): return out +@deprecated(since="2.0.0", 
update_to="paddle.sign") def sign(x): """ - :alias_main: paddle.sign - :alias: paddle.sign,paddle.tensor.sign,paddle.tensor.math.sign - :old_api: paddle.fluid.layers.sign - This OP returns sign of every element in `x`: 1 for positive, -1 for negative and 0 for zero. Args: diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 829b27a405970ee9bbf0a348246ae05b8c453925..84cacea6ba5723f8a06fc87fa9c59d96f802e65a 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -20,13 +20,20 @@ from ..framework import convert_np_dtype_to_dtype_, Variable from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype from paddle.utils import deprecated +__deprecated_func_name__ = {'tanh_shrink': 'tanhshrink', } + __activations_noattr__ = [ 'sigmoid', 'logsigmoid', - 'exp', + 'tanh_shrink', + 'softplus', + 'softsign', 'tanh', +] + +__unary_func__ = [ + 'exp', 'atan', - 'tanh_shrink', 'sqrt', 'rsqrt', 'abs', @@ -34,15 +41,13 @@ __activations_noattr__ = [ 'floor', 'cos', 'acos', - 'asin', 'sin', 'sinh', + 'asin', 'cosh', 'round', 'reciprocal', 'square', - 'softplus', - 'softsign', ] __all__ = [] @@ -58,9 +63,24 @@ globals()['_scale'] = generate_layer_fn('scale') globals()['_elementwise_div'] = generate_layer_fn('elementwise_div') __all__ += __activations_noattr__ +__all__ += __unary_func__ for _OP in set(__activations_noattr__): - globals()[_OP] = generate_activation_fn(_OP) + _new_OP = _OP + if _OP in __deprecated_func_name__: + _new_OP = __deprecated_func_name__[_OP] + func = generate_activation_fn(_OP) + func = deprecated( + since="2.0.0", update_to="paddle.nn.functional.%s" % (_new_OP))(func) + globals()[_OP] = func + +for _OP in set(__unary_func__): + _new_OP = _OP + if _OP in __deprecated_func_name__: + _new_OP = __deprecated_func_name__[_OP] + func = generate_activation_fn(_OP) + func = deprecated(since="2.0.0", update_to="paddle.%s" % (_new_OP))(func) + globals()[_OP] = func add_sample_code(globals()["sigmoid"], r""" Examples: @@ -148,16 +168,14 @@ add_sample_code(globals()["tanh_shrink"], r""" Examples: .. code-block:: python - import numpy as np import paddle import paddle.nn.functional as F + import numpy as np + paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) - out = F.tanh_shrink(x) - print(out.numpy()) - # [-0.02005104 -0.00262468 0.00033201 0.00868739] + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + out = F.tanhshrink(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739] """) @@ -389,16 +407,14 @@ add_sample_code(globals()["softplus"], r""" Examples: .. code-block:: python - import numpy as np import paddle import paddle.nn.functional as F + import numpy as np + paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) - out = F.softplus(x) - print(out.numpy()) - # [0.51301525 0.59813887 0.74439666 0.85435524] + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355] """) @@ -406,16 +422,14 @@ add_sample_code(globals()["softsign"], r""" Examples: .. 
code-block:: python - import numpy as np import paddle import paddle.nn.functional as F + import numpy as np + paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) - out = F.softsign(x) - print(out.numpy()) - # [-0.28571429 -0.16666667 0.09090909 0.23076923] + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + out = F.softsign(x) # [-0.285714, -0.166667, 0.0909091, 0.230769] """) @@ -474,6 +488,7 @@ __all__ += ['hard_shrink'] _hard_shrink_ = generate_layer_fn('hard_shrink') +@deprecated(since="2.0.0", update_to="paddle.nn.functional.hardshrink") def hard_shrink(x, threshold=None): check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'hard_shrink') @@ -487,10 +502,6 @@ def hard_shrink(x, threshold=None): hard_shrink.__doc__ = _hard_shrink_.__doc__ + """ - :alias_main: paddle.nn.functional.hard_shrink - :alias: paddle.nn.functional.hard_shrink,paddle.nn.functional.activation.hard_shrink - :old_api: paddle.fluid.layers.hard_shrink - Examples: >>> import paddle.fluid as fluid @@ -636,6 +647,7 @@ __all__ += ['gelu'] _gelu_ = generate_layer_fn('gelu') +@deprecated(since="2.0.0", update_to="paddle.nn.functional.gelu") def gelu(x, approximate=False): locals_var = locals().copy() kwargs = dict() @@ -646,10 +658,6 @@ def gelu(x, approximate=False): gelu.__doc__ = """ - :alias_main: paddle.nn.functional.gelu - :alias: paddle.nn.functional.gelu,paddle.nn.functional.activation.gelu - :old_api: paddle.fluid.layers.gelu - :strong:`GeLU Activation Operator` For more details, see [Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415). @@ -724,7 +732,7 @@ __all__ += ['erf'] _erf_ = generate_layer_fn('erf') -def erf(x): +def erf(x, name=None): locals_var = locals().copy() kwargs = dict() for name, val in locals_var.items(): @@ -734,10 +742,6 @@ def erf(x): erf.__doc__ = """ - :alias_main: paddle.erf - :alias: paddle.erf,paddle.tensor.erf,paddle.tensor.math.erf,paddle.nn.functional.erf,paddle.nn.functional.activation.erf - :old_api: paddle.fluid.layers.erf - :strong:`Erf Operator` For more details, see [Error function](https://en.wikipedia.org/wiki/Error_function). @@ -747,57 +751,22 @@ Equation: Args: - x(Variable): The input of Erf op, Tensor or LoDTensor, dtype: float32 or float64. + x (Tensor): The input tensor, it's data type should be float32, float64. Returns: - Variable: The output of Erf op, Tensor or LoDTensor, dtype: float32 or float64, the same as the input, shape: the same as the input. + Tensor: The output of Erf op, dtype: float32 or float64, the same as the input, shape: the same as the input. Examples: .. code-block:: python - # declarative mode - import numpy as np - from paddle import fluid - - x = fluid.data(name="x", shape=(-1, 3), dtype="float32") - y = fluid.layers.erf(x) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - start = fluid.default_startup_program() - main = fluid.default_main_program() - - data = np.random.randn(2, 3).astype("float32") - exe.run(start) - - y_np, = exe.run(main, feed={"x": data}, fetch_list=[y]) - - data - # array([[ 0.4643714 , -1.1509596 , 1.2538221 ], - # [ 0.34369683, 0.27478245, 1.1805398 ]], dtype=float32) - y_np - # array([[ 0.48863927, -0.8964121 , 0.9237998 ], - # [ 0.37307587, 0.30242872, 0.9049887 ]], dtype=float32) - - .. 
code-block:: python - - # imperative mode import numpy as np - from paddle import fluid - import paddle.fluid.dygraph as dg - - data = np.random.randn(2, 3).astype("float32") - place = fluid.CPUPlace() - with dg.guard(place) as g: - x = dg.to_variable(data) - y = fluid.layers.erf(x) - y_np = y.numpy() - data - # array([[ 0.4643714 , -1.1509596 , 1.2538221 ], - # [ 0.34369683, 0.27478245, 1.1805398 ]], dtype=float32) - y_np - # array([[ 0.48863927, -0.8964121 , 0.9237998 ], - # [ 0.37307587, 0.30242872, 0.9049887 ]], dtype=float32) + import paddle + paddle.disable_static() + x_data = np.array([-0.4, -0.2, 0.1, 0.3]) + x = paddle.to_tensor(x_data) + out = paddle.erf(x) + print(out.numpy()) + # [-0.42839236 -0.22270259 0.11246292 0.32862676] """ diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index ecc58768522831b55f620cb6dc911630e2c2ad68..bc1368b562d7b354ce34dc87679fd8a0c5a3d012 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -127,7 +127,8 @@ class RNNCell(object): else: integer_types = (int, ) check_variable_and_dtype(batch_ref, 'batch_ref', - ['float32', 'float64'], 'RNNCell') + ['float32', 'float64', 'int32', 'int64'], + 'RNNCell') check_type(shape, 'shape', (list, tuple, type(None), integer_types), 'RNNCell') if isinstance(shape, (list, tuple)): @@ -2212,9 +2213,9 @@ def lstm(input, input ( :ref:`api_guide_Variable_en` ): LSTM input tensor, 3-D Tensor of shape :math:`[batch\_size, seq\_len, input\_dim]` . Data type is float32 or float64 init_h( :ref:`api_guide_Variable_en` ): The initial hidden state of the LSTM, 3-D Tensor of shape :math:`[num\_layers, batch\_size, hidden\_size]` . If is_bidirec = True, shape should be :math:`[num\_layers*2, batch\_size, hidden\_size]` . Data type is float32 or float64. + max_len (int): This parameter has no effect and will be discarded. init_c( :ref:`api_guide_Variable_en` ): The initial cell state of the LSTM, 3-D Tensor of shape :math:`[num\_layers, batch\_size, hidden\_size]` . If is_bidirec = True, shape should be :math:`[num\_layers*2, batch\_size, hidden\_size]` . Data type is float32 or float64. - max_len (int): max length of LSTM. the first dim of input tensor CAN NOT greater than max_len. hidden_size (int): hidden size of the LSTM. num_layers (int): total layers number of the LSTM. 
dropout_prob(float, optional): dropout prob, dropout ONLY work between rnn layers, NOT between time steps @@ -2255,7 +2256,6 @@ def lstm(input, data = fluid.data(name='x', shape=[None, 100], dtype='int64') emb = fluid.embedding(input=data, size=[vocab_size, emb_dim], is_sparse=True) batch_size = 20 - max_len = 100 dropout_prob = 0.2 input_size = 100 hidden_size = 150 @@ -2308,9 +2308,11 @@ def lstm(input, out = helper.create_variable_for_type_inference(dtype) last_h = helper.create_variable_for_type_inference(dtype) last_c = helper.create_variable_for_type_inference(dtype) - - cache = helper.create_variable( - persistable=True, type=core.VarDesc.VarType.RAW, stop_gradient=True) + reserve = helper.create_variable_for_type_inference( + dtype=core.VarDesc.VarType.UINT8, stop_gradient=True) + state_out = helper.create_variable_for_type_inference( + dtype=core.VarDesc.VarType.UINT8, stop_gradient=True) + state_out.persistable = True helper.append_op( type='cudnn_lstm', @@ -2319,15 +2321,15 @@ def lstm(input, 'InitH': init_h, 'InitC': init_c, 'W': weight, - 'Cache': cache, }, outputs={ 'Out': out, - 'last_h': last_h, - 'last_c': last_c, + 'LastH': last_h, + 'LastC': last_c, + 'Reserve': reserve, + 'StateOut': state_out, }, attrs={ - 'max_len': max_len, 'is_bidirec': is_bidirec, 'input_size': input_size, 'hidden_size': hidden_size, @@ -3101,7 +3103,8 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None): 'beam_search_encode') helper = LayerHelper('beam_search_decode', **locals()) sentence_ids = helper.create_variable_for_type_inference(dtype=ids.dtype) - sentence_scores = helper.create_variable_for_type_inference(dtype=ids.dtype) + sentence_scores = helper.create_variable_for_type_inference( + dtype=scores.dtype) helper.append_op( type="beam_search_decode", diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 7ac67b1bc817964ca65d5b7009b446458d2cc7ab..77a78eb4a14a0a5ad9be9cff71131ca473106ab8 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -26,6 +26,7 @@ from .. import core from .layer_function_generator import templatedoc from . import utils from ..data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype +from paddle.utils import deprecated import numpy import warnings @@ -642,7 +643,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): shape(list|tuple|Tensor): Shape of the output Tensor, the data type of ``shape`` is int32 or int64. If ``shape`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. If ``shape`` is an Tensor, it should be an 1-D Tensor with date type int32 or int64. - dtype(np.dtype|core.VarDesc.VarType|str): Data type of the output Tensor which can + dtype(np.dtype|str): Data type of the output Tensor which can be float16, float32, float64, int32, int64. value(bool|float|int|Tensor): The constant value used to initialize the Tensor to be created. If ``value`` is an Tensor, it should be an 1-D Tensor. @@ -746,6 +747,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): return out +@deprecated(since='1.8.0', update_to="paddle.fill_constant") @templatedoc() def fill_constant_batch_size_like(input, shape, @@ -1040,7 +1042,7 @@ def ones(shape, dtype, force_cpu=False): Parameters: shape(tuple|list|Tensor): Shape of output Tensor, the data type of shape is int32 or int64. 
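On the `cudnn_lstm` rework above: `max_len` is kept in the signature for compatibility but no longer affects the op, and the old cache variable is replaced by `Reserve`/`StateOut` outputs. A rough construction-only sketch, using keyword arguments so it does not rely on parameter order; actually running it requires a CUDA build:

.. code-block:: python

    import paddle.fluid as fluid

    emb = fluid.data(name="lstm_in", shape=[20, 100, 32], dtype="float32")
    init_h = fluid.layers.fill_constant([1, 20, 150], "float32", 0.0)
    init_c = fluid.layers.fill_constant([1, 20, 150], "float32", 0.0)
    out, last_h, last_c = fluid.layers.lstm(
        input=emb, init_h=init_h, init_c=init_c,
        max_len=100,            # accepted but ignored now
        hidden_size=150, num_layers=1)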
- dtype (np.dtype|core.VarDesc.VarType|str): Data type of output Tensor, it supports + dtype (np.dtype|str): Data type of output Tensor, it supports bool, float16, float32, float64, int32 and int64. force_cpu (bool, optional): Whether force to store the output Tensor in CPU memory. If :attr:`force_cpu` is False, the output Tensor will be stored in running device memory. @@ -1073,7 +1075,7 @@ def zeros(shape, dtype, force_cpu=False, name=None): Parameters: shape(tuple|list|Tensor): Shape of output Tensor, the data type of ``shape`` is int32 or int64. - dtype (np.dtype|core.VarDesc.VarType|str): Data type of output Tensor, it supports + dtype (np.dtype|str): Data type of output Tensor, it supports bool, float16, float32, float64, int32 and int64. force_cpu (bool, optional): Whether force to store the output Tensor in CPU memory. If :attr:`force_cpu` is False, the output Tensor will be stored in running device memory. @@ -1435,14 +1437,14 @@ def linspace(start, stop, num, dtype=None, name=None): This OP return fixed number of evenly spaced values within a given interval. Args: - start(float|Tensor): The input :attr:`start` is start variable of range. It is a float scalar, \ - or a Tensor of shape [1] with input data type float32, float64. - stop(float|Tensor): The input :attr:`stop` is start variable of range. It is a float scalar, \ - or a Tensor of shape [1] with input data type float32, float64. + start(int|float|Tensor): The input :attr:`start` is start variable of range. It is a scalar, \ + or a Tensor of shape [1] with input data type int32, int64, float32 or float64. + stop(int|float|Tensor): The input :attr:`stop` is start variable of range. It is a scalar, \ + or a Tensor of shape [1] with input data type int32, int64, float32 or float64. num(int|Tensor): The input :attr:`num` is given num of the sequence. It is an int scalar, \ - or a Tensor of shape [1] with data type int32. - dtype(np.dtype|core.VarDesc.VarType|str, optional): The data type of output tensor, it could be 'float32' and 'float64'. - Default: if None, the data type is float32. + or a Tensor of shape [1] with data type int32 or int64. + dtype(np.dtype|str, optional): The data type of output tensor, it could be + int32, int64, float32 and float64. Default: if None, the data type is float32. name(str, optional): Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`.Default: None. @@ -1452,9 +1454,11 @@ def linspace(start, stop, num, dtype=None, name=None): the value with input :attr:`start`. Raises: - TypeError: The ``dtype`` must be one of float32 and float64. - TypeError: The data type of ``start`` and ``stop`` must be one of float32 and float64. - TypeError: The data type of ``num`` must be one of int32 and int64. + TypeError: The ``dtype`` must be one of int32, int64, float32 and float64. + TypeError: The type of ``num`` must be int When it's not a Tensor. + TypeError: The data type of ``num`` must be int32 When it's a Tensor. + TypeError: The data type of ``start`` and ``stop`` must be same as ``dtype`` When it's a Tensor. 
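The docstring updates above drop ``core.VarDesc.VarType`` from the advertised ``dtype`` argument; a plain string (or ``np.dtype``) is the documented form. A minimal illustration with the existing creation ops (no new API, just the documented usage):

.. code-block:: python

    import paddle.fluid as fluid

    ones_t = fluid.layers.ones(shape=[2, 3], dtype='int64')        # dtype given as a plain string
    zeros_t = fluid.layers.zeros(shape=[2, 3], dtype='float32')
    filled = fluid.layers.fill_constant(shape=[2, 3], dtype='int32', value=7)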
+ Examples: @@ -1467,29 +1471,47 @@ def linspace(start, stop, num, dtype=None, name=None): """ if dtype is None: dtype = 'float32' + tensor_num = num + tensor_start = start + tensor_stop = stop + if not isinstance(dtype, core.VarDesc.VarType): + dtype = convert_np_dtype_to_dtype_(dtype) if not isinstance(start, Variable): - start = fill_constant([1], dtype, start) + tensor_start = fill_constant([1], dtype, start) if not isinstance(stop, Variable): - stop = fill_constant([1], dtype, stop) + tensor_stop = fill_constant([1], dtype, stop) if not isinstance(num, Variable): - num = fill_constant([1], 'int32', num) + tensor_num = fill_constant([1], 'int32', num) if in_dygraph_mode(): - return core.ops.linspace(start, stop, num) + return core.ops.linspace(tensor_start, tensor_stop, tensor_num, 'dtype', + dtype) helper = LayerHelper("linspace", **locals()) - check_dtype(start.dtype, 'start', ['float32', 'float64'], 'linspace') - check_dtype(stop.dtype, 'stop', ['float32', 'float64'], 'linspace') - check_dtype(num.dtype, 'num', ['int32', 'int64'], 'linspace') - check_dtype(dtype, 'dtype', ['float32', 'float64'], 'linspace') + if isinstance(start, Variable): + check_dtype(start.dtype, 'start', (convert_dtype(dtype)), 'linspace') + else: + check_type(start, 'start', (int, float), 'linspace') - out = helper.create_variable_for_type_inference(dtype=start.dtype) + if isinstance(stop, Variable): + check_dtype(stop.dtype, 'stop', (convert_dtype(dtype)), 'linspace') + else: + check_type(stop, 'stop', (int, float), 'linspace') + if isinstance(num, Variable): + check_dtype(num.dtype, 'num', ['int32'], 'linspace') + else: + check_type(num, 'num', (int), 'linspace') + check_dtype(dtype, 'dtype', ['int32', 'int64', 'float32', 'float64'], + 'linspace') + + out = helper.create_variable_for_type_inference(dtype=dtype) helper.append_op( type='linspace', - inputs={'Start': start, - 'Stop': stop, - 'Num': num}, + inputs={'Start': tensor_start, + 'Stop': tensor_stop, + 'Num': tensor_num}, + attrs={'dtype': dtype}, outputs={'Out': [out]}) return out @@ -1537,6 +1559,7 @@ def zeros_like(x, out=None): return out +@deprecated(since="2.0.0", update_to="paddle.diag") def diag(diagonal): """ :alias_main: paddle.diag @@ -1598,7 +1621,7 @@ def eye(num_rows, If None, default: num_rows. batch_shape(list, optional): If provided, the returned tensor will have a leading batch size of this shape, the data type of ``batch_shape`` is int. Default is None. - dtype(np.dtype|core.VarDesc.VarType|str, optional): The data type of the returned tensor. + dtype(np.dtype|str, optional): The data type of the returned tensor. It should be int32, int64, float16, float32, float64, default is 'float32'. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index c84d2ac3796efe9d16641552f1be939a666aa4cf..8f34576b836a5412a6792a6dfd63b3c9fd8de560 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -40,6 +40,7 @@ from paddle.fluid.layers import tensor from functools import reduce from .wrapped_decorator import signature_safe_contextmanager from .. import compat as cpt +import paddle __all__ = [ 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'Dpsgd', 'DecayedAdagrad', @@ -60,21 +61,23 @@ class Optimizer(object): but need to use one of it's implementation. 
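With the rewritten checks, ``linspace`` accepts integer endpoints and integer output dtypes, and the output dtype follows the ``dtype`` argument rather than ``start.dtype``; Tensor endpoints must carry that dtype and a Tensor ``num`` must be int32. A short sketch of both forms, as described by the new checks above:

.. code-block:: python

    import paddle.fluid as fluid

    # Scalar endpoints with an integer output dtype (rejected before this change).
    int_seq = fluid.layers.linspace(0, 10, 11, dtype='int32')

    # Tensor endpoints must match the requested dtype; num must be an int32 Tensor.
    start = fluid.layers.fill_constant([1], 'float64', 1.0)
    stop = fluid.layers.fill_constant([1], 'float64', 5.0)
    num = fluid.layers.fill_constant([1], 'int32', 5)
    float_seq = fluid.layers.linspace(start, stop, num, dtype='float64')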
""" - @imperative_base.no_grad + @imperative_base.no_grad() def __init__(self, learning_rate, parameter_list=None, regularization=None, grad_clip=None, name=None): + # Because of the loop import, so place it in the function body + from paddle.optimizer.lr_scheduler import _LRScheduler self._parameter_list = list( parameter_list) if parameter_list is not None else None self._name = name if framework.in_dygraph_mode(): - if not isinstance(learning_rate, float) and \ - not isinstance(learning_rate, LearningRateDecay): + if not isinstance(learning_rate, + (float, LearningRateDecay, _LRScheduler)): raise TypeError( - "learning rate should be float or LearningRateDecay, got %s here" + "learning rate should be float or _LRScheduler, got %s here" % type(learning_rate)) if self._parameter_list is None: raise AttributeError( @@ -89,11 +92,11 @@ class Optimizer(object): % regularization.__str__()) break else: - if not isinstance(learning_rate, float) and \ - not isinstance(learning_rate, framework.Variable): + if not isinstance(learning_rate, + (float, framework.Variable, _LRScheduler)): raise TypeError( - "learning rate should be float or Variable, got %s here" % - type(learning_rate)) + "learning rate should be float or _LRScheduler, got %s here" + % type(learning_rate)) if grad_clip is not None: if not isinstance(grad_clip, GradientClipBase): @@ -143,11 +146,15 @@ class Optimizer(object): state_dict = adam.state_dict() ''' + from paddle.optimizer.lr_scheduler import _LRScheduler state_dict = {} for k, v in self._accumulators.items(): for para_name, var_tmp in v.items(): state_dict[var_tmp.name] = var_tmp # global step if use lr decay + if isinstance(self._learning_rate, _LRScheduler): + state_dict["LR_Scheduler"] = self._learning_rate.state_dict() + return state_dict if isinstance(self._learning_rate, LearningRateDecay): state_dict["LR_Scheduler"] = self._learning_rate.state_dict() @@ -191,6 +198,9 @@ class Optimizer(object): adam.set_dict(opti_state_dict) ''' + from paddle.optimizer.lr_scheduler import _LRScheduler + if isinstance(self._learning_rate, _LRScheduler): + self._learning_rate.set_dict(state_dict["LR_Scheduler"]) if isinstance(self._learning_rate, LearningRateDecay): self._learning_rate.set_dict(state_dict["LR_Scheduler"]) @@ -251,6 +261,30 @@ class Optimizer(object): return self._opti_name_list def _create_global_learning_rate(self): + from paddle.optimizer.lr_scheduler import _LRScheduler + if isinstance(self._learning_rate, _LRScheduler): + lr_var = self._global_learning_rate() + # only create global lr_var once + if not isinstance(lr_var, framework.Variable): + lr_name = unique_name.generate('learning_rate') + self._learning_rate._var_name = lr_name + lr_var = self.helper.create_global_variable( + name=lr_name, + shape=[1], + persistable=True, + stop_gradient=True, + dtype='float32' if self._dtype is None else self._dtype) + main_prog = framework.default_main_program() + main_prog.lr_sheduler = self._learning_rate + main_prog.lr_var = lr_var + self._learning_rate_map[framework.default_main_program( + )] = lr_var + + lr_value = float(self._learning_rate()) + self.helper.set_variable_initializer( + lr_var, initializer=Constant(value=lr_value)) + return + if imperative_base.enabled(): # create learning rate Variable if isinstance(self._learning_rate, float): @@ -754,7 +788,7 @@ class Optimizer(object): params_grads = append_backward(loss, parameter_list, act_no_grad_set, callbacks) # Note: since we can't use all_reduce_op now, - # dgc_op should be the last op of one grad. 
+ # dgc_op should be the last op of one grad. self._append_dgc_ops(params_grads) return params_grads @@ -863,7 +897,7 @@ class Optimizer(object): if p.trainable: p.clear_gradient() - @imperative_base.no_grad + @imperative_base.no_grad() def minimize(self, loss, startup_program=None, @@ -981,7 +1015,7 @@ class SGDOptimizer(Optimizer): name=name) self.type = "sgd" - @no_grad + @no_grad() def _append_optimize_op(self, block, param_and_grad): lr = self._create_param_lr(param_and_grad) if framework.in_dygraph_mode(): @@ -1141,7 +1175,7 @@ class MomentumOptimizer(Optimizer): class DGCMomentumOptimizer(Optimizer): """ - :api_attr: Static Graph + :api_attr: Static Graph DGC (Deep Gradient Compression) Momentum Optimizer. Original paper is https://arxiv.org/abs/1712.01887 @@ -1518,7 +1552,7 @@ class DGCMomentumOptimizer(Optimizer): dgc_op._set_attr(op_maker.kOpRoleVarAttrName(), [param_var.name, grad_var.name]) - @imperative_base.no_grad + @imperative_base.no_grad() def apply_gradients(self, params_grads): params_grads = sorted(params_grads, key=lambda x: x[0].name) params_grads, table_param_and_grad, table_optimize_op = \ @@ -3067,7 +3101,7 @@ Lamb = LambOptimizer class ModelAverage(Optimizer): """ - :api_attr: Static Graph + :api_attr: Static Graph The ModelAverage optimizer accumulates specific continuous historical parameters during training. The accumulated historical range can be controlled by the passed @@ -3376,7 +3410,7 @@ class ModelAverage(Optimizer): class ExponentialMovingAverage(object): """ - :api_attr: Static Graph + :api_attr: Static Graph Compute the moving average of parameters with exponential decay. Given a parameter :math:`\\theta`, its exponential moving average (EMA) @@ -3626,7 +3660,7 @@ class ExponentialMovingAverage(object): class PipelineOptimizer(object): """ - :api_attr: Static Graph + :api_attr: Static Graph Pipeline Optimizer: Make a program to run as pipeline, that is splitting a program into multiple sections (sub-programs) and each section run on a @@ -3690,7 +3724,8 @@ class PipelineOptimizer(object): def __init__(self, optimizer, num_microbatches=1, start_cpu_core_id=0): if framework.in_dygraph_mode(): raise Exception("In dygraph, don't support PipelineOptimizer.") - if not isinstance(optimizer, Optimizer): + if not isinstance(optimizer, Optimizer) and not isinstance( + optimizer, paddle.optimizer.Optimizer): raise ValueError("The 'optimizer' parameter for " "PipelineOptimizer must be an instance of " "Optimizer, but the given type is {}.".format( @@ -4477,7 +4512,7 @@ class PipelineOptimizer(object): class RecomputeOptimizer(Optimizer): """ - :api_attr: Static Graph + :api_attr: Static Graph Recompute Optimizer Wrapper @@ -4562,7 +4597,7 @@ class RecomputeOptimizer(Optimizer): def load(self, stat_dict): """ - :api_attr: Static Graph + :api_attr: Static Graph load function is not supported by Recompute Optimizer for now. :return: None @@ -4786,7 +4821,7 @@ class RecomputeOptimizer(Optimizer): class LookaheadOptimizer(object): """ - :api_attr: Static Graph + :api_attr: Static Graph This implements the Lookahead optimizer of the paper : https://arxiv.org/abs/1907.08610. 
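The new ``_LRScheduler`` branches reuse the save/restore path that ``LearningRateDecay`` already had: the schedule's own state is stored under the ``"LR_Scheduler"`` key of the optimizer ``state_dict``. A dygraph sketch of that round trip using the existing ``ExponentialDecay`` schedule (the new ``_LRScheduler`` branch produces a state dict of the same shape):

.. code-block:: python

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding([10, 10])
        lr = fluid.dygraph.ExponentialDecay(
            learning_rate=0.1, decay_steps=100, decay_rate=0.5)
        adam = fluid.optimizer.Adam(
            learning_rate=lr, parameter_list=emb.parameters())

        state = adam.state_dict()   # includes the "LR_Scheduler" entry alongside any accumulators
        adam.set_dict(state)        # schedule state is restored from the same key

Similarly, the relaxed type check in the ``PipelineOptimizer`` hunk above lets it wrap an optimizer from the new ``paddle.optimizer`` namespace as well as the classic ``fluid`` ones. A static-graph sketch with the fluid SGD optimizer (a ``paddle.optimizer.Optimizer`` instance is accepted the same way; ``num_microbatches`` is the constructor argument shown in the hunk):

.. code-block:: python

    import paddle.fluid as fluid

    inner_opt = fluid.optimizer.SGD(learning_rate=0.01)
    # Any Optimizer subclass from fluid.optimizer or paddle.optimizer now passes the check.
    pipeline_opt = fluid.optimizer.PipelineOptimizer(inner_opt, num_microbatches=2)

Finally, the ``switch.case(step == one_var)`` added in the LookaheadOptimizer hunk that follows seeds the slow weights from the fast weights on the very first step, so the later k-step interpolation does not start from an uninitialized copy. A plain numpy sketch of the rule being implemented (``alpha`` and ``k`` mirror the optimizer's attributes; this is the math, not the graph-mode code):

.. code-block:: python

    import numpy as np

    def lookahead_step(step, fast, slow, alpha=0.5, k=5):
        """One global step of the Lookahead slow/fast weight bookkeeping."""
        if step == 1:                        # new case: seed slow weights from fast weights
            slow = fast.copy()
        elif step % k == 0:                  # existing case: interpolate every k steps
            slow = alpha * fast + (1.0 - alpha) * slow
            fast = slow.copy()               # fast weights restart from the slow weights
        return fast, slow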
@@ -4929,6 +4964,11 @@ class LookaheadOptimizer(object): mod = layers.elementwise_mod(step, k) with layers.control_flow.Switch() as switch: + with switch.case(step == one_var): + for param_name in params: + fast_var = main_block.var(param_name) + slow_var = param_to_slow[param_name] + layers.assign(input=fast_var, output=slow_var) with switch.case(mod == zero_var): for param_name in params: fast_var = main_block.var(param_name) diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index a45443632b04835bc8f3b3f2f167433c7a8b49d4..8e0470bededd4fdb8aec03893590bdba35bbb364 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -204,6 +204,9 @@ class WeightNormParamAttr(ParamAttr): """ :api_attr: Static Graph + Note: + Please use 'paddle.nn.utils.weight_norm' in dygraph mode. + Parameter of weight Norm. Weight Norm is a reparameterization of the weight vectors in a neural network that decouples the magnitude of those weight vectors from their direction. Weight Norm has been implemented as discussed in this @@ -216,6 +219,7 @@ class WeightNormParamAttr(ParamAttr): It is recommended to use ``minimize(loss, grad_clip=clip)`` to clip gradient. There are three clipping strategies: :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , :ref:`api_fluid_clip_GradientClipByValue` . + Args: dim(int): Dimension over which to compute the norm. Dim is a non-negative diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index 7e633756fce64a8f2f85be596cf11904d285c49a..76c95be75d67d60cd59efe13ecba6f01a1c1d614 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -1039,7 +1039,7 @@ class GeneratorLoader(DataLoaderBase): self._reader = core.create_py_reader( self.queue, self._var_names, self._shapes, self._dtypes, self._need_check_feed, self._places, self._use_double_buffer, - self._drop_last, True) + self._drop_last, False) def _init_non_iterable(self): lod_levels = [] diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index f6ac452c82c66132d8e0b41647a05daaa3de2a74..10adb63c9cb1a4395d4df3f90b36c86da4a1e318 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -33,6 +33,8 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_api_input) list(APPEND MIXED_DIST_TEST_OPS test_fleet_checkpoint) list(APPEND MIXED_DIST_TEST_OPS test_collective_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_base) +list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_2) +list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_3) list(APPEND MIXED_DIST_TEST_OPS test_fleet_recompute_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_graph_execution_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_pipeline_meta_optimizer) @@ -44,6 +46,7 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_lamb_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_dgc_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_private_function) list(APPEND MIXED_DIST_TEST_OPS test_fleet_graph_executor) +list(APPEND MIXED_DIST_TEST_OPS test_fleet_meta_optimizer_base) foreach(TEST_OP ${MIXED_DIST_TEST_OPS}) list(REMOVE_ITEM TEST_OPS ${TEST_OP}) endforeach() @@ -53,6 +56,8 @@ if(NOT WITH_GPU OR WIN32) LIST(REMOVE_ITEM TEST_OPS test_allgather) LIST(REMOVE_ITEM TEST_OPS test_allreduce) LIST(REMOVE_ITEM TEST_OPS test_broadcast) + LIST(REMOVE_ITEM TEST_OPS test_collective_reduce) + 
LIST(REMOVE_ITEM TEST_OPS test_collective_scatter) LIST(REMOVE_ITEM TEST_OPS test_reducescatter) LIST(REMOVE_ITEM TEST_OPS test_reducescatter_api) endif() @@ -90,10 +95,16 @@ endif() LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint) +LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint1) LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint2) +LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint3) +LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint_multiple) +LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint_dist_basic) +LIST(REMOVE_ITEM TEST_OPS test_hdfs1) +LIST(REMOVE_ITEM TEST_OPS test_hdfs2) +LIST(REMOVE_ITEM TEST_OPS test_hdfs3) LIST(REMOVE_ITEM TEST_OPS test_checkpoint_saver) if(APPLE OR WIN32) - LIST(REMOVE_ITEM TEST_OPS test_hdfs) LIST(REMOVE_ITEM TEST_OPS test_fs_interface) LIST(REMOVE_ITEM TEST_OPS test_fleet_metric) endif() @@ -106,6 +117,7 @@ if (NOT ${WITH_GPU}) list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_se_resnext) LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding) LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer) + LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm) LIST(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision) elseif(${CUDNN_VERSION} VERSION_LESS 7100) LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) @@ -191,6 +203,7 @@ function(py_test_modules TARGET_NAME) endif() endfunction() + function(bash_test_modules TARGET_NAME) if(NOT WITH_TESTING) return() @@ -233,6 +246,51 @@ function(bash_test_modules TARGET_NAME) endif() endfunction() +function(parallel_bash_test_modules TARGET_NAME) + if(NOT WITH_TESTING) + return() + endif() + + set(options SERIAL) + set(oneValueArgs TIMEOUT START_BASH) + set(multiValueArgs DEPS ENVS LABELS UnitTests) + cmake_parse_arguments(parallel_bash_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + + set(timeout 120) + if(${parallel_bash_test_modules_TIMEOUT}) + set(timeout ${parallel_bash_test_modules_TIMEOUT}) + endif() + + list(JOIN parallel_bash_test_modules_UnitTests " " uts_string) + + if(WITH_COVERAGE) + add_test(NAME ${TARGET_NAME} + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python + TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${parallel_bash_test_modules_ENVS} UnitTests=${uts_string} + WITH_COVERAGE=ON COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data + bash ${CMAKE_CURRENT_BINARY_DIR}/${parallel_bash_test_modules_START_BASH} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + else() + add_test(NAME ${TARGET_NAME} + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python + TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${parallel_bash_test_modules_ENVS} UnitTests=${uts_string} + bash ${CMAKE_CURRENT_BINARY_DIR}/${parallel_bash_test_modules_START_BASH} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() + + if (parallel_bash_test_modules_SERIAL) + set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) + endif() + + if(parallel_bash_test_modules_LABELS) + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT ${timeout} LABELS ${parallel_bash_test_modules_LABELS}) + else() + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT ${timeout}) + endif() +endfunction() + + list(REMOVE_ITEM TEST_OPS test_warpctc_op) list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) list(REMOVE_ITEM TEST_OPS test_parallel_executor_profiler) @@ -267,6 +325,9 @@ list(REMOVE_ITEM TEST_OPS test_conv3d_transpose_op) # disable this unittest temporarily list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception) +list(REMOVE_ITEM 
TEST_OPS test_sampling_id_op) + + if (APPLE OR WIN32) list(REMOVE_ITEM TEST_OPS test_dataset) list(REMOVE_ITEM TEST_OPS test_dataset_dataloader) @@ -359,17 +420,21 @@ if(WITH_DISTRIBUTE) list(REMOVE_ITEM DIST_TEST_OPS "test_dist_base") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_base") - # FIXME(seiriosX) will readd after PR 22957 Merged + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_ctr") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_lars") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_train") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_save_load") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_simnet_bow") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_simnet_bow") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_ctr") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_text_classification") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_train") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_word2vec") + # FIXME(seiriosX) will fix this + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_sparse_embedding_ctr") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_gloo") + py_test_modules(test_recv_save_op MODULES test_recv_save_op ENVS ${dist_ENVS}) py_test_modules(test_transpiler_ops MODULES test_transpiler_ops ENVS ${dist_ENVS}) py_test_modules(test_communicator_async MODULES test_communicator_async ENVS ${dist_ENVS}) @@ -379,6 +444,8 @@ if(WITH_DISTRIBUTE) py_test_modules(test_collective_optimizer MODULES test_collective_optimizer) if(NOT APPLE) py_test_modules(test_fleet_base MODULES test_fleet_base ENVS ${dist_ENVS}) + py_test_modules(test_fleet_base_2 MODULES test_fleet_base_2 ENVS ${dist_ENVS}) + py_test_modules(test_fleet_base_3 MODULES test_fleet_base_3 ENVS ${dist_ENVS}) py_test_modules(test_fleet_recompute_meta_optimizer MODULES test_fleet_recompute_meta_optimizer ENVS ${dist_ENVS}) py_test_modules(test_fleet_graph_execution_meta_optimizer MODULES test_fleet_graph_execution_meta_optimizer ENVS ${dist_ENVS}) py_test_modules(test_fleet_graph_executor MODULES test_fleet_graph_executor ENVS ${dist_ENVS}) @@ -386,6 +453,7 @@ if(WITH_DISTRIBUTE) py_test_modules(test_fleet_amp_meta_optimizer MODULES test_fleet_amp_meta_optimizer ENVS ${dist_ENVS}) py_test_modules(test_fleet_pipeline_meta_optimizer MODULES test_fleet_pipeline_meta_optimizer ENVS ${dist_ENVS}) py_test_modules(test_fleet_private_function MODULES test_fleet_private_function ENVS ${dist_ENVS}) + py_test_modules(test_fleet_meta_optimizer_base MODULES test_fleet_meta_optimizer_base ENVS ${dist_ENVS}) if(NOT WIN32) py_test_modules(test_fleet_localsgd_meta_optimizer MODULES test_fleet_localsgd_meta_optimizer ENVS ${dist_ENVS}) py_test_modules(test_fleet_lars_meta_optimizer MODULES test_fleet_lars_meta_optimizer ENVS ${dist_ENVS}) @@ -454,9 +522,9 @@ if(NOT WIN32) endif() if(NOT APPLE AND NOT WIN32) - bash_test_modules(test_auto_checkpoint START_BASH dist_test.sh TIMEOUT 600) - bash_test_modules(test_auto_checkpoint2 START_BASH dist_test.sh TIMEOUT 600) - bash_test_modules(test_checkpoint_saver START_BASH dist_test.sh TIMEOUT 600) + parallel_bash_test_modules(test_acp START_BASH parallel_test.sh TIMEOUT 140 UnitTests test_auto_checkpoint test_auto_checkpoint1 test_auto_checkpoint2 test_auto_checkpoint3) + parallel_bash_test_modules(test_acp2 START_BASH parallel_test.sh TIMEOUT 140 UnitTests test_auto_checkpoint_multiple test_auto_checkpoint_dist_basic) + parallel_bash_test_modules(test_hdfs START_BASH parallel_test.sh TIMEOUT 120 UnitTests test_hdfs1 test_hdfs2 test_hdfs3) endif() add_subdirectory(sequence) @@ -501,3 +569,11 @@ if(NOT WIN32 AND NOT APPLE) 
set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_multiprocess_dataloader_iterable_dataset_dynamic PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") endif() + +# setting timeout value for old unittests +# set_tests_properties(test_dist_fleet_sparse_embedding_ctr PROPERTIES TIMEOUT 200) +set_tests_properties(test_fused_elemwise_activation_op PROPERTIES TIMEOUT 150) +set_tests_properties(test_gru_op PROPERTIES TIMEOUT 200) +set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 150) +set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 150) +set_tests_properties(test_regularizer PROPERTIES TIMEOUT 150) diff --git a/python/paddle/fluid/tests/unittests/__init__.py b/python/paddle/fluid/tests/unittests/__init__.py index b94a21a7e406b833797f8f521c62a2351c2bc30a..193b91cdaa13293ca920a8b79826bb71657c5d56 100644 --- a/python/paddle/fluid/tests/unittests/__init__.py +++ b/python/paddle/fluid/tests/unittests/__init__.py @@ -10,4 +10,15 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. +# limitations under the License.p + +# Note: On Windows, import form subdirectories such as dirA()->dirB(), current directory +# will still be dirA(), But is should be dirB(). So it will ModulNotFoundError +# please refer to https://stackoverflow.com/questions/8953844/import-module-from-subfolder + +import os +if os.name == 'nt': + import sys + dirname, filename = os.path.split(os.path.abspath(__file__)) + sys.path.insert(0, dirname) + print(sys.path) diff --git a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py index 812730e9523f8d24ade68474b858e04b41fc6895..529ff4ec45d1fdc6d1d8e765e38cff53d36aade7 100644 --- a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py +++ b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py @@ -30,11 +30,11 @@ from paddle.fluid import unique_name import numpy as np from paddle.io import Dataset, BatchSampler, DataLoader -BATCH_NUM = 20 -BATCH_SIZE = 16 +BATCH_NUM = 4 +BATCH_SIZE = 1 #IMAGE_SIZE = 128 -CLASS_NUM = 10 +CLASS_NUM = 2 USE_GPU = False # whether use GPU to run model places = fluid.cuda_places() if USE_GPU else fluid.cpu_places() @@ -59,7 +59,7 @@ def sample_list_generator_creator(): for _ in range(BATCH_NUM): sample_list = [] for _ in range(BATCH_SIZE): - image, label = get_random_images_and_labels([16, 16], [1]) + image, label = get_random_images_and_labels([4, 4], [1]) sample_list.append([image, label]) yield sample_list @@ -75,8 +75,7 @@ class AutoCheckpointBase(unittest.TestCase): minimize=True, iterable=True): def simple_net(): - image = fluid.data( - name='image', shape=[-1, 16, 16], dtype='float32') + image = fluid.data(name='image', shape=[-1, 4, 4], dtype='float32') label = fluid.data(name='label', shape=[-1, 1], dtype='int64') fc_tmp = fluid.layers.fc(image, size=CLASS_NUM) diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_op.py b/python/paddle/fluid/tests/unittests/collective_reduce_op.py new file mode 100644 index 0000000000000000000000000000000000000000..da61284344b58d44c5ba02af5ed42c553f857c94 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/collective_reduce_op.py @@ -0,0 +1,70 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import argparse +import os +import sys +import signal +import time +import socket +from contextlib import closing +from six import string_types +import math +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +import paddle.fluid.unique_name as nameGen +from paddle.fluid import core +import unittest +from multiprocessing import Process +import paddle.fluid.layers as layers +from functools import reduce +from test_collective_base import TestCollectiveRunnerBase, runtime_main + + +class TestCollectiveReduce(TestCollectiveRunnerBase): + def __init__(self): + self.global_ring_id = 0 + + def get_model(self, main_prog, startup_program): + ring_id = 0 + rootid = 1 + with fluid.program_guard(main_prog, startup_program): + tindata = layers.data( + name="tindata", shape=[10, 1000], dtype='float32') + toutdata = main_prog.current_block().create_var( + name="outofreduce", + dtype='float32', + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False) + main_prog.global_block().append_op( + type="c_reduce_sum", + inputs={'X': tindata}, + attrs={'ring_id': ring_id, + 'root_id': rootid}, + outputs={'Out': toutdata}) + main_prog.global_block().append_op( + type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) + return toutdata + + +if __name__ == "__main__": + runtime_main(TestCollectiveReduce, "reduce", 0) diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py b/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py new file mode 100644 index 0000000000000000000000000000000000000000..7e6904286234364e7ae84a5c21b9826885f99dc4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py @@ -0,0 +1,73 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
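For orientation, what the ``c_reduce_sum`` op in the new test computes: every rank contributes its ``tindata`` and only the root rank (``root_id=1`` here) receives the elementwise sum; the other ranks' output buffers are not defined by the op. A single-process numpy sketch of that semantics for the test's two-rank setup:

.. code-block:: python

    import numpy as np

    def reduce_sum_to_root(rank_tensors, root_id):
        """rank_tensors: list indexed by rank; only the root's result is meaningful."""
        return {root_id: np.sum(np.stack(rank_tensors), axis=0)}

    out = reduce_sum_to_root([np.ones((10, 1000)), 2 * np.ones((10, 1000))], root_id=1)
    # out[1] is 3.0 everywhere, matching what rank 1 should hold after the op runs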
+ +from __future__ import print_function + +import numpy as np +import argparse +import os +import sys +import signal +import time +import socket +from contextlib import closing +from six import string_types +import math +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +import paddle.fluid.unique_name as nameGen +from paddle.fluid import core +import unittest +from multiprocessing import Process +import paddle.fluid.layers as layers +from functools import reduce +from test_collective_base import TestCollectiveRunnerBase, runtime_main + + +class TestCollectiveReduce(TestCollectiveRunnerBase): + def __init__(self): + self.global_ring_id = 0 + + def get_model(self, main_prog, startup_program): + ring_id = 0 + rootid = 1 + with fluid.program_guard(main_prog, startup_program): + tindata = layers.data( + name="tindata", shape=[10, 1000], dtype='float32') + toutdata = main_prog.current_block().create_var( + name="outofreduce", + dtype='float32', + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False) + main_prog.global_block().append_op( + type="c_reduce_sum", + inputs={'X': tindata}, + attrs={ + 'ring_id': ring_id, + 'use_calc_stream': True, + 'root_id': rootid + }, + outputs={'Out': toutdata}) + main_prog.global_block().append_op( + type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) + return toutdata + + +if __name__ == "__main__": + runtime_main(TestCollectiveReduce, "reduce", 0) diff --git a/python/paddle/fluid/tests/unittests/collective_scatter_op.py b/python/paddle/fluid/tests/unittests/collective_scatter_op.py new file mode 100644 index 0000000000000000000000000000000000000000..efe5e17bcce1ecddf859edbb3543876fe5fc9f89 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/collective_scatter_op.py @@ -0,0 +1,71 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import numpy as np +import argparse +import os +import sys +import signal +import time +import socket +from contextlib import closing +from six import string_types +import math +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +import paddle.fluid.unique_name as nameGen +from paddle.fluid import core +import unittest +from multiprocessing import Process +import paddle.fluid.layers as layers +from functools import reduce +from test_collective_base import TestCollectiveRunnerBase, runtime_main + + +class TestCollectiveScatter(TestCollectiveRunnerBase): + def __init__(self): + self.global_ring_id = 0 + + def get_model(self, main_prog, startup_program): + ring_id = 0 + rootid = 1 + with fluid.program_guard(main_prog, startup_program): + tindata = layers.data( + name="tindata", shape=[10, 1000], dtype='float32') + toutdata = main_prog.current_block().create_var( + name="outofreduce", + dtype='float32', + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False) + main_prog.global_block().append_op( + type="c_scatter", + inputs={'X': tindata}, + attrs={'ring_id': ring_id, + 'root': rootid, + 'nranks': 2}, + outputs={'Out': toutdata}) + main_prog.global_block().append_op( + type="c_sync_comm_stream", + inputs={'X': toutdata}, + outputs={'Out': toutdata}, + attrs={'ring_id': ring_id}) + return toutdata + + +if __name__ == "__main__": + runtime_main(TestCollectiveScatter, "scatter", 0) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index cb0fd12c22b82087ba8e19dc94351e9964802884..73b546b95cfeb8032c6e99eabe24c883d1f5f66c 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -162,30 +162,24 @@ class TestDistCTR2x2(FleetDistRunnerBase): exe = fluid.Executor(fluid.CPUPlace()) fleet.init_worker() - exe.run(fleet.startup_program) - + exe.run(fluid.default_startup_program()) batch_size = 4 train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size) self.reader.decorate_sample_list_generator(train_reader) - compiled_prog = fluid.compiler.CompiledProgram( - fleet.main_program).with_data_parallel( - loss_name=self.avg_cost.name, - build_strategy=self.strategy.get_build_strategy(), - exec_strategy=self.strategy.get_execute_strategy()) - for epoch_id in range(1): self.reader.start() try: pass_start = time.time() while True: - loss_val = exe.run(program=compiled_prog, + loss_val = exe.run(program=fluid.default_main_program(), fetch_list=[self.avg_cost.name]) loss_val = np.mean(loss_val) - reduce_output = fleet_util.all_reduce( - np.array(loss_val), mode="sum") - loss_all_trainer = fleet_util.all_gather(float(loss_val)) - loss_val = float(reduce_output) / len(loss_all_trainer) + # TODO(randomly fail) + # reduce_output = fleet_util.all_reduce( + # np.array(loss_val), mode="sum") + # loss_all_trainer = fleet_util.all_gather(float(loss_val)) + # loss_val = float(reduce_output) / len(loss_all_trainer) message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id, loss_val) fleet_util.print_on_rank(message, 0) @@ -208,7 +202,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): exe = fluid.Executor(fluid.CPUPlace()) fleet.init_worker() - exe.run(fleet.startup_program) + exe.run(fluid.default_startup_program()) thread_num = 2 batch_size = 128 @@ -230,7 +224,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): pass_start = time.time() dataset.set_filelist(filelist) 
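The ``c_scatter`` op exercised above goes the other way: the root rank's tensor is partitioned across ``nranks`` and each rank keeps one slice. A numpy sketch of the expected per-rank result, assuming the split is taken along the first dimension:

.. code-block:: python

    import numpy as np

    def scatter_from_root(root_tensor, nranks, rank):
        """Each rank receives an equal slice of the root rank's tensor along axis 0."""
        return np.split(root_tensor, nranks, axis=0)[rank]

    full = np.arange(10 * 1000, dtype='float32').reshape(10, 1000)
    part_rank0 = scatter_from_root(full, nranks=2, rank=0)   # shape (5, 1000)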
exe.train_from_dataset( - program=fleet.main_program, + program=fluid.default_main_program(), dataset=dataset, fetch_list=[self.avg_cost], fetch_info=["cost"], diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..03d0fa447daf3e3a502e7d77491045f92695496c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py @@ -0,0 +1,152 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Distribute CTR model for test fleet api +""" + +from __future__ import print_function + +import shutil +import tempfile +import time + +import paddle +import paddle.fluid as fluid +import os +import numpy as np + +import ctr_dataset_reader +from test_dist_fleet_base import runtime_main, FleetDistRunnerBase +from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader +from paddle.distributed.fleet.base.util_factory import fleet_util + +# Fix seed for test +fluid.default_startup_program().random_seed = 1 +fluid.default_main_program().random_seed = 1 + + +class TestDistGpuPsCTR2x2(TestDistCTR2x2): + """ + For test CTR model, using Fleet api & PS-GPU + """ + + def check_model_right(self, dirname): + model_filename = os.path.join(dirname, "__model__") + + with open(model_filename, "rb") as f: + program_desc_str = f.read() + + program = fluid.Program.parse_from_string(program_desc_str) + with open(os.path.join(dirname, "__model__.proto"), "w") as wn: + wn.write(str(program)) + + def do_pyreader_training(self, fleet): + """ + do training using dataset, using fetch handler to catch variable + Args: + fleet(Fleet api): the fleet object of Parameter Server, define distribute training role + """ + device_id = int(os.getenv("FLAGS_selected_gpus", "0")) + place = fluid.CUDAPlace(device_id) + exe = fluid.Executor(place) + fleet.init_worker() + exe.run(fleet.startup_program) + + batch_size = 4 + train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size) + self.reader.decorate_sample_list_generator(train_reader) + + for epoch_id in range(1): + self.reader.start() + try: + pass_start = time.time() + while True: + loss_val = exe.run(program=fleet.main_program, + fetch_list=[self.avg_cost.name]) + loss_val = np.mean(loss_val) + reduce_output = fleet_util.all_reduce( + np.array(loss_val), mode="sum") + loss_all_trainer = fleet_util.all_gather(float(loss_val)) + loss_val = float(reduce_output) / len(loss_all_trainer) + message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id, + loss_val) + fleet_util.print_on_rank(message, 0) + + pass_time = time.time() - pass_start + except fluid.core.EOFException: + self.reader.reset() + + model_dir = tempfile.mkdtemp() + fleet.save_inference_model( + exe, model_dir, [feed.name for feed in self.feeds], self.avg_cost) + self.check_model_right(model_dir) + if fleet.is_first_worker(): + fleet.save_persistables(executor=exe, dirname=model_dir) + 
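The GPU PS runner keeps the cross-trainer loss averaging that the CPU runner above just commented out: sum the per-trainer losses with ``all_reduce`` and divide by the trainer count obtained from ``all_gather``. A single-process numpy stand-in for that reduction (the ``fleet_util`` calls are replaced by their local equivalents):

.. code-block:: python

    import numpy as np

    local_losses = [0.8, 1.2, 1.0]                  # one loss value per trainer
    reduce_output = np.sum(local_losses)            # fleet_util.all_reduce(loss, mode="sum")
    loss_all_trainer = list(local_losses)           # fleet_util.all_gather(float(loss))
    mean_loss = float(reduce_output) / len(loss_all_trainer)   # 1.0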
shutil.rmtree(model_dir) + fleet.stop_worker() + + def do_dataset_training(self, fleet): + dnn_input_dim, lr_input_dim, train_file_path = ctr_dataset_reader.prepare_data( + ) + + device_id = int(os.getenv("FLAGS_selected_gpus", "0")) + place = fluid.CUDAPlace(device_id) + exe = fluid.Executor(place) + + fleet.init_worker() + exe.run(fleet.startup_program) + + thread_num = 2 + batch_size = 128 + filelist = [] + for _ in range(thread_num): + filelist.append(train_file_path) + + # config dataset + dataset = paddle.fleet.DatasetFactory().create_dataset() + dataset.set_batch_size(batch_size) + dataset.set_use_var(self.feeds) + pipe_command = 'python ctr_dataset_reader.py' + dataset.set_pipe_command(pipe_command) + + dataset.set_filelist(filelist) + dataset.set_thread(thread_num) + + for epoch_id in range(1): + pass_start = time.time() + dataset.set_filelist(filelist) + exe.train_from_dataset( + program=fleet.main_program, + dataset=dataset, + fetch_list=[self.avg_cost], + fetch_info=["cost"], + print_period=2, + debug=int(os.getenv("Debug", "0"))) + pass_time = time.time() - pass_start + + if os.getenv("SAVE_MODEL") == "1": + model_dir = tempfile.mkdtemp() + fleet.save_inference_model(exe, model_dir, + [feed.name for feed in self.feeds], + self.avg_cost) + self.check_model_right(model_dir) + if fleet.is_first_worker(): + fleet.save_persistables(executor=exe, dirname=model_dir) + shutil.rmtree(model_dir) + + fleet.stop_worker() + + +if __name__ == "__main__": + runtime_main(TestDistGpuPsCTR2x2) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_debug_gloo.py b/python/paddle/fluid/tests/unittests/dist_fleet_debug_gloo.py new file mode 100644 index 0000000000000000000000000000000000000000..7e811408291a0a3f784ff2b744ce616d6bfbe767 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dist_fleet_debug_gloo.py @@ -0,0 +1,61 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function +import os +import time +import numpy as np +import logging +import paddle +import paddle.fluid as fluid +#import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet +from paddle.fluid.transpiler.distribute_transpiler import DistributeTranspilerConfig +logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger("fluid") +logger.setLevel(logging.INFO) +#role = role_maker.GeneralRoleMaker( +#init_timeout_seconds=100, +#run_timeout_seconds=100, +#http_ip_port="127.0.0.1:26001") + +#role = role_maker.PaddleCloudRoleMaker(http_ip_port="127.0.0.1:26001") + +#role = role_maker.GeneralRoleMaker(path="./tmp4") +logger.info("Begin") +res = [0, 0] + +logger.info(res) + +role = role_maker.PaddleCloudRoleMaker(path="./tmp4") + +fleet.init(role) +print("init wancheng") # +#if fleet.is_worker(): +# import time +# time.sleep(3) + +a = [5] +b = [2] +res = [0] +if fleet.worker_index() == 0: + role._all_reduce(role._node_type_comm, a) +elif fleet.worker_index() == 1: + role._all_reduce(role._node_type_comm, b) + +#logger.info(res) +#print("res ", res) + +#role._barrier_all() diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py index c69e1247a9bb8f97350ae79bcc6df1bc645204ea..77697896b4d556da8a98c17e281b3d7a6999fd64 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py @@ -152,24 +152,18 @@ class TestDistCTR2x2(FleetDistRunnerBase): exe = fluid.Executor(fluid.CPUPlace()) fleet.init_worker() - exe.run(fleet.startup_program) + exe.run(fluid.default_startup_program()) batch_size = 4 train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size) self.reader.decorate_sample_list_generator(train_reader) - compiled_prog = fluid.compiler.CompiledProgram( - fleet.main_program).with_data_parallel( - loss_name=self.avg_cost.name, - build_strategy=self.strategy.get_build_strategy(), - exec_strategy=self.strategy.get_execute_strategy()) - for epoch_id in range(1): self.reader.start() try: while True: - loss_val = exe.run(program=compiled_prog, + loss_val = exe.run(program=fluid.default_main_program(), fetch_list=[self.avg_cost.name]) loss_val = np.mean(loss_val) print("TRAIN ---> pass: {} loss: {}\n".format(epoch_id, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..75bff108dd43665df0fc1c8b166a935946b4fbc7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py @@ -0,0 +1,103 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest + +import numpy as np +import paddle +import paddle.fluid as fluid + +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import create_paddle_predictor + + +class PredictorTools(object): + ''' + Paddle-Inference predictor + ''' + + def __init__(self, model_path, params_file, feeds_var): + ''' + __init__ + ''' + self.model_path = model_path + self.params_file = params_file + + self.feeds_var = feeds_var + + def _load_model_and_set_config(self): + ''' + load model from file and set analysis config + ''' + if os.path.exists(os.path.join(self.model_path, self.params_file)): + config = AnalysisConfig( + os.path.join(self.model_path, "__model__"), + os.path.join(self.model_path, self.params_file)) + else: + config = AnalysisConfig(os.path.join(self.model_path)) + + if fluid.is_compiled_with_cuda(): + config.enable_use_gpu(100, 0) + else: + config.disable_gpu() + config.switch_specify_input_names(True) + config.switch_use_feed_fetch_ops(False) + config.enable_memory_optim() + config.disable_glog_info() + config.switch_ir_optim(True) + + return config + + def _get_analysis_outputs(self, config): + ''' + Return outputs of paddle inference + Args: + config (AnalysisConfig): predictor configs + Returns: + outs (numpy array): forward netwrok prediction outputs + ''' + predictor = create_paddle_predictor(config) + tensor_shapes = predictor.get_input_tensor_shape() + names = predictor.get_input_names() + for i, name in enumerate(names): + #assert name in self.feeds_var, '{} not in feeded dict'.format(name) + shape = tensor_shapes[name] + tensor = predictor.get_input_tensor(name) + feed_data = self.feeds_var[i] + tensor.copy_from_cpu(np.array(feed_data)) + if type(feed_data) == fluid.LoDTensor: + tensor.set_lod(feed_data.lod()) + + # ensure no diff in multiple repeat times + repeat_time = 10 + for i in range(repeat_time): + predictor.zero_copy_run() + + output_names = predictor.get_output_names() + outs = [ + predictor.get_output_tensor(out_name).copy_to_cpu() + for out_name in output_names + ] + + return outs + + def __call__(self): + ''' + __call__ + ''' + config = self._load_model_and_set_config() + outputs = self._get_analysis_outputs(config) + + return outputs diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py index 27777a62799e104ac8a08fd67df8bdbe2a256724..f105dd5e94744ecca96ee0282432ff4946ab5e04 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py @@ -23,6 +23,8 @@ from paddle.fluid.dygraph.io import VARIABLE_FILENAME from bert_dygraph_model import PretrainModelLayer from bert_utils import get_bert_config, get_feed_data_reader +from predictor_utils import PredictorTools + program_translator = ProgramTranslator() place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace( ) @@ -152,6 +154,12 @@ def predict_dygraph_jit(data): return pred_res +def predict_analysis_inference(data): + output = PredictorTools(MODEL_SAVE_PATH, VARIABLE_FILENAME, data) + out = output() + return out + + class TestBert(unittest.TestCase): def setUp(self): self.bert_config = get_bert_config() @@ -178,9 +186,11 @@ class TestBert(unittest.TestCase): dygraph_pred_res = predict_dygraph(self.bert_config, data) static_pred_res = predict_static(data) 
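A minimal usage sketch of the ``PredictorTools`` helper defined above, mirroring how the dygraph-to-static tests call it; the model directory, params file name and feed shape below are placeholders (the tests pass ``MODEL_SAVE_PATH`` / ``VARIABLE_FILENAME`` and their own inputs):

.. code-block:: python

    import numpy as np
    from predictor_utils import PredictorTools

    feeds = [np.random.random(size=(1, 3, 224, 224)).astype('float32')]   # placeholder input
    predictor = PredictorTools('./inference_model', '__variables__', feeds)
    outputs = predictor()   # list of numpy arrays produced by Paddle-Inference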
dygraph_jit_pred_res = predict_dygraph_jit(data) + predictor_pred_res = predict_analysis_inference(data) - for dy_res, st_res, dy_jit_res in zip( - dygraph_pred_res, static_pred_res, dygraph_jit_pred_res): + for dy_res, st_res, dy_jit_res, predictor_res in zip( + dygraph_pred_res, static_pred_res, dygraph_jit_pred_res, + predictor_pred_res): self.assertTrue( np.allclose(st_res, dy_res), "dygraph_res: {},\n static_res: {}".format( @@ -191,6 +201,11 @@ class TestBert(unittest.TestCase): "dygraph_jit_res: {},\n static_res: {}".format( dy_jit_res[~np.isclose(st_res, dy_jit_res)], st_res[~np.isclose(st_res, dy_jit_res)])) + self.assertTrue( + np.allclose(st_res, predictor_res), + "dygraph_jit_res: {},\n static_res: {}".format( + predictor_res[~np.isclose(st_res, predictor_res)], + st_res[~np.isclose(st_res, predictor_res)])) break diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index c01705dbe9ba655d9cfb538dfdde0474ffa30855..a8cef6e28a6f496d67b5409a8506f5af64266144 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -22,6 +22,8 @@ from paddle.fluid.dygraph import to_variable from paddle.fluid.dygraph import declarative, ProgramTranslator from paddle.fluid.dygraph.io import VARIABLE_FILENAME +from predictor_utils import PredictorTools + SEED = 2020 DATATYPE = 'float32' program_translator = ProgramTranslator() @@ -693,9 +695,11 @@ class TestTrain(unittest.TestCase): static_pred_res = self.predict_static(video_data) dygraph_pred_res = self.predict_dygraph(video_data) dygraph_jit_pred_res = self.predict_dygraph_jit(video_data) + predictor_pred_res = self.predict_analysis_inference(video_data) - for dy_res, st_res, dy_jit_res in zip( - dygraph_pred_res, static_pred_res, dygraph_jit_pred_res): + for dy_res, st_res, dy_jit_res, predictor_res in zip( + dygraph_pred_res, static_pred_res, dygraph_jit_pred_res, + predictor_pred_res): self.assertTrue( np.allclose(st_res, dy_res), "dygraph_res: {},\n static_res: {}".format( @@ -706,6 +710,11 @@ class TestTrain(unittest.TestCase): "dygraph_jit_res: {},\n static_res: {}".format( dy_jit_res[~np.isclose(st_res, dy_jit_res)], st_res[~np.isclose(st_res, dy_jit_res)])) + self.assertTrue( + np.allclose(st_res, predictor_res), + "dygraph_jit_res: {},\n static_res: {}".format( + predictor_res[~np.isclose(st_res, predictor_res)], + st_res[~np.isclose(st_res, predictor_res)])) break def predict_dygraph(self, data): @@ -749,6 +758,11 @@ class TestTrain(unittest.TestCase): return pred_res + def predict_analysis_inference(self, data): + output = PredictorTools(self.args.infer_dir, VARIABLE_FILENAME, [data]) + out = output() + return out + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index fdf6daf6263e2bb7cf8ef2c3ad1373fb079f0037..0e2bac9fa5b5c9e47ce8a08b0187531a3b83dcee 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -27,6 +27,8 @@ from paddle.fluid.dygraph import Embedding, Linear, GRUUnit from paddle.fluid.dygraph import declarative, ProgramTranslator from paddle.fluid.dygraph.io import VARIABLE_FILENAME +from predictor_utils import PredictorTools + SEED = 2020 program_translator = ProgramTranslator() @@ -536,6 +538,7 @@ class 
TestLACModel(unittest.TestCase): dy_pre = self.predict_dygraph(batch) st_pre = self.predict_static(batch) dy_jit_pre = self.predict_dygraph_jit(batch) + predictor_pre = self.predict_analysis_inference(batch) self.assertTrue( np.allclose(dy_pre, st_pre), msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) @@ -543,6 +546,10 @@ class TestLACModel(unittest.TestCase): np.allclose(dy_jit_pre, st_pre), msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre)) + self.assertTrue( + np.allclose(predictor_pre, st_pre), + msg="predictor_pre:\n {}\n, st_pre: \n{}.".format(predictor_pre, + st_pre)) def predict_dygraph(self, batch): words, targets, length = batch @@ -591,6 +598,14 @@ class TestLACModel(unittest.TestCase): return pred_res.numpy() + def predict_analysis_inference(self, batch): + words, targets, length = batch + + output = PredictorTools(self.args.model_save_dir, VARIABLE_FILENAME, + [words, length]) + out = output() + return out + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py index b8aa0379638fadd19b4956a56c1a3e4811558535..88513749048e4666eb7b13bd0029f9f46a05d17f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py @@ -29,6 +29,8 @@ from paddle.fluid.dygraph.jit import declarative from paddle.fluid.dygraph.io import VARIABLE_FILENAME from paddle.fluid.dygraph.dygraph_to_static import ProgramTranslator +from predictor_utils import PredictorTools + SEED = 2020 @@ -220,6 +222,10 @@ class TestMNISTWithDeclarative(TestMNIST): dygraph_infer_out = self.jit_load_and_run_inference_dygraph( infer_model_path, inputs) self.assertTrue(np.allclose(gt_out.numpy(), dygraph_infer_out)) + # load in Paddle-Inference + predictor_infer_out = self.predictor_load_and_run_inference_analysis( + infer_model_path, inputs) + self.assertTrue(np.allclose(gt_out.numpy(), predictor_infer_out)) @switch_to_static_graph def jit_load_and_run_inference_static(self, model_path, inputs): @@ -241,6 +247,11 @@ class TestMNISTWithDeclarative(TestMNIST): pred = infer_net(inputs[0]) return pred.numpy() + def predictor_load_and_run_inference_analysis(self, model_path, inputs): + output = PredictorTools(model_path, VARIABLE_FILENAME, inputs) + out = output() + return out + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index ef0f6e7f0831eea8d2f694413c5231ecea292ff4..5ec3de5871dd6787c06938a8b771f7d14e54e1e0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -23,6 +23,8 @@ from paddle.fluid.dygraph.io import VARIABLE_FILENAME import unittest +from predictor_utils import PredictorTools + # Note: Set True to eliminate randomness. # 1. For one operation, cuDNN has several algorithms, # some algorithm results are non-deterministic, like convolution algorithms. 
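The note above about non-deterministic cuDNN algorithms is the usual reason these comparisons allow a tolerance. For reference, a hedged sketch of forcing deterministic convolution algorithms through the GFlags-from-environment mechanism Paddle exposes (the flag name is the standard ``FLAGS_cudnn_deterministic``; it must be set before the framework initializes CUDA and trades speed for reproducibility):

.. code-block:: python

    import os
    # Set before importing paddle so the flag is picked up at framework initialization.
    os.environ['FLAGS_cudnn_deterministic'] = '1'

    import paddle.fluid as fluid  # imported after the flag is in the environment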
@@ -550,6 +552,12 @@ def predict_dygraph_jit(args, data): return pred_res.numpy() +def predict_analysis_inference(args, data): + output = PredictorTools(args.model_save_path, VARIABLE_FILENAME, [data]) + out = output() + return out + + class TestMobileNet(unittest.TestCase): def setUp(self): self.args = Args() @@ -577,12 +585,18 @@ class TestMobileNet(unittest.TestCase): dy_pre = predict_dygraph(self.args, image) st_pre = predict_static(self.args, image) dy_jit_pre = predict_dygraph_jit(self.args, image) + predictor_pre = predict_analysis_inference(self.args, image) self.assertTrue( np.allclose(dy_pre, st_pre), msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) self.assertTrue( np.allclose(dy_jit_pre, st_pre), msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre)) + self.assertTrue( + np.allclose( + predictor_pre, st_pre, atol=1e-5), + msg="inference_pred_res:\n {}\n, st_pre: \n{}.".format( + predictor_pre, st_pre)) def test_mobile_net(self): # MobileNet-V1 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 90d210eba1e0fb1eeaf5eb0c8cbc0ff46c35328f..46eb2b42e9265ac7f6340ee0be3a7127e5246eef 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -26,12 +26,15 @@ from paddle.fluid.dygraph import declarative, ProgramTranslator from paddle.fluid.dygraph.nn import BatchNorm, Conv2D, Linear, Pool2D from paddle.fluid.dygraph.io import VARIABLE_FILENAME +from predictor_utils import PredictorTools + SEED = 2020 IMAGENET1000 = 1281167 base_lr = 0.001 momentum_rate = 0.9 l2_decay = 1e-4 -batch_size = 8 +# NOTE: Reduce batch_size from 8 to 2 to avoid unittest timeout. 
+batch_size = 2 epoch_num = 1 place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \ else fluid.CPUPlace() @@ -306,6 +309,12 @@ def predict_dygraph_jit(data): return pred_res.numpy() +def predict_analysis_inference(data): + output = PredictorTools(MODEL_SAVE_PATH, VARIABLE_FILENAME, [data]) + out = output() + return out + + class TestResnet(unittest.TestCase): def train(self, to_static): program_translator.enable(to_static) @@ -316,12 +325,17 @@ class TestResnet(unittest.TestCase): dy_pre = predict_dygraph(image) st_pre = predict_static(image) dy_jit_pre = predict_dygraph_jit(image) + predictor_pre = predict_analysis_inference(image) self.assertTrue( np.allclose(dy_pre, st_pre), msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) self.assertTrue( np.allclose(dy_jit_pre, st_pre), msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre)) + self.assertTrue( + np.allclose(predictor_pre, st_pre), + msg="predictor_pre:\n {}\n, st_pre: \n{}.".format(predictor_pre, + st_pre)) def test_resnet(self): static_loss = self.train(to_static=True) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index c34e9478c8eab38c429c01db5fae460eeac6a4bd..30cba78fec19c169966e85ff43e79c3a00889616 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -26,6 +26,8 @@ from paddle.fluid.dygraph import declarative from paddle.fluid.dygraph import ProgramTranslator from paddle.fluid.dygraph.io import VARIABLE_FILENAME +from predictor_utils import PredictorTools + SEED = 2020 np.random.seed(SEED) @@ -434,6 +436,12 @@ def predict_dygraph_jit(data): return pred_res.numpy() +def predict_analysis_inference(data): + output = PredictorTools(MODEL_SAVE_PATH, VARIABLE_FILENAME, [data]) + out = output() + return out + + class TestSeResnet(unittest.TestCase): def setUp(self): self.train_reader = paddle.batch( @@ -447,12 +455,17 @@ class TestSeResnet(unittest.TestCase): dy_pre = predict_dygraph(image) st_pre = predict_static(image) dy_jit_pre = predict_dygraph_jit(image) + predictor_pre = predict_analysis_inference(image) self.assertTrue( np.allclose(dy_pre, st_pre), msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre)) self.assertTrue( np.allclose(dy_jit_pre, st_pre), msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre)) + self.assertTrue( + np.allclose(predictor_pre, st_pre), + msg="predictor_pre:\n {}\n, st_pre: \n{}.".format(predictor_pre, + st_pre)) def test_check_result(self): pred_1, loss_1, acc1_1, acc5_1 = train( diff --git a/python/paddle/fluid/tests/unittests/test_hdfs.py b/python/paddle/fluid/tests/unittests/hdfs_test_utils.py similarity index 62% rename from python/paddle/fluid/tests/unittests/test_hdfs.py rename to python/paddle/fluid/tests/unittests/hdfs_test_utils.py index 75e2f5d679204c33f922ea8ee6be71a900c83cd6..6a752bc3053d7d0672bd0002250252c3bbbfa1e1 100644 --- a/python/paddle/fluid/tests/unittests/test_hdfs.py +++ b/python/paddle/fluid/tests/unittests/hdfs_test_utils.py @@ -24,7 +24,7 @@ from paddle.distributed.fleet.utils import LocalFS, HDFSClient, FSTimeOut, FSFil java_home = os.environ["JAVA_HOME"] -class FSTest(unittest.TestCase): +class FSTestBase(unittest.TestCase): def _test_dirs(self, fs): dir_path = os.path.abspath("./test_dir") fs.delete(dir_path) @@ -188,106 +188,6 @@ class FSTest(unittest.TestCase): except Exception as e: pass - def 
test_exists(self): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=15 * 1000, - sleep_inter=100) - self.assertFalse(fs.is_exist(os.path.abspath("./xxxx"))) - self.assertFalse(fs.is_dir(os.path.abspath("./xxxx"))) - self.assertTrue(fs.is_dir(os.path.abspath("./xxx/.."))) - dirs, files = fs.ls_dir(os.path.abspath("./test_hdfs.py")) - self.assertTrue(dirs == []) - self.assertTrue(len(files) == 1) - dirs, files = fs.ls_dir(os.path.abspath("./xxx/..")) - - def test_hdfs(self): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=15 * 1000, - sleep_inter=100) - self._test_rm(fs) - self._test_touch(fs) - self._test_dirs(fs) - self._test_upload(fs) - - self._test_download(fs) - self._test_mkdirs(fs) - self._test_list_dir(fs) - self._test_try_upload(fs) - self._test_try_download(fs) - - def test_local(self): - fs = LocalFS() - self._test_rm(fs) - self._test_touch(fs) - self._test_dirs(fs) - self._test_touch_file(fs) - self._test_mkdirs(fs) - self._test_list_dir(fs) - self._test_try_upload(fs) - self._test_try_download(fs) - - def test_timeout(self): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=6 * 1000, - sleep_inter=100) - src = "hdfs_test_timeout" - dst = "new_hdfs_test_timeout" - fs.delete(dst) - fs.mkdirs(src) - fs.mkdirs(dst) - fs.mkdirs(dst + "/" + src) - output = "" - try: - fs.mv(src, dst, test_exists=False) - self.assertFalse(1, "can't execute cmd:{} output:{}".format(cmd, - output)) - except FSTimeOut as e: - print("execute mv {} to {} timeout".format(src, dst)) - - cmd = "{} -mv {} {}".format(fs._base_cmd, src, dst) - ret, output = fluid.core.shell_execute_cmd(cmd, 6 * 1000, 2 * 1000) - self.assertNotEqual(ret, 0) - print("second mv ret:{} output:{}".format(ret, output)) - - def test_is_dir(self): - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - None, - time_out=15 * 1000, - sleep_inter=100) - self.assertFalse(fs.is_dir("./test_hdfs.py")) - s = """ -java.io.IOException: Input/output error - responseErrorMsg : failed to getFileStatus, errorCode: 3, path: /user/PUBLIC_KM_Data/wangxi16/data/serving_model, lparam: d868f6bb6822c621, errorMessage: inner error - at org.apache.hadoop.util.FileSystemUtil.throwException(FileSystemUtil.java:164) - at org.apache.hadoop.util.FileSystemUtil.dealWithResponse(FileSystemUtil.java:118) - at org.apache.hadoop.lite.client.LiteClientImpl.getFileStatus(LiteClientImpl.java:696) - at org.apache.hadoop.fs.LibDFileSystemImpl.getFileStatus(LibDFileSystemImpl.java:297) - at org.apache.hadoop.fs.LiteFileSystem.getFileStatus(LiteFileSystem.java:514) - at org.apache.hadoop.fs.FsShell.test(FsShell.java:1092) - at org.apache.hadoop.fs.FsShell.run(FsShell.java:2285) - at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65) - at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79) - at org.apache.hadoop.fs.FsShell.main(FsShell.java:2353) - """ - - print("split lines:", s.splitlines()) - self.assertTrue(fs._test_match(s.splitlines()) != None) - - def test_config(self): - config = {"fs.default.name": "hdfs://xxx", "hadoop.job.ugi": "ugi"} - fs = HDFSClient( - "/usr/local/hadoop-2.7.7/", - config, - time_out=15 * 1000, - sleep_inter=100) - def _test_list_dir(self, fs): fs = HDFSClient( "/usr/local/hadoop-2.7.7/", diff --git a/python/paddle/fluid/tests/unittests/launch_function_helper.py b/python/paddle/fluid/tests/unittests/launch_function_helper.py index 64fee35710ae1b8690ec41b247ceb55e180b13c9..ecfe39b80e9051d332bb8fd2a05de2fa53770e46 100644 --- 
a/python/paddle/fluid/tests/unittests/launch_function_helper.py +++ b/python/paddle/fluid/tests/unittests/launch_function_helper.py @@ -13,6 +13,8 @@ # limitations under the License. from multiprocessing import Pool, Process import os +import socket +from contextlib import closing def launch_func(func, env_dict): @@ -20,3 +22,16 @@ def launch_func(func, env_dict): os.environ[key] = env_dict[key] proc = Process(target=func) return proc + + +def _find_free_port(port_set): + def __free_port(): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(('', 0)) + return s.getsockname()[1] + + while True: + port = __free_port() + if port not in port_set: + port_set.add(port) + return port diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py index 55c6bad9af689196f1eda7acf916518ab2c130da..d904bdbfa96ae1df83a0cacde0822611ac55757e 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py @@ -112,13 +112,10 @@ class TestMKLDNNSwishDim2(TestSwish): def setUp(self): super(TestMKLDNNSwishDim2, self).setUp() - x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) - beta = 2.3 - out = x * expit(beta * x) + self.attrs["use_mkldnn"] = True - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} - self.outputs = {'Out': out} - self.attrs = {"use_mkldnn": True, "beta": beta} + def init_dtype(self): + self.dtype = np.float32 def init_dtype(self): self.dtype = np.float32 diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..1320623f8f8422f14677a3ca629735838dc94aa8 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sync_batch_norm.py @@ -0,0 +1,106 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
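The _find_free_port helper added to launch_function_helper.py above binds a temporary socket to port 0 so the OS assigns an unused port, and records each returned port in port_set so repeated calls never hand out the same port twice. A short usage sketch (the endpoint string is illustrative, not taken from the diff):

from launch_function_helper import _find_free_port

used_ports = set()
port_a = _find_free_port(used_ports)  # OS-assigned free port, recorded in used_ports
port_b = _find_free_port(used_ports)  # a second, distinct free port
# e.g. build two local trainer endpoints for a parallel test
endpoints = "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b)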
+ +from __future__ import print_function + +import os +import contextlib +import unittest +import numpy as np +import six +import pickle + +import paddle +import paddle.fluid as fluid +import paddle.fluid.dygraph as dygraph +from paddle.fluid import core +from paddle.fluid.optimizer import SGDOptimizer +from paddle.nn import Conv2d, Pool2D, Linear, SyncBatchNorm +from paddle.fluid.dygraph.base import to_variable + +from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase + + +class TestLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None): + super(TestLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) + + self._sync_batch_norm = SyncBatchNorm(num_filters) + + self._conv2 = Conv2d( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False) + + self._sync_batch_norm2 = SyncBatchNorm( + num_filters, + weight_attr=False, + bias_attr=False, + track_running_stats=False) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._sync_batch_norm(y) + y = self._conv2(y) + y = self._sync_batch_norm2(y) + + return y + + +class TestSyncBatchNorm(TestParallelDyGraphRunnerBase): + def get_model(self): + model = TestLayer(3, 64, 7) + train_reader = paddle.batch( + paddle.dataset.flowers.test(use_xmap=False), + batch_size=32, + drop_last=True) + opt = fluid.optimizer.Adam( + learning_rate=1e-3, parameter_list=model.parameters()) + return model, train_reader, opt + + def run_one_loop(self, model, opt, data): + batch_size = len(data) + dy_x_data = np.array([x[0].reshape(3, 224, 224) + for x in data]).astype('float32') + img = to_variable(dy_x_data) + img.stop_gradient = False + + out = model(img) + + out = fluid.layers.mean(out) + + return out + + +if __name__ == "__main__": + runtime_main(TestSyncBatchNorm) diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py index ef4779f0e6f2df2f0b79f776d1e7b6c5cbf31a22..ec6b81f138321f2119a5a5aaf4b5ba9ae8f7e69b 100644 --- a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -34,7 +34,7 @@ class TestParallelExecutorBase(unittest.TestCase): def check_network_convergence(cls, method, use_cuda=True, - iter=50, + iter=5, batch_size=None, feed_dict=None, feed_data_reader=None, diff --git a/python/paddle/fluid/tests/unittests/parallel_test.sh b/python/paddle/fluid/tests/unittests/parallel_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..9da4f035345d7f04b69a1c9483cba7022ad10baa --- /dev/null +++ b/python/paddle/fluid/tests/unittests/parallel_test.sh @@ -0,0 +1,60 @@ +#!/bin/bash +unset https_proxy http_proxy +export FLAGS_rpc_disable_reuse_port=1 + +name=${TEST_TARGET_NAME} +UnitTests=${UnitTests} +TEST_TIMEOUT=${TEST_TIMEOUT} + +if [[ ${name}"x" == "x" ]]; then + echo "can't find name, please set TEST_TARGET_NAME first" + exit 1 +fi + +if [[ ${UnitTests}"x" == "x" ]]; then + echo "can't find UnitTests, please set TEST_TARGET_NAME first" + exit 1 +fi + +if [[ ${TEST_TIMEOUT}"x" == "x" ]]; then + echo "can't find ${TEST_TIMEOUT}, please set ${TEST_TIMEOUT} first" + exit 1 +fi + +if [[ 
${WITH_COVERAGE} == "ON" ]]; then + PYTHON_EXEC="python -u -m coverage run --branch -p " +else + PYTHON_EXEC="python -u " +fi + +run_time=$(( $TEST_TIMEOUT - 10 )) +echo "run_time: ${run_time}" +for ut in ${UnitTests}; do + echo "start ${ut}" + timeout -s SIGKILL ${run_time} ${PYTHON_EXEC} ./${ut}.py > ${ut}_run.log 2>&1 & +done + +FAIL=0 +for job in `jobs -p` +do + echo "jobs -p result:" `jobs -p` + echo $job + wait $job || let FAIL=FAIL+1 +done + +echo "fail_num:" $FAIL + +if [ "$FAIL" == "0" ]; +then + exit 0 +else + echo "FAIL! ($FAIL)" + + for ut in ${UnitTests}; do + log=${ut}_run.log + echo "cat ${log}" + cat $log + done + + exit 1 +fi diff --git a/python/paddle/fluid/tests/unittests/seresnext_net.py b/python/paddle/fluid/tests/unittests/seresnext_net.py index 17e0cd0d5b18652f828af9936b07cb4122f87b97..45d39afc115d292fd79a3bbc4f609ad080f74602 100644 --- a/python/paddle/fluid/tests/unittests/seresnext_net.py +++ b/python/paddle/fluid/tests/unittests/seresnext_net.py @@ -36,7 +36,7 @@ remove_dropout = False # and Executor is different. remove_bn = False -remove_cudnn_conv = False +remove_cudnn_conv = True remove_dropout = True remove_bn = True @@ -179,7 +179,7 @@ def batch_size(use_cuda): def iter(use_cuda): if use_cuda: return 10 - return 2 + return 1 gpu_img, gpu_label = init_data( diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 124767a3364b078ea2c74795c03497f3dc24ba8c..ab61a5b3cfccb0e885debe9786ae91a9754e9345 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -22,7 +22,7 @@ from scipy.special import expit, erf import paddle import paddle.fluid as fluid import paddle.nn as nn -import paddle.nn.functional as functional +import paddle.nn.functional as F from paddle.fluid import compiler, Program, program_guard @@ -118,7 +118,7 @@ class TestLogSigmoid(TestActivation): x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) out = np.log(1 / (1 + np.exp(-x))) - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.inputs = {'X': x} self.outputs = {'Out': out} def test_check_grad(self): @@ -127,6 +127,48 @@ class TestLogSigmoid(TestActivation): self.check_grad(['X'], 'Out', max_relative_error=0.008) +class TestLogSigmoidAPI(unittest.TestCase): + # test paddle.nn.LogSigmoid, paddle.nn.functional.logsigmoid + def setUp(self): + self.x_np = np.random.uniform(-1, 1, [11, 17]).astype('float32') + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [11, 17]) + out1 = F.logsigmoid(x) + m = paddle.nn.LogSigmoid() + out2 = m(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = np.log(1 / (1 + np.exp(-self.x_np))) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.logsigmoid(x) + m = paddle.nn.LogSigmoid() + out2 = m(x) + out_ref = np.log(1 / (1 + np.exp(-self.x_np))) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. 
+ self.assertRaises(TypeError, F.logsigmoid, 1) + # The input dtype must be float16, float32, float64. + x_int32 = paddle.data(name='x_int32', shape=[11, 17], dtype='int32') + self.assertRaises(TypeError, F.logsigmoid, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[11, 17], dtype='float16') + F.logsigmoid(x_fp16) + + class TestTanh(TestActivation, TestParameter): def setUp(self): self.op_type = "tanh" @@ -149,6 +191,59 @@ class TestTanh(TestActivation, TestParameter): self.dtype = np.float32 +class TestTanhAPI(unittest.TestCase): + # test paddle.tanh, paddle.nn.tanh, paddle.nn.functional.tanh + def setUp(self): + self.dtype = 'float32' + self.x_np = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [10, 12], self.dtype) + out1 = F.tanh(x) + th = paddle.nn.Tanh() + out2 = th(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = np.tanh(self.x_np) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_variable(self.x_np) + out1 = F.tanh(x) + out2 = paddle.tanh(x) + th = paddle.nn.Tanh() + out3 = th(x) + out_ref = np.tanh(self.x_np) + for r in [out1, out2, out3]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_fluid_api(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data('X', [10, 12], self.dtype) + out = fluid.layers.tanh(x) + exe = fluid.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = np.tanh(self.x_np) + self.assertEqual(np.allclose(out_ref, res[0]), True) + + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. + self.assertRaises(TypeError, F.tanh, 1) + # The input dtype must be float16, float32. 
+ x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.tanh, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.tanh(x_fp16) + + class TestAtan(TestActivation, TestParameter): def setUp(self): self.op_type = "atan" @@ -327,15 +422,20 @@ class TestCoshOpError(unittest.TestCase): fluid.layers.cosh(x_fp16) -class TestTanhShrink(TestActivation): +def ref_tanhshrink(x): + out = x - np.tanh(x) + return out + + +class TestTanhshrink(TestActivation): def setUp(self): self.op_type = "tanh_shrink" self.init_dtype() - x = np.random.uniform(0.1, 1, [10, 17]).astype(self.dtype) - out = x - np.tanh(x) + x = np.random.uniform(10, 20, [10, 17]).astype(self.dtype) + out = ref_tanhshrink(x) - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.inputs = {'X': x} self.outputs = {'Out': out} def test_check_grad(self): @@ -344,52 +444,224 @@ class TestTanhShrink(TestActivation): self.check_grad(['X'], 'Out') +class TestTanhshrinkAPI(unittest.TestCase): + # test paddle.nn.Tanhshrink, paddle.nn.functional.tanhshrink + def setUp(self): + self.x_np = np.random.uniform(10, 20, [10, 17]).astype(np.float64) + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.x_np.shape, self.x_np.dtype) + out1 = F.tanhshrink(x) + tanhshrink = paddle.nn.Tanhshrink() + out2 = tanhshrink(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_tanhshrink(self.x_np) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.tanhshrink(x) + tanhshrink = paddle.nn.Tanhshrink() + out2 = tanhshrink(x) + out_ref = ref_tanhshrink(self.x_np) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_fluid_api(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data('X', self.x_np.shape, self.x_np.dtype) + out = fluid.layers.tanh_shrink(x) + exe = fluid.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = ref_tanhshrink(self.x_np) + self.assertEqual(np.allclose(out_ref, res[0]), True) + + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. + self.assertRaises(TypeError, F.tanhshrink, 1) + # The input dtype must be float16, float32, float64. 
+ x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.tanhshrink, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.tanhshrink(x_fp16) + + +def ref_hardshrink(x, threshold): + out = np.copy(x) + out[(out >= -threshold) & (out <= threshold)] = 0 + return out + + class TestHardShrink(TestActivation): def setUp(self): self.op_type = "hard_shrink" self.init_dtype() - threshold = 0.5 + self.threshold = 0.5 + self.set_attrs() x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) * 10 - out = np.copy(x) - out[(out >= -threshold) & (out <= threshold)] = 0 + out = ref_hardshrink(x, self.threshold) - self.attrs = {'lambda': threshold} - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.attrs = {'threshold': self.threshold} + self.inputs = {'X': x} self.outputs = {'Out': out} + def set_attrs(self): + pass + def test_check_grad(self): if self.dtype == np.float16: return self.check_grad(['X'], 'Out') -class TestHardShrinkOpError(unittest.TestCase): +class TestHardShrink_threshold_negative(TestHardShrink): + def set_attrs(self): + self.threshold = -0.1 + + +class TestHardShrinkAPI(unittest.TestCase): + # test paddle.nn.Hardshrink, paddle.nn.functional.hardshrink + def setUp(self): + self.x_np = np.random.uniform(-1, 1, [10, 12]).astype('float32') + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [10, 12]) + out1 = F.hardshrink(x) + hd = paddle.nn.Hardshrink() + out2 = hd(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_hardshrink(self.x_np, 0.5) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_variable(self.x_np) + out1 = F.hardshrink(x) + hd = paddle.nn.Hardshrink() + out2 = hd(x) + out_ref = ref_hardshrink(self.x_np, 0.5) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + + out1 = F.hardshrink(x, 0.6) + hd = paddle.nn.Hardshrink(0.6) + out2 = hd(x) + out_ref = ref_hardshrink(self.x_np, 0.6) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_fluid_api(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data('X', [10, 12]) + out = fluid.layers.hard_shrink(x) + exe = fluid.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = ref_hardshrink(self.x_np, 0.5) + self.assertEqual(np.allclose(out_ref, res[0]), True) + def test_errors(self): - with program_guard(Program()): + with paddle.static.program_guard(paddle.static.Program()): # The input type must be Variable. - self.assertRaises(TypeError, fluid.layers.hard_shrink, 1) + self.assertRaises(TypeError, F.hardshrink, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32') - self.assertRaises(TypeError, fluid.layers.hard_shrink, x_int32) + x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.hardshrink, x_int32) # support the input dtype is float16 - x_fp16 = fluid.data(name='x_fp16', shape=[12, 10], dtype='float16') - fluid.layers.hard_shrink(x_fp16) + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.hardshrink(x_fp16) + + +def ref_hardtanh(x, min=-1.0, max=1.0): + out = np.copy(x) + out[np.abs(x - min) < 0.005] = min + 0.02 + out[np.abs(x - max) < 0.005] = max + 0.02 + out = np.minimum(np.maximum(x, min), max) + return out + + +class TestHardtanhAPI(unittest.TestCase): + # test paddle.nn.Hardtanh, paddle.nn.functional.hardtanh + def setUp(self): + self.x_np = np.random.uniform(-3, 3, [10, 12]).astype('float32') + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [10, 12]) + out1 = F.hardtanh(x) + m = paddle.nn.Hardtanh() + out2 = m(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_hardtanh(self.x_np) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_variable(self.x_np) + out1 = F.hardtanh(x) + m = paddle.nn.Hardtanh() + out2 = m(x) + out_ref = ref_hardtanh(self.x_np) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + + out1 = F.hardtanh(x, -2.0, 2.0) + m = paddle.nn.Hardtanh(-2.0, 2.0) + out2 = m(x) + out_ref = ref_hardtanh(self.x_np, -2.0, 2.0) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. + self.assertRaises(TypeError, F.hardtanh, 1) + # The input dtype must be float16, float32, float64. 
+ x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.hardtanh, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.hardtanh(x_fp16) -class TestSoftShrink(TestActivation): +def ref_softshrink(x, threshold=0.5): + out = np.copy(x) + out = (out < -threshold) * (out + threshold) + (out > threshold) * ( + out - threshold) + return out + + +class TestSoftshrink(TestActivation): def setUp(self): self.op_type = "softshrink" self.init_dtype() - lambda_val = 0.1 - x = np.random.uniform(0.25, 10, [10, 12]).astype(self.dtype) - out = np.copy(x) - out = (out < -lambda_val) * (out + lambda_val) + (out > lambda_val) * ( - out - lambda_val) + threshold = 0.8 - self.attrs = {'lambda': lambda_val} - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + x = np.random.uniform(0.25, 10, [10, 12]).astype(self.dtype) + out = ref_softshrink(x, threshold) + self.inputs = {'X': x} + self.attrs = {"lambda": threshold} self.outputs = {'Out': out} def test_check_grad(self): @@ -398,17 +670,59 @@ class TestSoftShrink(TestActivation): self.check_grad(['X'], 'Out') -class TestSoftShrinkOpError(unittest.TestCase): +class TestSoftshrinkAPI(unittest.TestCase): + # test paddle.nn.Softshrink, paddle.nn.functional.softshrink + def setUp(self): + self.threshold = 0.8 + self.x_np = np.random.uniform(0.25, 10, [10, 12]).astype(np.float64) + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.x_np.shape, self.x_np.dtype) + out1 = F.softshrink(x, self.threshold) + softshrink = paddle.nn.Softshrink(self.threshold) + out2 = softshrink(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_softshrink(self.x_np, self.threshold) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.softshrink(x, self.threshold) + softshrink = paddle.nn.Softshrink(self.threshold) + out2 = softshrink(x) + out_ref = ref_softshrink(self.x_np, self.threshold) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_fluid_api(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data('X', self.x_np.shape, self.x_np.dtype) + out = fluid.layers.softshrink(x, self.threshold) + exe = fluid.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = ref_softshrink(self.x_np, self.threshold) + self.assertEqual(np.allclose(out_ref, res[0]), True) + def test_errors(self): - with program_guard(Program()): + with paddle.static.program_guard(paddle.static.Program()): # The input type must be Variable. - self.assertRaises(TypeError, fluid.layers.softshrink, 1) + self.assertRaises(TypeError, F.softshrink, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32') - self.assertRaises(TypeError, fluid.layers.softshrink, x_int32) + x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.softshrink, x_int32) + # The threshold must be no less than zero + x_fp32 = paddle.data(name='x_fp32', shape=[12, 10], dtype='float32') + self.assertRaises(ValueError, F.softshrink, x_fp32, -1.0) # support the input dtype is float16 - x_fp16 = fluid.data(name='x_fp16', shape=[12, 10], dtype='float16') - fluid.layers.softshrink(x_fp16) + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.softshrink(x_fp16) class TestSqrt(TestActivation, TestParameter): @@ -594,7 +908,7 @@ class TestRelu(TestActivation): x[np.abs(x) < 0.005] = 0.02 out = np.maximum(x, 0) - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.inputs = {'X': x} self.outputs = {'Out': out} def test_check_grad(self): @@ -603,32 +917,72 @@ class TestRelu(TestActivation): self.check_grad(['X'], 'Out') -class TestReluOpError(unittest.TestCase): +class TestReluAPI(unittest.TestCase): + # test paddle.nn.ReLU, paddle.nn.functional.relu + def setUp(self): + self.x_np = np.random.uniform(-1, 1, [10, 12]).astype('float32') + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [10, 12]) + out1 = F.relu(x) + m = paddle.nn.ReLU() + out2 = m(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = np.maximum(self.x_np, 0) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.relu(x) + m = paddle.nn.ReLU() + out2 = m(x) + out_ref = np.maximum(self.x_np, 0) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + def test_errors(self): - with program_guard(Program()): + with paddle.static.program_guard(paddle.static.Program()): # The input type must be Variable. - self.assertRaises(TypeError, fluid.layers.relu, 1) + self.assertRaises(TypeError, F.relu, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32') - self.assertRaises(TypeError, fluid.layers.relu, x_int32) + x_int32 = paddle.data(name='x_int32', shape=[10, 12], dtype='int32') + self.assertRaises(TypeError, F.relu, x_int32) # support the input dtype is float16 - x_fp16 = fluid.layers.data( - name='x_fp16', shape=[12, 10], dtype='float16') - fluid.layers.relu(x_fp16) + x_fp16 = paddle.data(name='x_fp16', shape=[10, 12], dtype='float16') + F.relu(x_fp16) + + +def ref_leaky_relu(x, alpha=0.01): + out = np.copy(x) + out[out < 0] *= alpha + return out class TestLeakyRelu(TestActivation): + def get_alpha(self): + return 0.02 + def setUp(self): self.op_type = "leaky_relu" self.init_dtype() + alpha = self.get_alpha() + np.random.seed(10) x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) # The same reason with TestAbs - x[np.abs(x) < 0.005] = 0.02 - out = np.maximum(x, 0.02 * x) + x[np.abs(x) < 0.005] = 0.05 + out = ref_leaky_relu(x, alpha) - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.inputs = {'X': x} self.outputs = {'Out': out} + self.attrs = {'alpha': alpha} def test_check_grad(self): if self.dtype == np.float16: @@ -636,18 +990,78 @@ class TestLeakyRelu(TestActivation): self.check_grad(['X'], 'Out') -class TestLeakyReluOpError(unittest.TestCase): +class TestLeakyReluAlpha1(TestLeakyRelu): + def get_alpha(self): + return 2 + + +class TestLeakyReluAlpha2(TestLeakyRelu): + def get_alpha(self): + return -0.01 + + +class TestLeakyReluAlpha3(TestLeakyRelu): + def get_alpha(self): + return -2.0 + + +class TestLeakyReluAPI(unittest.TestCase): + # test paddle.nn.LeakyReLU, paddle.nn.functional.leaky_relu, + # fluid.layers.leaky_relu + def setUp(self): + self.x_np = np.random.uniform(-1, 1, [10, 12]).astype('float32') + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [10, 12]) + out1 = F.leaky_relu(x) + m = paddle.nn.LeakyReLU() + out2 = m(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_leaky_relu(self.x_np) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_variable(self.x_np) + out1 = F.leaky_relu(x) + m = paddle.nn.LeakyReLU() + out2 = m(x) + out_ref = ref_leaky_relu(self.x_np) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + + out1 = F.leaky_relu(x, 0.6) + m = paddle.nn.LeakyReLU(0.6) + out2 = m(x) + out_ref = ref_leaky_relu(self.x_np, 0.6) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_fluid_api(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data('X', [10, 12]) + out = fluid.layers.leaky_relu(x, 0.01) + exe = fluid.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = ref_leaky_relu(self.x_np) + self.assertEqual(np.allclose(out_ref, res[0]), True) + def test_errors(self): - with program_guard(Program()): + with paddle.static.program_guard(paddle.static.Program()): # The input type must be Variable. - self.assertRaises(TypeError, fluid.layers.leaky_relu, 1) + self.assertRaises(TypeError, F.leaky_relu, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32') - self.assertRaises(TypeError, fluid.layers.leaky_relu, x_int32) - # support the input dtype is float32 - x_fp16 = fluid.layers.data( - name='x_fp16', shape=[12, 10], dtype='float32') - fluid.layers.leaky_relu(x_fp16) + x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.leaky_relu, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.leaky_relu(x_fp16) def gelu(x, approximate): @@ -667,7 +1081,7 @@ class TestGeluApproximate(TestActivation): x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) out = gelu(x, approximate) - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.inputs = {'X': x} self.outputs = {'Out': out} self.attrs = {"approximate": approximate} @@ -685,7 +1099,7 @@ class TestGelu(TestActivation): x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) out = gelu(x, approximate) - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.inputs = {'X': x} self.outputs = {'Out': out} self.attrs = {"approximate": approximate} @@ -695,6 +1109,55 @@ class TestGelu(TestActivation): self.check_grad(['X'], 'Out') +class TestGELUAPI(unittest.TestCase): + # test paddle.nn.GELU, paddle.nn.functional.gelu + def setUp(self): + self.x_np = np.random.uniform(-1, 1, [11, 17]).astype('float32') + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [11, 17]) + out1 = F.gelu(x) + m = paddle.nn.GELU() + out2 = m(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = gelu(self.x_np, False) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.gelu(x) + m = paddle.nn.GELU() + out2 = m(x) + out_ref = gelu(self.x_np, False) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + + out1 = F.gelu(x, True) + m = paddle.nn.GELU(True) + out2 = m(x) + out_ref = gelu(self.x_np, True) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. + self.assertRaises(TypeError, F.gelu, 1) + # The input dtype must be float16, float32, float64. 
+ x_int32 = paddle.data(name='x_int32', shape=[11, 17], dtype='int32') + self.assertRaises(TypeError, F.gelu, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[11, 17], dtype='float16') + F.gelu(x_fp16) + + class TestBRelu(TestActivation): def setUp(self): self.op_type = "brelu" @@ -734,20 +1197,24 @@ class TestBReluOpError(unittest.TestCase): fluid.layers.brelu(x_fp16) +def ref_relu6(x, threshold=6.0): + out = np.copy(x) + out[np.abs(x - threshold) < 0.005] = threshold + 0.02 + out = np.minimum(np.maximum(x, 0), threshold) + return out + + class TestRelu6(TestActivation): def setUp(self): self.op_type = "relu6" self.init_dtype() x = np.random.uniform(-1, 10, [10, 12]).astype(self.dtype) - threshold = 6.0 - # The same with TestAbs x[np.abs(x) < 0.005] = 0.02 - x[np.abs(x - threshold) < 0.005] = threshold + 0.02 - out = np.minimum(np.maximum(x, 0), threshold) + out = ref_relu6(x) - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} - self.attrs = {'threshold': threshold} + self.inputs = {'X': x} + self.attrs = {'threshold': 6.0} self.outputs = {'Out': out} def test_check_grad(self): @@ -756,17 +1223,56 @@ class TestRelu6(TestActivation): self.check_grad(['X'], 'Out') -class TestRelu6OpError(unittest.TestCase): +class TestRelu6API(unittest.TestCase): + # test paddle.nn.ReLU6, paddle.nn.functional.relu6 + def setUp(self): + self.x_np = np.random.uniform(-1, 10, [10, 12]).astype(np.float64) + self.x_np[np.abs(self.x_np) < 0.005] = 0.02 + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.x_np.shape, self.x_np.dtype) + out1 = F.relu6(x) + relu6 = paddle.nn.ReLU6() + out2 = relu6(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_relu6(self.x_np) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.relu6(x) + relu6 = paddle.nn.ReLU6() + out2 = relu6(x) + out_ref = ref_relu6(self.x_np) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_fluid_api(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data('X', self.x_np.shape, self.x_np.dtype) + out = fluid.layers.relu6(x) + exe = fluid.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = ref_relu6(self.x_np) + self.assertEqual(np.allclose(out_ref, res[0]), True) + def test_errors(self): - with program_guard(Program()): + with paddle.static.program_guard(paddle.static.Program()): # The input type must be Variable. - self.assertRaises(TypeError, fluid.layers.relu6, 1) + self.assertRaises(TypeError, F.relu6, 1) # The input dtype must be float16, float32, float64. 
- x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32') - self.assertRaises(TypeError, fluid.layers.relu6, x_int32) + x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.relu6, x_int32) # support the input dtype is float16 - x_fp16 = fluid.data(name='x_fp16', shape=[12, 10], dtype='float16') - fluid.layers.relu6(x_fp16) + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.relu6(x_fp16) class TestHardSwish(TestActivation): @@ -844,6 +1350,11 @@ class TestSoftReluOpError(unittest.TestCase): fluid.layers.soft_relu(x_fp16) +def elu(x, alpha): + out_ref = np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1)) + return out_ref.astype(x.dtype) + + class TestELU(TestActivation): def setUp(self): self.op_type = "elu" @@ -851,7 +1362,7 @@ class TestELU(TestActivation): x = np.random.uniform(-3, 3, [10, 12]).astype(self.dtype) alpha = 1. - out = np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1)) + out = elu(x, alpha) # Note: unlike other Relu extensions, point 0 on standard ELU function (i.e. alpha = 1) # is differentiable, so we can skip modifications like x[np.abs(x) < 0.005] = 0.02 here self.inputs = {'X': x} @@ -864,16 +1375,53 @@ class TestELU(TestActivation): self.check_grad(['X'], 'Out') -class TestELUOpError(unittest.TestCase): +class TestELUAPI(unittest.TestCase): + # test paddle.nn.ELU, paddle.nn.functional.elu + def setUp(self): + self.x_np = np.random.uniform(-3, 3, [10, 12]).astype('float32') + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [10, 12]) + out1 = F.elu(x) + m = paddle.nn.ELU() + out2 = m(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = elu(self.x_np, 1.0) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.elu(x) + m = paddle.nn.ELU() + out2 = m(x) + out_ref = elu(self.x_np, 1.0) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + + out1 = F.elu(x, 0.2) + m = paddle.nn.ELU(0.2) + out2 = m(x) + out_ref = elu(self.x_np, 0.2) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + def test_errors(self): - with program_guard(Program(), Program()): - # The input type of elu_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - self.assertRaises(TypeError, fluid.layers.elu, x1) - # The input dtype of elu_op must be float16 float32 or float64. - x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") - self.assertRaises(TypeError, fluid.layers.elu, x2) + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. + self.assertRaises(TypeError, F.elu, 1) + # The input dtype must be float16, float32, float64. 
+ x_int32 = paddle.data(name='x_int32', shape=[10, 12], dtype='int32') + self.assertRaises(TypeError, F.elu, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[10, 12], dtype='float16') + F.elu(x_fp16) class TestReciprocal(TestActivation): @@ -1107,16 +1655,25 @@ class TestSTanhOpError(unittest.TestCase): fluid.layers.stanh(x_fp16) +def ref_softplus(x, beta=1, threshold=20): + x_beta = beta * x + out = np.select([x_beta <= threshold, x_beta > threshold], + [np.log(1 + np.exp(x_beta)) / beta, x]) + return out + + class TestSoftplus(TestActivation): def setUp(self): self.op_type = "softplus" self.init_dtype() - self.dtype = np.float64 - x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) - out = np.log(1 + np.exp(x)) + beta = 2 + threshold = 15 - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + out = ref_softplus(x, beta, threshold) + self.inputs = {'X': x} + self.attrs = {'beta': beta, "threshold": threshold} self.outputs = {'Out': out} def test_check_grad(self): @@ -1125,15 +1682,72 @@ class TestSoftplus(TestActivation): self.check_grad(['X'], 'Out') +class TestSoftplusAPI(unittest.TestCase): + # test paddle.nn.Softplus, paddle.nn.functional.softplus + def setUp(self): + self.beta = 2 + self.threshold = 15 + self.x_np = np.random.uniform(-1, 1, [10, 12]).astype(np.float64) + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.x_np.shape, self.x_np.dtype) + out1 = F.softplus(x, self.beta, self.threshold) + softplus = paddle.nn.Softplus(self.beta, self.threshold) + out2 = softplus(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_softplus(self.x_np, self.beta, self.threshold) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.softplus(x, self.beta, self.threshold) + softplus = paddle.nn.Softplus(self.beta, self.threshold) + out2 = softplus(x) + out_ref = ref_softplus(self.x_np, self.beta, self.threshold) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_fluid_api(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data('X', self.x_np.shape, self.x_np.dtype) + out = fluid.layers.softplus(x) + exe = fluid.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = ref_softplus(self.x_np) + self.assertEqual(np.allclose(out_ref, res[0]), True) + + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. + self.assertRaises(TypeError, F.softplus, 1) + # The input dtype must be float16, float32, float64. 
+ x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.softplus, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.softplus(x_fp16) + + +def ref_softsign(x): + out = np.divide(x, 1 + np.abs(x)) + return out + + class TestSoftsign(TestActivation): def setUp(self): self.op_type = "softsign" self.init_dtype() - x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) - out = np.divide(x, 1 + np.abs(x)) - - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype) + out = ref_softsign(x) + self.inputs = {'X': x} self.outputs = {'Out': out} def test_check_grad(self): @@ -1142,6 +1756,57 @@ class TestSoftsign(TestActivation): self.check_grad(['X'], 'Out') +class TestSoftsignAPI(unittest.TestCase): + # test paddle.nn.Softsign, paddle.nn.functional.softsign + def setUp(self): + self.x_np = np.random.uniform(-1, 1, [10, 12]).astype(np.float64) + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.x_np.shape, self.x_np.dtype) + out1 = F.softsign(x) + softsign = paddle.nn.Softsign() + out2 = softsign(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_softsign(self.x_np) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.softsign(x) + softsign = paddle.nn.Softsign() + out2 = softsign(x) + out_ref = ref_softsign(self.x_np) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_fluid_api(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data('X', self.x_np.shape, self.x_np.dtype) + out = fluid.layers.softsign(x) + exe = fluid.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = ref_softsign(self.x_np) + self.assertEqual(np.allclose(out_ref, res[0]), True) + + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. + self.assertRaises(TypeError, F.softsign, 1) + # The input dtype must be float16, float32, float64. 
+ x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.softsign, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.softsign(x_fp16) + + class TestThresholdedRelu(TestActivation): def setUp(self): self.op_type = "thresholded_relu" @@ -1337,9 +2002,9 @@ create_test_act_fp16_class(TestActivation) create_test_act_fp16_class(TestSigmoid) create_test_act_fp16_class(TestLogSigmoid) create_test_act_fp16_class(TestTanh) -create_test_act_fp16_class(TestTanhShrink) +create_test_act_fp16_class(TestTanhshrink) create_test_act_fp16_class(TestHardShrink) -create_test_act_fp16_class(TestSoftShrink) +create_test_act_fp16_class(TestSoftshrink) create_test_act_fp16_class(TestSqrt) create_test_act_fp16_class(TestAbs) create_test_act_fp16_class(TestCeil, grad_check=False) @@ -1372,140 +2037,5 @@ create_test_act_fp16_class(TestHardSigmoid) create_test_act_fp16_class(TestSwish) create_test_act_fp16_class(TestHardSwish) - -class TestNNReluAPI(unittest.TestCase): - def setUp(self): - self.init_data() - - def init_data(self): - self.x_shape = [10, 12] - self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) - self.y = self.ref_forward(self.x) - - def ref_forward(self, x): - return np.maximum(x, 0) - - def ref_backward(self, y, dy): - y_t = y.copy() - y_t[y_t > 0] = 1 - return y_t * dy - - def check_api(self, place=fluid.CPUPlace(), inplace=False): - main_program = Program() - myrelu = nn.ReLU(inplace) - with fluid.program_guard(main_program): - x = fluid.data(name='x', shape=self.x_shape) - x.stop_gradient = False - y = myrelu(x) - fluid.backward.append_backward(fluid.layers.mean(y)) - exe = fluid.Executor(place) - out = exe.run(main_program, - feed={'x': self.x}, - fetch_list=[y, y.grad_name, x.grad_name]) - self.assertTrue(np.allclose(out[0], self.y)) - self.assertTrue(np.allclose(out[2], self.ref_backward(self.y, out[1]))) - - with fluid.dygraph.guard(place): - x = fluid.dygraph.to_variable(self.x) - y = myrelu(x) - self.assertTrue(np.allclose(y.numpy(), self.y)) - - def test_check_api(self): - places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - for inplace in [True, False]: - self.check_api(place, inplace) - - -class TestNNFunctionalReluAPI(unittest.TestCase): - def setUp(self): - self.init_data() - - def init_data(self): - self.x_shape = [10, 12] - self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) - self.y = self.ref_forward(self.x) - - def ref_forward(self, x): - return np.maximum(x, 0) - - def test_check_api(self): - main_program = Program() - with fluid.program_guard(main_program): - x = fluid.data(name='x', shape=self.x_shape) - y = functional.relu(x) - exe = fluid.Executor(fluid.CPUPlace()) - out = exe.run(main_program, feed={'x': self.x}, fetch_list=[y]) - self.assertTrue(np.allclose(out[0], self.y)) - - -class TestNNSigmoidAPI(unittest.TestCase): - def setUp(self): - self.init_data() - - def init_data(self): - self.x_shape = [10, 15] - self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) - self.y = self.ref_forward(self.x) - - def ref_forward(self, x): - return 1 / (1 + np.exp(-x)) - - def ref_backward(self, y, dy): - return dy * y * (1 - y) - - def check_api(self, place=fluid.CPUPlace(), inplace=False): - main_program = Program() - mysigmoid = nn.Sigmoid(inplace) - with fluid.program_guard(main_program): - x = fluid.data(name='x', shape=self.x_shape) - x.stop_gradient = 
False - y = mysigmoid(x) - fluid.backward.append_backward(fluid.layers.mean(y)) - exe = fluid.Executor(place) - out = exe.run(main_program, - feed={'x': self.x}, - fetch_list=[y, y.grad_name, x.grad_name]) - self.assertTrue(np.allclose(out[0], self.y)) - self.assertTrue(np.allclose(out[2], self.ref_backward(self.y, out[1]))) - - with fluid.dygraph.guard(place): - x = fluid.dygraph.to_variable(self.x) - y = mysigmoid(x) - self.assertTrue(np.allclose(y.numpy(), self.y)) - - def test_check_api(self): - places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - for inplace in [True, False]: - self.check_api(place, inplace) - - -class TestNNFunctionalSigmoidAPI(unittest.TestCase): - def setUp(self): - self.init_data() - - def init_data(self): - self.x_shape = [10, 15] - self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) - self.y = self.ref_forward(self.x) - - def ref_forward(self, x): - return 1 / (1 + np.exp(-x)) - - def test_check_api(self): - main_program = Program() - with fluid.program_guard(main_program): - x = fluid.data(name='x', shape=self.x_shape) - y = functional.sigmoid(x) - exe = fluid.Executor(fluid.CPUPlace()) - out = exe.run(main_program, feed={'x': self.x}, fetch_list=[y]) - self.assertTrue(np.allclose(out[0], self.y)) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index 7a7099b7113c8233fb94074519386f9e4270a019..990499858ca52f5b471211aa659e64d3e13fccc3 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -20,6 +20,7 @@ from op_test import OpTest from paddle.fluid import core from paddle.fluid.op import Operator import paddle.fluid as fluid +import paddle class TestAdamOp1(OpTest): @@ -401,46 +402,111 @@ class TestAdamOpBetaVariable(OpTest): self.check_output() -class TestAdamOptimizerBetaVariable(unittest.TestCase): - def test_adam_optimizer(self): - def test_with_place(place, shape): - exe = fluid.Executor(place) - - train_prog = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(train_prog, startup): - with fluid.unique_name.guard(): - data = fluid.data(name="data", shape=shape) - conv = fluid.layers.conv2d(data, 8, 3) - loss = fluid.layers.reduce_mean(conv) - - beta1 = fluid.layers.create_global_var( - shape=[1], - value=0.85, - dtype='float32', - persistable=True) - beta2 = fluid.layers.create_global_var( - shape=[1], - value=0.95, - dtype='float32', - persistable=True) - opt = fluid.optimizer.Adam( - learning_rate=1e-5, beta1=beta1, beta2=beta2) - opt.minimize(loss) - - exe.run(startup) - data_np = np.random.random(shape).astype('float32') - rets = exe.run(train_prog, - feed={"data": data_np}, - fetch_list=[loss]) - assert rets[0] is not None - +class TestAdamOpV2(unittest.TestCase): + def test_adam_op(self): + place = fluid.CPUPlace() shape = [2, 3, 8, 8] - places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - test_with_place(place, shape) + exe = fluid.Executor(place) + train_prog = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(train_prog, startup): + with fluid.unique_name.guard(): + data = fluid.data(name="data", shape=shape) + conv = fluid.layers.conv2d(data, 8, 3) + loss = fluid.layers.reduce_mean(conv) + + beta1 = fluid.layers.create_global_var( + shape=[1], value=0.85, 
dtype='float32', persistable=True) + beta2 = fluid.layers.create_global_var( + shape=[1], value=0.95, dtype='float32', persistable=True) + betas = [beta1, beta2] + opt = paddle.optimizer.Adam( + learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8) + opt.minimize(loss) + + exe.run(startup) + data_np = np.random.random(shape).astype('float32') + rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss]) + assert rets[0] is not None + + def test_adam_op_dygraph(self): + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = fluid.dygraph.to_variable(value) + linear = fluid.Linear(13, 5, dtype="float32") + + adam = paddle.optimizer.Adam( + learning_rate=0.01, parameters=linear.parameters()) + out = linear(a) + out.backward() + adam.step() + adam.clear_gradients() + + def test_adam_op_with_state_dict(self): + + import paddle + paddle.disable_static() + emb = paddle.nn.Embedding([10, 10]) + + adam = paddle.optimizer.Adam(0.001, parameters=emb.parameters()) + state_dict = adam.state_dict() + adam.set_state_dict(state_dict) + + #learning_rate is Decay + learning_rate = fluid.dygraph.CosineDecay(0.1, 10000, 120) + adam = paddle.optimizer.Adam( + learning_rate=learning_rate, + weight_decay=fluid.regularizer.L2Decay(0.001), + parameters=emb.parameters()) + lr = adam.get_lr() + state_dict = adam.state_dict() + adam.set_state_dict(state_dict) + + #leanrning_rate is Tensor + with self.assertRaises(TypeError): + learning_rate = np.array([0.01]).astype("float32") + learning_rate = paddle.to_tensor(learning_rate) + adam = paddle.optimizer.Adam( + learning_rate=learning_rate, parameters=emb.parameters()) + + params = adam.get_opti_var_name_list() + assert (params is not None) + + def test_adam_with_grad_clip(self): + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = fluid.dygraph.to_variable(value) + linear = fluid.Linear(13, 5, dtype="float32") + clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0) + adam = paddle.optimizer.Adam( + 0.1, parameters=linear.parameters(), grad_clip=clip) + out = linear(a) + out.backward() + adam.step() + adam.clear_gradients() + + def test_adam_op_with_set_lr(self): + paddle.disable_static() + linear = paddle.nn.Linear(10, 10) + adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) + + lr = 0.01 + adam.set_lr(lr) + cur_lr = adam.get_lr() + assert (lr == cur_lr) + + lr_var = paddle.create_global_var(shape=[1], value=lr, dtype='float32') + adam.set_lr(lr_var) + cur_lr = adam.get_lr() + assert (np.float32(lr) == cur_lr) + + with self.assertRaises(TypeError): + lr = int(1) + adam.set_lr(lr) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_adamax_api.py b/python/paddle/fluid/tests/unittests/test_adamax_api.py new file mode 100644 index 0000000000000000000000000000000000000000..f6946dc80b5e55b2e7149f357fe0600916a4fe9f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adamax_api.py @@ -0,0 +1,67 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.fluid as fluid + + +class TestAdamaxAPI(unittest.TestCase): + def test_adamax_api_dygraph(self): + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.to_variable(value) + linear = paddle.nn.Linear(13, 5, dtype="float32") + adam = paddle.optimizer.Adamax( + learning_rate=0.01, + parameters=linear.parameters(), + weight_decay=0.01) + out = linear(a) + out.backward() + adam.step() + adam.clear_gradients() + + def test_adamax_api(self): + place = fluid.CPUPlace() + shape = [2, 3, 8, 8] + exe = fluid.Executor(place) + train_prog = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(train_prog, startup): + with fluid.unique_name.guard(): + data = fluid.data(name="data", shape=shape) + conv = fluid.layers.conv2d(data, 8, 3) + loss = paddle.mean(conv) + beta1 = 0.85 + beta2 = 0.95 + opt = paddle.optimizer.Adamax( + learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8) + opt.minimize(loss) + + exe.run(startup) + data_np = np.random.random(shape).astype('float32') + rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss]) + assert rets[0] is not None + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ddb70d6e6400c8e7ae71cabf92ce8060e220a7da --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py @@ -0,0 +1,81 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
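For background on the weight_decay argument that the AdamW tests below exercise, here is a minimal NumPy sketch of a decoupled weight-decay step in the Loshchilov-Hutter style; the names lr, wd, m_hat and v_hat are illustrative and are not taken from the Paddle kernels, and dropping the wd * p term recovers a plain Adam update.

import numpy as np

def adamw_step(p, m_hat, v_hat, lr=1e-3, wd=0.01, eps=1e-8):
    # Decoupled decay: wd * p is added outside the adaptive
    # 1 / (sqrt(v_hat) + eps) scaling, so the decay strength does not
    # depend on the gradient history accumulated in v_hat.
    return p - lr * (m_hat / (np.sqrt(v_hat) + eps) + wd * p)

p = np.ones(3)
m_hat = np.full(3, 0.5)   # stand-in for the bias-corrected first moment
v_hat = np.full(3, 0.25)  # stand-in for the bias-corrected second moment
print(adamw_step(p, m_hat, v_hat))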
+ +import unittest +import paddle +import numpy as np +import paddle.fluid as fluid + + +class TestAdamWOp(unittest.TestCase): + def test_adamw_op_dygraph(self): + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.to_variable(value) + linear = paddle.nn.Linear(13, 5, dtype="float32") + adam = paddle.optimizer.AdamW( + learning_rate=0.01, + parameters=linear.parameters(), + apply_decay_param_fun=lambda name: True, + weight_decay=0.01) + out = linear(a) + out.backward() + adam.step() + adam.clear_gradients() + + def test_adamw_op_coverage(self): + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.to_variable(value) + linear = paddle.nn.Linear(13, 5, dtype="float32") + adam = paddle.optimizer.AdamW( + learning_rate=0.0, + parameters=linear.parameters(), + apply_decay_param_fun=lambda name: True, + weight_decay=0.01) + assert (adam.__str__() is not None) + + def test_adamw_op(self): + place = fluid.CPUPlace() + shape = [2, 3, 8, 8] + exe = fluid.Executor(place) + train_prog = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(train_prog, startup): + with fluid.unique_name.guard(): + data = fluid.data(name="data", shape=shape) + conv = fluid.layers.conv2d(data, 8, 3) + loss = paddle.mean(conv) + + beta1 = fluid.layers.create_global_var( + shape=[1], value=0.85, dtype='float32', persistable=True) + beta2 = fluid.layers.create_global_var( + shape=[1], value=0.95, dtype='float32', persistable=True) + betas = [beta1, beta2] + opt = paddle.optimizer.AdamW( + learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8) + opt.minimize(loss) + + exe.run(startup) + data_np = np.random.random(shape).astype('float32') + rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss]) + assert rets[0] is not None + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py new file mode 100644 index 0000000000000000000000000000000000000000..55c30e3d2ade0725e6debcdd0a69ca4eee622aec --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py @@ -0,0 +1,274 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
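The adaptive pooling tests below compare against a NumPy reference that derives each output bin from floor/ceil index arithmetic; a small standalone check of that binning rule (using the 7-wide inputs these tests feed in) looks like this, with adaptive_bins being an illustrative helper rather than part of the test file.

import numpy as np

def adaptive_bins(input_size, output_size):
    # Same rule as adaptive_start_index / adaptive_end_index below:
    # bin i covers [floor(i * in / out), ceil((i + 1) * in / out)).
    return [(int(np.floor(i * input_size / output_size)),
             int(np.ceil((i + 1) * input_size / output_size)))
            for i in range(output_size)]

# Pooling 7 input rows down to 3 adaptive rows yields the overlapping
# bins [0, 3), [2, 5) and [4, 7); averaging (or taking the max of) each
# bin per channel is what the reference forward below does.
print(adaptive_bins(7, 3))   # [(0, 3), (2, 5), (4, 7)]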
+ +from __future__ import print_function +from __future__ import division + +import unittest +import numpy as np + +import paddle.fluid.core as core +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def adaptive_pool2d_forward(x, output_size, data_format='NCHW', + pool_type="avg"): + + N = x.shape[0] + C, H, W = [x.shape[1], x.shape[2], x.shape[3]] if data_format == 'NCHW' \ + else [x.shape[3], x.shape[1], x.shape[2]] + + if (isinstance(output_size, int) or output_size == None): + H_out = output_size + W_out = output_size + output_size = [H_out, W_out] + else: + H_out, W_out = output_size + + if output_size[0] == None: + output_size[0] = H + H_out = H + if output_size[1] == None: + output_size[1] = W + W_out = W + + out = np.zeros((N, C, H_out, W_out)) if data_format=='NCHW' \ + else np.zeros((N, H_out, W_out, C)) + + for i in range(H_out): + in_h_start = adaptive_start_index(i, H, output_size[0]) + in_h_end = adaptive_end_index(i, H, output_size[0]) + + for j in range(W_out): + in_w_start = adaptive_start_index(j, W, output_size[1]) + in_w_end = adaptive_end_index(j, W, output_size[1]) + + if data_format == 'NCHW': + x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end] + if pool_type == 'avg': + field_size = ( + (in_h_end - in_h_start) * (in_w_end - in_w_start)) + out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size + elif pool_type == 'max': + out[:, :, i, j] = np.max(x_masked, axis=(2, 3)) + elif data_format == 'NHWC': + x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :] + if pool_type == 'avg': + field_size = ( + (in_h_end - in_h_start) * (in_w_end - in_w_start)) + out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size + elif pool_type == 'max': + out[:, i, j, :] = np.max(x_masked, axis=(1, 2)) + return out + + +class TestAdaptiveAvgPool2dAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[3, 3], pool_type="avg") + + self.res_2_np = adaptive_pool2d_forward( + x=self.x_np, output_size=5, pool_type="avg") + + self.res_3_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[2, 5], pool_type="avg") + + self.res_4_np = adaptive_pool2d_forward( + x=self.x_np, + output_size=[3, 3], + pool_type="avg", + data_format="NHWC") + + self.res_5_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[None, 3], pool_type="avg") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32") + + out_1 = paddle.nn.functional.adaptive_avg_pool2d( + x=x, output_size=[3, 3]) + + out_2 = paddle.nn.functional.adaptive_avg_pool2d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_avg_pool2d( + x=x, output_size=[2, 5]) + + out_4 = paddle.nn.functional.adaptive_avg_pool2d( + x=x, output_size=[3, 3], data_format="NHWC") + + out_5 = paddle.nn.functional.adaptive_avg_pool2d( + x=x, output_size=[None, 3]) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_4, res_5] = exe.run( + fluid.default_main_program(), 
+ feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_4, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + out_1 = paddle.nn.functional.adaptive_avg_pool2d( + x=x, output_size=[3, 3]) + + out_2 = paddle.nn.functional.adaptive_avg_pool2d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_avg_pool2d( + x=x, output_size=[2, 5]) + + out_4 = paddle.nn.functional.adaptive_avg_pool2d( + x=x, output_size=[3, 3], data_format="NHWC") + + out_5 = paddle.nn.functional.adaptive_avg_pool2d( + x=x, output_size=[None, 3]) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +class TestAdaptiveAvgPool2dClassAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[3, 3], pool_type="avg") + + self.res_2_np = adaptive_pool2d_forward( + x=self.x_np, output_size=5, pool_type="avg") + + self.res_3_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[2, 5], pool_type="avg") + + self.res_4_np = adaptive_pool2d_forward( + x=self.x_np, + output_size=[3, 3], + pool_type="avg", + data_format="NHWC") + + self.res_5_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[None, 3], pool_type="avg") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32") + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[3, 3]) + out_1 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=5) + out_2 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[2, 5]) + out_3 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d( + output_size=[3, 3], data_format="NHWC") + out_4 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d( + output_size=[None, 3]) + out_5 = adaptive_avg_pool(x=x) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_4, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_4, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[3, 3]) + out_1 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = 
paddle.nn.AdaptiveAvgPool2d(output_size=5) + out_2 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[2, 5]) + out_3 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d( + output_size=[3, 3], data_format="NHWC") + out_4 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d( + output_size=[None, 3]) + out_5 = adaptive_avg_pool(x=x) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py new file mode 100755 index 0000000000000000000000000000000000000000..c04ee660667edaff01d7029e83b912c05429a15f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py @@ -0,0 +1,293 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +from __future__ import division + +import unittest +import numpy as np + +import paddle.fluid.core as core +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def adaptive_pool3d_forward(x, + output_size, + adaptive=True, + data_format='NCDHW', + pool_type='avg'): + + N = x.shape[0] + C, D, H, W = [x.shape[1], x.shape[2], x.shape[3], x.shape[4]] \ + if data_format == 'NCDHW' else [x.shape[4], x.shape[1], x.shape[2],x.shape[3]] + + if (isinstance(output_size, int) or output_size == None): + H_out = output_size + W_out = output_size + D_out = output_size + output_size = [D_out, H_out, W_out] + else: + D_out, H_out, W_out = output_size + + if output_size[0] == None: + output_size[0] = D + D_out = D + if output_size[1] == None: + output_size[1] = H + H_out = H + if output_size[2] == None: + output_size[2] = W + W_out = W + + out = np.zeros((N, C, D_out, H_out, W_out)) if data_format=='NCDHW' \ + else np.zeros((N, D_out, H_out, W_out, C)) + for k in range(D_out): + d_start = adaptive_start_index(k, D, output_size[0]) + d_end = adaptive_end_index(k, D, output_size[0]) + + for i in range(H_out): + h_start = adaptive_start_index(i, H, output_size[1]) + h_end = adaptive_end_index(i, H, output_size[1]) + + for j in range(W_out): + w_start = adaptive_start_index(j, W, output_size[2]) + w_end = adaptive_end_index(j, W, output_size[2]) + + if data_format == 'NCDHW': + x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start: + w_end] + if pool_type == 'avg': + field_size = 
(d_end - d_start) * (h_end - h_start) * ( + w_end - w_start) + out[:, :, k, i, j] = np.sum(x_masked, + axis=(2, 3, 4)) / field_size + elif pool_type == 'max': + out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) + + elif data_format == 'NDHWC': + x_masked = x[:, d_start:d_end, h_start:h_end, w_start: + w_end, :] + if pool_type == 'avg': + field_size = (d_end - d_start) * (h_end - h_start) * ( + w_end - w_start) + out[:, k, i, j, :] = np.sum(x_masked, + axis=(1, 2, 3)) / field_size + elif pool_type == 'max': + out[:, k, i, j, :] = np.max(x_masked, axis=(1, 2, 3)) + return out + + +class TestAdaptiveAvgPool3dAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[3, 3, 3], pool_type="avg") + + self.res_2_np = adaptive_pool3d_forward( + x=self.x_np, output_size=5, pool_type="avg") + + self.res_3_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[2, 3, 5], pool_type="avg") + + self.res_4_np = adaptive_pool3d_forward( + x=self.x_np, + output_size=[3, 3, 3], + pool_type="avg", + data_format="NDHWC") + + self.res_5_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[None, 3, None], pool_type="avg") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + + out_1 = paddle.nn.functional.adaptive_avg_pool3d( + x=x, output_size=[3, 3, 3]) + + out_2 = paddle.nn.functional.adaptive_avg_pool3d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_avg_pool3d( + x=x, output_size=[2, 3, 5]) + + out_4 = paddle.nn.functional.adaptive_avg_pool3d( + x=x, output_size=[3, 3, 3], data_format="NDHWC") + + out_5 = paddle.nn.functional.adaptive_avg_pool3d( + x=x, output_size=[None, 3, None]) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_4, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_4, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + out_1 = paddle.nn.functional.adaptive_avg_pool3d( + x=x, output_size=[3, 3, 3]) + + out_2 = paddle.nn.functional.adaptive_avg_pool3d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_avg_pool3d( + x=x, output_size=[2, 3, 5]) + + out_4 = paddle.nn.functional.adaptive_avg_pool3d( + x=x, output_size=[3, 3, 3], data_format="NDHWC") + + out_5 = paddle.nn.functional.adaptive_avg_pool3d( + x=x, output_size=[None, 3, None]) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +class TestAdaptiveAvgPool3dClassAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool3d_forward( + x=self.x_np, 
output_size=[3, 3, 3], pool_type="avg") + + self.res_2_np = adaptive_pool3d_forward( + x=self.x_np, output_size=5, pool_type="avg") + + self.res_3_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[2, 3, 5], pool_type="avg") + + self.res_4_np = adaptive_pool3d_forward( + x=self.x_np, + output_size=[3, 3, 3], + pool_type="avg", + data_format="NDHWC") + + self.res_5_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[None, 3, None], pool_type="avg") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( + output_size=[3, 3, 3]) + out_1 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=5) + out_2 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( + output_size=[2, 3, 5]) + out_3 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( + output_size=[3, 3, 3], data_format="NDHWC") + out_4 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( + output_size=[None, 3, None]) + out_5 = adaptive_avg_pool(x=x) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_4, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_4, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( + output_size=[3, 3, 3]) + out_1 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=5) + out_2 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( + output_size=[2, 3, 5]) + out_3 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( + output_size=[3, 3, 3], data_format="NDHWC") + out_4 = adaptive_avg_pool(x=x) + + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( + output_size=[None, 3, None]) + out_5 = adaptive_avg_pool(x=x) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py index c524fb6930d97c0eb2971d09e751a54628d41325..6157314b1f060577a7d058f0de9a42f6368947ff 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py +++ b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py @@ -63,7 +63,7 @@ class TestAffineChannelOp(OpTest): self.check_grad(['X'], 'Out', no_grad_set=set(['Scale', 'Bias'])) def init_test_case(self): - self.shape = [2, 100, 12, 12] + self.shape = [2, 100, 3, 3] self.C = 100 self.layout = 'NCHW' @@ -102,7 +102,7 @@ class 
TestAffineChannelOpError(unittest.TestCase): class TestAffineChannelNHWC(TestAffineChannelOp): def init_test_case(self): - self.shape = [2, 12, 12, 100] + self.shape = [2, 3, 3, 100] self.C = 100 self.layout = 'NHWC' @@ -115,7 +115,7 @@ class TestAffineChannelNHWC(TestAffineChannelOp): class TestAffineChannel2D(TestAffineChannelOp): def init_test_case(self): - self.shape = [8, 100] + self.shape = [2, 100] self.C = 100 self.layout = 'NCHW' diff --git a/python/paddle/fluid/tests/unittests/test_affine_grid_function.py b/python/paddle/fluid/tests/unittests/test_affine_grid_function.py new file mode 100644 index 0000000000000000000000000000000000000000..c874cf197ea88c7f12b9b24223d40d22be268b10 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_affine_grid_function.py @@ -0,0 +1,149 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from paddle import fluid, nn +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F +import paddle.fluid.initializer as I +import unittest + + +class AffineGridTestCase(unittest.TestCase): + def __init__(self, + methodName='runTest', + theta_shape=(20, 2, 3), + output_shape=[20, 2, 5, 7], + align_corners=True, + dtype="float32", + invalid_theta=False, + variable_output_shape=False): + super(AffineGridTestCase, self).__init__(methodName) + + self.theta_shape = theta_shape + self.output_shape = output_shape + self.align_corners = align_corners + self.dtype = dtype + self.invalid_theta = invalid_theta + self.variable_output_shape = variable_output_shape + + def setUp(self): + self.theta = np.random.randn(*(self.theta_shape)).astype(self.dtype) + + def fluid_layer(self, place): + # align_corners = True + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + theta_var = fluid.data( + "input", self.theta_shape, dtype=self.dtype) + y_var = fluid.layers.affine_grid(theta_var, self.output_shape) + feed_dict = {"input": self.theta} + exe = fluid.Executor(place) + exe.run(start) + y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var]) + return y_np + + def functional(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + theta_var = fluid.data( + "input", self.theta_shape, dtype=self.dtype) + y_var = F.affine_grid( + theta_var, + self.output_shape, + align_corners=self.align_corners) + feed_dict = {"input": self.theta} + exe = fluid.Executor(place) + exe.run(start) + y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var]) + return y_np + + def paddle_dygraph_layer(self): + theta_var = dg.to_variable( + self.theta) if not self.invalid_theta else "invalid" + output_shape = dg.to_variable( + self. 
+ output_shape) if self.variable_output_shape else self.output_shape + y_var = F.affine_grid( + theta_var, output_shape, align_corners=self.align_corners) + y_np = y_var.numpy() + return y_np + + def _test_equivalence(self, place): + place = fluid.CPUPlace() + result1 = self.fluid_layer(place) + result2 = self.functional(place) + with dg.guard(place): + result3 = self.paddle_dygraph_layer() + if self.align_corners: + np.testing.assert_array_almost_equal(result1, result2) + np.testing.assert_array_almost_equal(result2, result3) + + def runTest(self): + place = fluid.CPUPlace() + self._test_equivalence(place) + + if fluid.core.is_compiled_with_cuda(): + place = fluid.CUDAPlace(0) + self._test_equivalence(place) + + +class AffineGridErrorTestCase(AffineGridTestCase): + def runTest(self): + place = fluid.CPUPlace() + with dg.guard(place): + with self.assertRaises(ValueError): + self.paddle_dygraph_layer() + + +def add_cases(suite): + suite.addTest(AffineGridTestCase(methodName='runTest')) + suite.addTest(AffineGridTestCase(methodName='runTest', align_corners=True)) + + suite.addTest(AffineGridTestCase(methodName='runTest', align_corners=False)) + suite.addTest( + AffineGridTestCase( + methodName='runTest', variable_output_shape=True)) + + suite.addTest( + AffineGridTestCase( + methodName='runTest', + theta_shape=(20, 2, 3), + output_shape=[20, 1, 7, 7], + align_corners=True)) + + +def add_error_cases(suite): + suite.addTest( + AffineGridErrorTestCase( + methodName='runTest', output_shape="not_valid")) + suite.addTest( + AffineGridErrorTestCase( + methodName='runTest', + invalid_theta=True)) # to test theta not variable error checking + + +def load_tests(loader, standard_tests, pattern): + suite = unittest.TestSuite() + add_cases(suite) + add_error_cases(suite) + return suite + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_affine_grid_op.py b/python/paddle/fluid/tests/unittests/test_affine_grid_op.py index 3668c4f4aa174e34dcc96d40ddae7b359c1bee18..55612d71a17a7ae9801535bf5a35c83b100aab30 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_grid_op.py +++ b/python/paddle/fluid/tests/unittests/test_affine_grid_op.py @@ -17,14 +17,20 @@ import numpy as np from op_test import OpTest -def AffineGrid(theta, size): +def AffineGrid(theta, size, align_corners): n = size[0] w = size[3] h = size[2] + h_factor = w_factor = 1 + if not align_corners: + h_factor = (h - 1) / float(h) + w_factor = (w - 1) / float(w) h_idx = np.repeat( - np.linspace(-1, 1, h)[np.newaxis, :], w, axis=0).T[:, :, np.newaxis] + np.linspace(-1, 1, h)[np.newaxis, :], w, + axis=0).T[:, :, np.newaxis] * h_factor w_idx = np.repeat( - np.linspace(-1, 1, w)[np.newaxis, :], h, axis=0)[:, :, np.newaxis] + np.linspace(-1, 1, w)[np.newaxis, :], h, + axis=0)[:, :, np.newaxis] * w_factor grid = np.concatenate( [w_idx, h_idx, np.ones([h, w, 1])], axis=2) # h * w * 3 grid = np.repeat(grid[np.newaxis, :], size[0], axis=0) # n * h * w *3 @@ -45,12 +51,17 @@ class TestAffineGridOp(OpTest): theta = np.random.randint(1, 3, self.theta_shape).astype("float32") theta = np.ones(self.theta_shape).astype("float32") self.inputs = {'Theta': theta} - self.attrs = {"use_cudnn": True} + self.attrs = { + "use_cudnn": self.use_cudnn, + "align_corners": self.align_corners + } if self.dynamic_shape: self.inputs['OutputShape'] = self.output_shape else: self.attrs['output_shape'] = self.output_shape - self.outputs = {'Output': AffineGrid(theta, self.output_shape)} + self.outputs = { + 'Output': 
AffineGrid(theta, self.output_shape, self.align_corners) + } def test_check_output(self): self.check_output() @@ -62,6 +73,8 @@ class TestAffineGridOp(OpTest): self.theta_shape = (17, 2, 3) self.output_shape = np.array([17, 2, 5, 7]).astype("int32") self.dynamic_shape = False + self.use_cudnn = False + self.align_corners = True class TestAffineGridOpCase1(TestAffineGridOp): @@ -69,6 +82,35 @@ class TestAffineGridOpCase1(TestAffineGridOp): self.theta_shape = (20, 2, 3) self.output_shape = np.array([20, 2, 5, 7]).astype("int32") self.dynamic_shape = True + self.use_cudnn = True + self.align_corners = True + + +class TestAffineGridOpCase2(TestAffineGridOp): + def initTestCase(self): + self.theta_shape = (20, 2, 3) + self.output_shape = np.array([20, 2, 5, 7]).astype("int32") + self.dynamic_shape = True + self.use_cudnn = False + self.align_corners = True + + +class TestAffineGridOpCase3(TestAffineGridOp): + def initTestCase(self): + self.theta_shape = (20, 2, 3) + self.output_shape = np.array([20, 2, 5, 7]).astype("int32") + self.dynamic_shape = True + self.use_cudnn = False + self.align_corners = False + + +class TestAffineGridOpCase4(TestAffineGridOp): + def initTestCase(self): + self.theta_shape = (25, 2, 3) + self.output_shape = np.array([25, 2, 5, 6]).astype("int32") + self.dynamic_shape = False + self.use_cudnn = False + self.align_corners = False if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_allclose_op.py b/python/paddle/fluid/tests/unittests/test_allclose_op.py index 5b5ed2641880ade671434185414fa45c26901a2d..dc50e569f80433a5730b1ea33a6f3b4922d99c91 100644 --- a/python/paddle/fluid/tests/unittests/test_allclose_op.py +++ b/python/paddle/fluid/tests/unittests/test_allclose_op.py @@ -15,6 +15,7 @@ import unittest import numpy as np from op_test import OpTest +import paddle class TestAllcloseOp(OpTest): @@ -76,5 +77,58 @@ class TestAllcloseOpNanTrue(TestAllcloseOp): self.equal_nan = True +class TestAllcloseDygraph(unittest.TestCase): + def test_api_case(self): + paddle.disable_static() + x_data = np.random.rand(10, 10) + y_data = np.random.rand(10, 10) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + out = paddle.allclose(x, y, rtol=1e-05, atol=1e-08) + expected_out = np.allclose(x_data, y_data, rtol=1e-05, atol=1e-08) + self.assertTrue((out.numpy() == expected_out).all(), True) + paddle.enable_static() + + +class TestAllcloseError(unittest.TestCase): + def test_input_dtype(self): + def test_x_dtype(): + with paddle.static.program_guard(paddle.static.Program(), + paddle.static.Program()): + x = paddle.data(name='x', shape=[10, 10], dtype='float16') + y = paddle.data(name='y', shape=[10, 10], dtype='float64') + result = paddle.allclose(x, y) + + self.assertRaises(TypeError, test_x_dtype) + + def test_y_dtype(): + with paddle.static.program_guard(paddle.static.Program(), + paddle.static.Program()): + x = paddle.data(name='x', shape=[10, 10], dtype='float64') + y = paddle.data(name='y', shape=[10, 10], dtype='int32') + result = paddle.allclose(x, y) + + self.assertRaises(TypeError, test_y_dtype) + + def test_attr(self): + x = paddle.data(name='x', shape=[10, 10], dtype='float64') + y = paddle.data(name='y', shape=[10, 10], dtype='float64') + + def test_rtol(): + result = paddle.allclose(x, y, rtol=True) + + self.assertRaises(TypeError, test_rtol) + + def test_atol(): + result = paddle.allclose(x, y, rtol=True) + + self.assertRaises(TypeError, test_atol) + + def test_equal_nan(): + result = paddle.allclose(x, y, equal_nan=1) + + 
self.assertRaises(TypeError, test_equal_nan) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py index 0201f0635a5afeb285cdbca3e8d526a1ff5032f2..3639c4dea0a3a12aa46d2875affeebd4c623a4dd 100644 --- a/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py @@ -201,107 +201,5 @@ class BaseTestComplex2_2(OpTest): } -class APT_ArgMaxTest(unittest.TestCase): - def test_output_result(self): - with fluid.program_guard(fluid.Program()): - data1 = fluid.data(name="X", shape=[3, 4], dtype="float32") - data2 = fluid.data(name="Y", shape=[3], dtype="int64") - out = paddle.argmax(input=data1, out=data2) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - result = exe.run( - feed={"X": np.random.rand(3, 4).astype("float32")}, - fetch_list=[data2, out]) - self.assertEqual((result[0] == result[1]).all(), True) - - def test_basic(self): - with fluid.program_guard(fluid.Program()): - data = fluid.data(name="X", shape=[3, 4], dtype="float32") - out = paddle.argmax(input=data) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - np_input = np.random.rand(3, 4).astype("float32") - expected_result = np.argmax(np_input, axis=1) - - result, = exe.run(feed={"X": np_input}, fetch_list=[out]) - self.assertEqual((result == expected_result).all(), True) - - with fluid.program_guard(fluid.Program()): - data = fluid.data(name="X", shape=[3, 4], dtype="float32") - out = paddle.argmax(input=data, axis=0) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - np_input = np.random.rand(3, 4).astype("float32") - expected_result = np.argmax(np_input, axis=0) - - result = exe.run(feed={"X": np_input}, fetch_list=[out]) - self.assertEqual((result == expected_result).all(), True) - - with fluid.program_guard(fluid.Program()): - data = fluid.data(name="X", shape=[3, 4], dtype="float32") - out = paddle.argmax(input=data, dtype="int32") - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - np_input = np.random.rand(3, 4).astype("float32") - expected_result = np.argmax(np_input, axis=1).astype(np.int32) - - result = exe.run(feed={"X": np_input}, fetch_list=[out]) - self.assertEqual((result == expected_result).all(), True) - - with fluid.program_guard(fluid.Program()): - data1 = fluid.data(name="X", shape=[3, 4], dtype="float32") - data2 = fluid.data(name="Y", shape=[3], dtype="int64") - out = paddle.argmax(input=data, out=data2) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - result = exe.run( - feed={"X": np.random.rand(3, 4).astype("float32")}, - fetch_list=[data2, out]) - self.assertEqual((result[0] == result[1]).all(), True) - - def test_name(self): - with fluid.program_guard(fluid.Program()): - x = fluid.data(name="x", shape=[100], dtype="float32") - y_1 = paddle.argmax(x, name='arg_max_res') - self.assertEqual(('arg_max_res' in y_1.name), True) - - def test_errors(self): - def test_dtype1(): - with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data(name="data", shape=[10], dtype="float32") - paddle.argmax(data, dtype="float32") - - self.assertRaises(TypeError, test_dtype1) - - def test_dtype2(): - with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data(name="data", shape=[10], dtype="float64") - paddle.argmax(data, dtype="float32") - - self.assertRaises(TypeError, test_dtype2) - - -class TestArgMinMaxOpError(unittest.TestCase): - def 
test_errors(self): - with program_guard(Program(), Program()): - - def test_argmax_x_type(): - x1 = [1, 2, 3] - output = fluid.layers.argmax(x=x1) - - self.assertRaises(TypeError, test_argmax_x_type) - - def test_argmin_x_type(): - x2 = [1, 2, 3] - output = fluid.layers.argmin(x=x2) - - self.assertRaises(TypeError, test_argmin_x_type) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py new file mode 100644 index 0000000000000000000000000000000000000000..7c1f9d802c31ac2c3b244541936ba25018e1487a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py @@ -0,0 +1,313 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid import Program, program_guard + + +def create_kernel_case(op_type, numpy_op_type): + class ArgMinMaxKernelBaseCase(OpTest): + def initTestCase(self): + self.op_type = op_type + self.numpy_op_type = numpy_op_type + self.axis = 0 + + def setUp(self): + np.random.seed(123) + self.initTestCase() + self.dims = (4, 5, 6) + self.dtype = "float64" + self.x = (1000 * np.random.random(self.dims).astype(self.dtype)) + self.inputs = {'X': self.x} + self.attrs = {"axis": self.axis} + self.numpy_op = eval("np.%s" % (numpy_op_type)) + self.outputs = {'Out': self.numpy_op(self.x, axis=self.axis)} + + def test_check_output(self): + paddle.enable_static() + self.check_output() + + class ArgMinMaxKernelCase0(ArgMinMaxKernelBaseCase): + def initTestCase(self): + self.op_type = op_type + self.numpy_op_type = numpy_op_type + self.axis = 1 + + class ArgMinMaxKernelCase1(ArgMinMaxKernelBaseCase): + def initTestCase(self): + self.op_type = op_type + self.numpy_op_type = numpy_op_type + self.axis = 2 + + class ArgMinMaxKernelCase2(ArgMinMaxKernelBaseCase): + def initTestCase(self): + self.op_type = op_type + self.numpy_op_type = numpy_op_type + self.axis = -1 + + class ArgMinMaxKernelCase3(ArgMinMaxKernelBaseCase): + def initTestCase(self): + self.op_type = op_type + self.numpy_op_type = numpy_op_type + self.axis = -2 + + class ArgMinMaxKernelCase4(ArgMinMaxKernelBaseCase): + def setUp(self): + self.initTestCase() + self.dims = (4, 5, 6) + self.dtype = "float64" + self.x = (1000 * np.random.random(self.dims).astype(self.dtype)) + self.inputs = {'X': self.x} + self.attrs = {"axis": self.axis, "keepdims": True} + self.numpy_op = eval("np.%s" % (numpy_op_type)) + self.outputs = { + 'Out': self.numpy_op( + self.x, axis=self.axis).reshape((1, 5, 6)) + } + + class ArgMinMaxKernelCase5(ArgMinMaxKernelBaseCase): + def setUp(self): + self.initTestCase() + self.dims = (4) + self.dtype = "float64" + self.x = (1000 * np.random.random(self.dims).astype(self.dtype)) + self.inputs = {'X': self.x} + 
self.attrs = {"axis": self.axis, "flatten": True} + self.numpy_op = eval("np.%s" % (numpy_op_type)) + self.outputs = { + 'Out': self.numpy_op( + self.x.flatten(), axis=self.axis) + } + + class ArgMinMaxKernelCase6(ArgMinMaxKernelBaseCase): + def setUp(self): + self.initTestCase() + self.dims = (4) + self.dtype = "float64" + self.x = (1000 * np.random.random(self.dims).astype(self.dtype)) + self.inputs = {'X': self.x} + self.attrs = {"axis": self.axis, "flatten": True, "keepdims": True} + self.numpy_op = eval("np.%s" % (numpy_op_type)) + self.outputs = { + 'Out': + np.array(self.numpy_op( + self.x.flatten(), axis=self.axis)) + } + + cls_name = "ArgMinMaxKernelBaseCase_%s" % (op_type) + ArgMinMaxKernelBaseCase.__name__ = cls_name + globals()[cls_name] = ArgMinMaxKernelBaseCase + + cls_name = "ArgMinMaxKernelCase0_%s" % (op_type) + ArgMinMaxKernelCase0.__name__ = cls_name + globals()[cls_name] = ArgMinMaxKernelCase0 + + cls_name = "ArgMinMaxKernelCase1_%s" % (op_type) + ArgMinMaxKernelCase1.__name__ = cls_name + globals()[cls_name] = ArgMinMaxKernelCase1 + + cls_name = "ArgMinMaxKernelCase2_%s" % (op_type) + ArgMinMaxKernelCase2.__name__ = cls_name + globals()[cls_name] = ArgMinMaxKernelCase2 + + cls_name = "ArgMinMaxKernelCase3_%s" % (op_type) + ArgMinMaxKernelCase3.__name__ = cls_name + globals()[cls_name] = ArgMinMaxKernelCase3 + + cls_name = "ArgMinMaxKernelCase4_%s" % (op_type) + ArgMinMaxKernelCase4.__name__ = cls_name + globals()[cls_name] = ArgMinMaxKernelCase4 + + cls_name = "ArgMinMaxKernelCase5_%s" % (op_type) + ArgMinMaxKernelCase5.__name__ = cls_name + globals()[cls_name] = ArgMinMaxKernelCase5 + + cls_name = "ArgMinMaxKernelCase6_%s" % (op_type) + ArgMinMaxKernelCase6.__name__ = cls_name + globals()[cls_name] = ArgMinMaxKernelCase6 + + +for op_type, numpy_op_type in zip(['arg_max', 'arg_min'], ['argmax', 'argmin']): + create_kernel_case(op_type, numpy_op_type) + + +def create_test_case(op_type): + class ArgMaxMinTestCase(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.input_data = np.random.rand(10, 10).astype("float32") + self.places = [] + self.places.append(fluid.CPUPlace()) + if core.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + self.op = eval("paddle.%s" % (op_type)) + self.numpy_op = eval("np.%s" % (op_type)) + + def run_static(self, place): + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + data_var = paddle.static.data( + name="data", shape=[10, 10], dtype="float32") + op = eval("paddle.%s" % (op_type)) + result = op(data_var) + exe = paddle.static.Executor(place) + result_data = exe.run(feed={"data": self.input_data}, + fetch_list=[result]) + expected_data = self.numpy_op(self.input_data) + self.assertTrue((result_data == np.array(expected_data)).all(), + True) + + with paddle.static.program_guard(paddle.static.Program()): + data_var = paddle.static.data( + name="data", shape=[10, 10], dtype="float32") + op = eval("paddle.%s" % (op_type)) + result = op(data_var, axis=1) + exe = paddle.static.Executor(place) + result_data = exe.run(feed={"data": self.input_data}, + fetch_list=[result]) + expected_data = self.numpy_op(self.input_data, axis=1) + self.assertTrue((result_data == expected_data).all(), True) + + with paddle.static.program_guard(paddle.static.Program()): + data_var = paddle.static.data( + name="data", shape=[10, 10], dtype="float32") + op = eval("paddle.%s" % (op_type)) + result = op(data_var, axis=-1) + exe = paddle.static.Executor(place) + result_data = exe.run(feed={"data": 
self.input_data}, + fetch_list=[result]) + expected_data = self.numpy_op(self.input_data, axis=-1) + self.assertTrue((result_data == expected_data).all(), True) + + with paddle.static.program_guard(paddle.static.Program()): + data_var = paddle.static.data( + name="data", shape=[10, 10], dtype="float32") + + op = eval("paddle.%s" % (op_type)) + result = op(data_var, axis=-1, keepdim=True) + exe = paddle.static.Executor(place) + result_data = exe.run(feed={"data": self.input_data}, + fetch_list=[result]) + expected_data = self.numpy_op( + self.input_data, axis=-1).reshape((10, 1)) + self.assertTrue((result_data == expected_data).all(), True) + + with paddle.static.program_guard(paddle.static.Program()): + op = eval("paddle.%s" % (op_type)) + data_var = paddle.static.data( + name="data", shape=[10, 10], dtype="float32") + result = op(data_var, axis=-1, name="test_arg_api") + self.assertTrue("test_arg_api" in result.name) + + def run_dygraph(self, place): + paddle.disable_static() + op = eval("paddle.%s" % (op_type)) + data_tensor = paddle.to_tensor(self.input_data) + + #case 1 + result_data = op(data_tensor) + excepted_data = self.numpy_op(self.input_data) + self.assertTrue((result_data.numpy() == excepted_data).all(), True) + + #case 2 + result_data = op(data_tensor, axis=1) + excepted_data = self.numpy_op(self.input_data, axis=1) + self.assertTrue((result_data.numpy() == excepted_data).all(), True) + + #case 3 + result_data = op(data_tensor, axis=-1) + excepted_data = self.numpy_op(self.input_data, axis=-1) + self.assertTrue((result_data.numpy() == excepted_data).all(), True) + + #case 4 + result_data = op(data_tensor, axis=-1, keepdim=True) + excepted_data = self.numpy_op(self.input_data, axis=-1) + excepted_data = excepted_data.reshape((10)) + self.assertTrue((result_data.numpy() == excepted_data).all(), True) + + #case 5 + result_data = op(data_tensor, axis=-1, keepdim=True, dtype="int32") + self.assertTrue(result_data.numpy().dtype == np.int32) + + # case for dim 4, 5, 6, for test case coverage + input_data = np.random.rand(5, 5, 5, 5) + excepted_data = self.numpy_op(input_data, axis=0) + result_data = op(paddle.to_tensor(input_data), axis=0) + self.assertTrue((result_data.numpy() == excepted_data).all(), True) + + input_data = np.random.rand(4, 4, 4, 4, 4) + excepted_data = self.numpy_op(input_data, axis=0) + result_data = op(paddle.to_tensor(input_data), axis=0) + self.assertTrue((result_data.numpy() == excepted_data).all(), True) + + input_data = np.random.rand(3, 3, 3, 3, 3, 3) + excepted_data = self.numpy_op(input_data, axis=0) + result_data = op(paddle.to_tensor(input_data), axis=0) + self.assertTrue((result_data.numpy() == excepted_data).all(), True) + + def test_case(self): + for place in self.places: + self.run_static(place) + self.run_dygraph(place) + + cls_name = "ArgMaxMinTestCase_{}".format(op_type) + ArgMaxMinTestCase.__name__ = cls_name + globals()[cls_name] = ArgMaxMinTestCase + + +for op_type in ['argmin', 'argmax']: + create_test_case(op_type) + + +class TestArgMinMaxOpError(unittest.TestCase): + def test_errors(self): + paddle.enable_static() + with program_guard(Program(), Program()): + + def test_argmax_x_type(): + x1 = [1, 2, 3] + output = paddle.argmax(x=x1) + + self.assertRaises(TypeError, test_argmax_x_type) + + def test_argmin_x_type(): + x2 = [1, 2, 3] + output = paddle.argmin(x=x2) + + self.assertRaises(TypeError, test_argmin_x_type) + + def test_argmax_attr_type(): + data = paddle.static.data( + name="test_argmax", shape=[10], dtype="float32") + output = 
paddle.argmax(x=data, dtype="float32") + + self.assertRaises(ValueError, test_argmax_attr_type) + + def test_argmin_attr_type(): + data = paddle.static.data( + name="test_argmax", shape=[10], dtype="float32") + output = paddle.argmin(x=data, dtype="float32") + + self.assertRaises(ValueError, test_argmin_attr_type) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py index 729fe20c8f87ed1cd07b7f5c2784a79acd1fa54b..fd009db5fd00133c5bad7c8c52662002ebd03fa8 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py @@ -59,6 +59,10 @@ class AutoCheckPointACLBase(AutoCheckpointBase): os.environ.clear() os.environ.update(self._old_environ) + file_name = os.path.basename(__file__) + base_name = os.path.splitext(file_name)[0] + print("runnng name:", base_name) + def _run_normal(self): exe, main_prog, startup_prog = self._generate() @@ -182,6 +186,20 @@ class AutoCheckPointACLBase(AutoCheckpointBase): fs.delete(save_dir) logger.info("begin _run_load_0") + def _test_corner_epoch_no(self, break_epoch_no): + logger.info("begin test_corener_epoch_no") + checker = acp._get_checker() + fs = HDFSClient(checker.hdfs_home, None) + + fs.delete(checker.hdfs_checkpoint_path) + self._reset_generator() + self._run_save_0(break_epoch_no=break_epoch_no) + self._reset_generator() + self._run_load_0(break_epoch_no=break_epoch_no) + + fs.delete(checker.hdfs_checkpoint_path) + logger.info("end test_corener_epoch_no") + class AutoCheckpointTest(AutoCheckPointACLBase): def setUp(self): @@ -193,13 +211,13 @@ class AutoCheckpointTest(AutoCheckPointACLBase): "PADDLE_RUNNING_ENV": "PADDLE_EDL_AUTO_CHECKPOINT", "PADDLE_TRAINER_ID": "0", "PADDLE_RUNNING_PLATFORM": "PADDLE_CLOUD", - "PADDLE_JOB_ID": "test_job_auto_1", + "PADDLE_JOB_ID": "test_job_auto_0", "PADDLE_EDL_HDFS_HOME": "/usr/local/hadoop-2.7.7", "PADDLE_EDL_HDFS_NAME": "", "PADDLE_EDL_HDFS_UGI": "", - "PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint_1", + "PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint_0", "PADDLE_EDL_ONLY_FOR_CE_TEST": "1", - "PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test_1", + "PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test_0", "PADDLE_EDL_SAVE_CHECKPOINT_INTER": "0" } os.environ.update(proc_env) @@ -246,102 +264,6 @@ class AutoCheckpointTest(AutoCheckPointACLBase): logger.info("end test_not_use") - def test_multiple(self): - checker = acp._get_checker() - fs = HDFSClient(checker.hdfs_home, None) - fs.delete(checker.hdfs_checkpoint_path) - self._reset_generator() - - logger.info("begin test_multiple") - fs = LocalFS() - save_dir = "./run_save_0" - fs.delete(save_dir) - - exe, main_prog1, startup_prog1 = self._generate() - _, main_prog2, startup_prog2 = self._generate() - - compiled1, data_loader1, optimizer1, loss1, image1, label1 = \ - self._init_env(exe, main_prog1, startup_prog1) - - compiled2, data_loader2, optimizer2, loss2, image2, label2 = \ - self._init_env(exe, main_prog2, startup_prog2) - - o = None - epochs = [] - for i in acp.train_epoch_range(3, 0): - for data in data_loader1(): - fetch = exe.run(compiled1, feed=data, fetch_list=[loss1]) - - for data in data_loader2(): - fetch = exe.run(compiled2, feed=data, fetch_list=[loss2]) - - o = acp._get_train_epoch_range() - self.assertEqual(len(o._exe_status), 2) - print(o._exe_status) - epochs.append(i) - - o = acp._get_train_epoch_range() - self.assertTrue(o == None, "now train 
epoch must not exits now") - self.assertEqual(i, 2) - self.assertEqual(epochs, [0, 1, 2]) - - fs.delete(save_dir) - logger.info("end test_multiple") - - def test_distributed_basic(self): - checker = acp._get_checker() - fs = HDFSClient(checker.hdfs_home, None) - fs.delete(checker.hdfs_checkpoint_path) - self._reset_generator() - - logger.info("begin test_distributed_basic") - fs = LocalFS() - save_dir = "./run_save_0" - fs.delete(save_dir) - - #basic - exe, main_prog, startup_prog = self._generate() - - compiled, data_loader, optimizer, loss, image, label = \ - self._init_env(exe, main_prog, startup_prog, minimize=False) - - #fleet - os.environ["TRAINING_ROLE"] = "TRAINER" - os.environ["PADDLE_TRAINER_ID"] = "0" - os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:6070" - - role = role_maker.PaddleCloudRoleMaker(is_collective=True) - fleet.init(role) - - with fluid.program_guard(main_prog, startup_prog): - dist_optimizer = fleet.distributed_optimizer(optimizer) - dist_optimizer.minimize(loss) - - exe.run(startup_prog) - - o = None - i = 0 - name = None - for i in acp.train_epoch_range(3, 0): - o = acp._get_train_epoch_range() - name = o.name - logger.info("_run_save_0 name:{} epoch_no:{}".format(o.name, i)) - - for data in data_loader(): - fetch = exe.run(fleet.main_program, - feed=data, - fetch_list=[loss]) - - self.assertEqual(len(o._exe_status), 1) - - o = acp._get_train_epoch_range() - assert o == None, "now train epoch must not exits now" - self.assertEqual(i, 2) - - fs.delete(save_dir) - - logger.info("end test_distributed_basic") - def test_checker(self): os.environ.pop("PADDLE_JOB_ID", None) try: diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py new file mode 100644 index 0000000000000000000000000000000000000000..55173325f621f7333a7c3ca32a9c55becee72e5a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py @@ -0,0 +1,64 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
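Each split-out auto-checkpoint test below repeats the base environment from AutoCheckPointACLBase.setUp with a different suffix; the pattern is sketched here with an illustrative helper (auto_checkpoint_env does not exist in the tests), covering the only values that actually change per file.

def auto_checkpoint_env(tag):
    # Only these suffixed entries differ between test_auto_checkpoint1,
    # test_auto_checkpoint3 and the dist-basic variant; the remaining
    # settings shown in their setUp blocks are identical across files.
    return {
        "PADDLE_JOB_ID": "test_job_auto_%s" % tag,
        "PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint_%s" % tag,
        "PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test_%s" % tag,
    }

print(auto_checkpoint_env("1")["PADDLE_JOB_ID"])   # test_job_auto_1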
+ +import unittest +import paddle +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet +import os +import sys + +from paddle.distributed.fleet.utils.fs import LocalFS, HDFSClient +import paddle.fluid.incubate.checkpoint.auto_checkpoint as acp +from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel +from paddle.fluid.framework import program_guard +from paddle.fluid import unique_name + +import numpy as np +from paddle.io import Dataset, BatchSampler, DataLoader + +from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger +from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase + +logger = get_logger() + + +class AutoCheckpointTest1(AutoCheckPointACLBase): + def setUp(self): + get_logger() + logger.info("enter tests") + + self._old_environ = dict(os.environ) + proc_env = { + "PADDLE_RUNNING_ENV": "PADDLE_EDL_AUTO_CHECKPOINT", + "PADDLE_TRAINER_ID": "0", + "PADDLE_RUNNING_PLATFORM": "PADDLE_CLOUD", + "PADDLE_JOB_ID": "test_job_auto_1", + "PADDLE_EDL_HDFS_HOME": "/usr/local/hadoop-2.7.7", + "PADDLE_EDL_HDFS_NAME": "", + "PADDLE_EDL_HDFS_UGI": "", + "PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint_1", + "PADDLE_EDL_ONLY_FOR_CE_TEST": "1", + "PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test_1", + "PADDLE_EDL_SAVE_CHECKPOINT_INTER": "0" + } + os.environ.update(proc_env) + + def test_corner_epoch_no(self): + self._test_corner_epoch_no(0) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py index 30a743510537e1fa0e2aeedb18de25c7e1fd120c..5d72fa01008af55a83d7b9a19747a8d96fb74b2b 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py @@ -57,19 +57,7 @@ class AutoCheckpointTest2(AutoCheckPointACLBase): os.environ.update(proc_env) def test_corner_epoch_no(self): - logger.info("begin test_corener_epoch_no") - checker = acp._get_checker() - fs = HDFSClient(checker.hdfs_home, None) - - for i in range(3): - fs.delete(checker.hdfs_checkpoint_path) - self._reset_generator() - self._run_save_0(break_epoch_no=i) - self._reset_generator() - self._run_load_0(break_epoch_no=i) - - fs.delete(checker.hdfs_checkpoint_path) - logger.info("end test_corener_epoch_no") + self._test_corner_epoch_no(1) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py new file mode 100644 index 0000000000000000000000000000000000000000..5382f7e328ed1afa2d7516cd0d8db2db659aadd7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py @@ -0,0 +1,64 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import paddle +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet +import os +import sys + +from paddle.distributed.fleet.utils.fs import LocalFS, HDFSClient +import paddle.fluid.incubate.checkpoint.auto_checkpoint as acp +from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel +from paddle.fluid.framework import program_guard +from paddle.fluid import unique_name + +import numpy as np +from paddle.io import Dataset, BatchSampler, DataLoader + +from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger +from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase + +logger = get_logger() + + +class AutoCheckpointTest3(AutoCheckPointACLBase): + def setUp(self): + get_logger() + logger.info("enter tests") + + self._old_environ = dict(os.environ) + proc_env = { + "PADDLE_RUNNING_ENV": "PADDLE_EDL_AUTO_CHECKPOINT", + "PADDLE_TRAINER_ID": "0", + "PADDLE_RUNNING_PLATFORM": "PADDLE_CLOUD", + "PADDLE_JOB_ID": "test_job_auto_3", + "PADDLE_EDL_HDFS_HOME": "/usr/local/hadoop-2.7.7", + "PADDLE_EDL_HDFS_NAME": "", + "PADDLE_EDL_HDFS_UGI": "", + "PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint_3", + "PADDLE_EDL_ONLY_FOR_CE_TEST": "1", + "PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test_3", + "PADDLE_EDL_SAVE_CHECKPOINT_INTER": "0" + } + os.environ.update(proc_env) + + def test_corner_epoch_no(self): + self._test_corner_epoch_no(2) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py new file mode 100644 index 0000000000000000000000000000000000000000..90db9595d92ef602c03fa7dd104484a4f6101a87 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py @@ -0,0 +1,115 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
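+
+# Note: test_distributed_basic is moved out of test_auto_checkpoint.py into this
+# standalone file. It sets up a single-trainer collective fleet
+# (PaddleCloudRoleMaker(is_collective=True) with one endpoint), wraps the optimizer
+# with fleet.distributed_optimizer, and checks that acp.train_epoch_range tracks
+# exactly one executor status across three epochs.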
+ +import unittest +import paddle +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet +import os +import sys + +from paddle.distributed.fleet.utils.fs import LocalFS, HDFSClient +import paddle.fluid.incubate.checkpoint.auto_checkpoint as acp +from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel +from paddle.fluid.framework import program_guard +from paddle.fluid import unique_name + +import numpy as np +from paddle.io import Dataset, BatchSampler, DataLoader + +from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger +from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase + +logger = get_logger() + + +class AutoCheckpointTestDist(AutoCheckPointACLBase): + def setUp(self): + get_logger() + logger.info("enter tests") + + self._old_environ = dict(os.environ) + proc_env = { + "PADDLE_RUNNING_ENV": "PADDLE_EDL_AUTO_CHECKPOINT", + "PADDLE_TRAINER_ID": "0", + "PADDLE_RUNNING_PLATFORM": "PADDLE_CLOUD", + "PADDLE_JOB_ID": "test_job_auto_dist_basic", + "PADDLE_EDL_HDFS_HOME": "/usr/local/hadoop-2.7.7", + "PADDLE_EDL_HDFS_NAME": "", + "PADDLE_EDL_HDFS_UGI": "", + "PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint_dist_basic", + "PADDLE_EDL_ONLY_FOR_CE_TEST": "1", + "PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test_dist_basic", + "PADDLE_EDL_SAVE_CHECKPOINT_INTER": "0" + } + os.environ.update(proc_env) + + def test_distributed_basic(self): + checker = acp._get_checker() + fs = HDFSClient(checker.hdfs_home, None) + fs.delete(checker.hdfs_checkpoint_path) + self._reset_generator() + + logger.info("begin test_distributed_basic") + fs = LocalFS() + save_dir = "./run_save_0" + fs.delete(save_dir) + + #basic + exe, main_prog, startup_prog = self._generate() + + compiled, data_loader, optimizer, loss, image, label = \ + self._init_env(exe, main_prog, startup_prog, minimize=False) + + #fleet + os.environ["TRAINING_ROLE"] = "TRAINER" + os.environ["PADDLE_TRAINER_ID"] = "0" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:6070" + + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + + with fluid.program_guard(main_prog, startup_prog): + dist_optimizer = fleet.distributed_optimizer(optimizer) + dist_optimizer.minimize(loss) + + exe.run(startup_prog) + + o = None + i = 0 + name = None + for i in acp.train_epoch_range(3, 0): + o = acp._get_train_epoch_range() + name = o.name + logger.info("_run_save_0 name:{} epoch_no:{}".format(o.name, i)) + + for data in data_loader(): + fetch = exe.run(fleet.main_program, + feed=data, + fetch_list=[loss]) + + self.assertEqual(len(o._exe_status), 1) + + o = acp._get_train_epoch_range() + assert o == None, "now train epoch must not exits now" + self.assertEqual(i, 2) + + fs.delete(save_dir) + + logger.info("end test_distributed_basic") + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py new file mode 100644 index 0000000000000000000000000000000000000000..8c10cd0e9922859bf3bad2015587fc0a6b2ba5da --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py @@ -0,0 +1,103 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet +import os +import sys + +from paddle.distributed.fleet.utils.fs import LocalFS, HDFSClient +import paddle.fluid.incubate.checkpoint.auto_checkpoint as acp +from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel +from paddle.fluid.framework import program_guard +from paddle.fluid import unique_name + +import numpy as np +from paddle.io import Dataset, BatchSampler, DataLoader + +from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger +from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase + +logger = get_logger() + + +class AutoCheckpointTestMul(AutoCheckPointACLBase): + def setUp(self): + get_logger() + logger.info("enter tests") + + self._old_environ = dict(os.environ) + proc_env = { + "PADDLE_RUNNING_ENV": "PADDLE_EDL_AUTO_CHECKPOINT", + "PADDLE_TRAINER_ID": "0", + "PADDLE_RUNNING_PLATFORM": "PADDLE_CLOUD", + "PADDLE_JOB_ID": "test_job_auto_dist_multiple", + "PADDLE_EDL_HDFS_HOME": "/usr/local/hadoop-2.7.7", + "PADDLE_EDL_HDFS_NAME": "", + "PADDLE_EDL_HDFS_UGI": "", + "PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint_dist_multiple", + "PADDLE_EDL_ONLY_FOR_CE_TEST": "1", + "PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test_dist_multiple", + "PADDLE_EDL_SAVE_CHECKPOINT_INTER": "0" + } + os.environ.update(proc_env) + + def test_multiple(self): + checker = acp._get_checker() + fs = HDFSClient(checker.hdfs_home, None) + fs.delete(checker.hdfs_checkpoint_path) + self._reset_generator() + + logger.info("begin test_multiple") + fs = LocalFS() + save_dir = "./run_save_0" + fs.delete(save_dir) + + exe, main_prog1, startup_prog1 = self._generate() + _, main_prog2, startup_prog2 = self._generate() + + compiled1, data_loader1, optimizer1, loss1, image1, label1 = \ + self._init_env(exe, main_prog1, startup_prog1) + + compiled2, data_loader2, optimizer2, loss2, image2, label2 = \ + self._init_env(exe, main_prog2, startup_prog2) + + o = None + epochs = [] + for i in acp.train_epoch_range(3, 0): + for data in data_loader1(): + fetch = exe.run(compiled1, feed=data, fetch_list=[loss1]) + + for data in data_loader2(): + fetch = exe.run(compiled2, feed=data, fetch_list=[loss2]) + + o = acp._get_train_epoch_range() + self.assertEqual(len(o._exe_status), 2) + print(o._exe_status) + epochs.append(i) + + o = acp._get_train_epoch_range() + self.assertTrue(o == None, "now train epoch must not exits now") + self.assertEqual(i, 2) + self.assertEqual(epochs, [0, 1, 2]) + + fs.delete(save_dir) + logger.info("end test_multiple") + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py index bc666c0de5be06be7529bced39071303430c8ace..875f6211a7fbd98463d98dff91d93cc1b431fc86 
100644 --- a/python/paddle/fluid/tests/unittests/test_base_layer.py +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -86,6 +86,31 @@ class TestBaseLayer(unittest.TestCase): ret = l() self.assertTrue(np.allclose(ret.numpy(), 0.8 * np.ones([2, 2]))) + def test_add_parameter_with_error(self): + with fluid.dygraph.guard(): + net = fluid.Layer() + param = net.create_parameter(shape=[1]) + + with self.assertRaises(TypeError): + net.add_parameter(10, param) + + with self.assertRaises(KeyError): + net.add_parameter("param.name", param) + + with self.assertRaises(KeyError): + net.add_parameter("", param) + + with self.assertRaises(KeyError): + net.test_param = 10 + net.add_parameter("test_param", param) + + with self.assertRaises(TypeError): + net.add_parameter("no_param", 10) + + load_param = net.create_parameter(shape=[1]) + net._loaddict_holder[load_param.name] = load_param + net.add_parameter("load_param", load_param) + class BufferLayer(fluid.Layer): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/test_batch_sampler.py b/python/paddle/fluid/tests/unittests/test_batch_sampler.py index 7d90bbd0357bcc93cf7a66e99082feeb7e254db4..6ec6fdb59f200ce1dc9b6418b7f11329f85ba5dd 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_sampler.py +++ b/python/paddle/fluid/tests/unittests/test_batch_sampler.py @@ -17,7 +17,7 @@ from __future__ import division import unittest import paddle.fluid as fluid -from paddle.io import BatchSampler, Dataset +from paddle.io import BatchSampler, Dataset, Sampler, SequenceSampler, RandomSampler class RandomDataset(Dataset): @@ -35,6 +35,72 @@ class RandomDataset(Dataset): return self.sample_num +class TestSampler(unittest.TestCase): + def test_main(self): + dataset = RandomDataset(100, 10) + sampler = Sampler(dataset) + try: + iter(sampler) + self.assertTrue(False) + except NotImplementedError: + pass + + +class TestSequenceSampler(unittest.TestCase): + def test_main(self): + dataset = RandomDataset(100, 10) + sampler = SequenceSampler(dataset) + assert len(sampler) == 100 + + for i, index in enumerate(iter(sampler)): + assert i == index + + +class TestRandomSampler(unittest.TestCase): + def test_main(self): + dataset = RandomDataset(100, 10) + sampler = RandomSampler(dataset) + assert len(sampler) == 100 + + rets = [] + for i in iter(sampler): + rets.append(i) + assert tuple(sorted(rets)) == tuple(range(0, 100)) + + def test_with_num_samples(self): + dataset = RandomDataset(100, 10) + sampler = RandomSampler(dataset, num_samples=50, replacement=True) + assert len(sampler) == 50 + + rets = [] + for i in iter(sampler): + rets.append(i) + assert i >= 0 and i < 100 + + def test_with_generator(self): + dataset = RandomDataset(100, 10) + generator = iter(range(0, 60)) + sampler = RandomSampler(dataset, generator=generator) + assert len(sampler) == 100 + + rets = [] + for i in iter(sampler): + rets.append(i) + assert tuple(sorted(rets)) == tuple(range(0, 60)) + + def test_with_generator_num_samples(self): + dataset = RandomDataset(100, 10) + generator = iter(range(0, 60)) + sampler = RandomSampler( + dataset, generator=generator, num_samples=50, replacement=True) + assert len(sampler) == 50 + + rets = [] + for i in iter(sampler): + rets.append(i) + assert tuple(sorted(rets)) == tuple(range(0, 50)) + + class TestBatchSampler(unittest.TestCase): def setUp(self): self.num_samples = 1000 @@ -86,16 +152,18 @@ class TestBatchSamplerShuffle(TestBatchSampler): self.drop_last = True -class TestBatchSamplerWithIndices(TestBatchSampler): +class 
TestBatchSamplerWithSampler(TestBatchSampler): def init_batch_sampler(self): + dataset = RandomDataset(1000, 10) + sampler = SequenceSampler(dataset) bs = BatchSampler( - indices=list(range(self.num_samples)), + sampler=sampler, batch_size=self.batch_size, drop_last=self.drop_last) return bs -class TestBatchSamplerWithIndicesAndDataSource(unittest.TestCase): +class TestBatchSamplerWithSamplerDropLast(unittest.TestCase): def setUp(self): self.num_samples = 1000 self.num_classes = 10 @@ -103,12 +171,22 @@ class TestBatchSamplerWithIndicesAndDataSource(unittest.TestCase): self.shuffle = False self.drop_last = True + +class TestBatchSamplerWithSamplerShuffle(unittest.TestCase): + def setUp(self): + self.num_samples = 1000 + self.num_classes = 10 + self.batch_size = 32 + self.shuffle = True + self.drop_last = True + def test_main(self): try: dataset = RandomDataset(self.num_samples, self.num_classes) + sampler = RandomSampler(dataset) bs = BatchSampler( - dataset=dataset, - indices=list(range(self.num_samples)), + sampler=sampler, + shuffle=self.shuffle, batch_size=self.batch_size, drop_last=self.drop_last) self.assertTrue(False) diff --git a/python/paddle/fluid/tests/unittests/test_bce_loss.py b/python/paddle/fluid/tests/unittests/test_bce_loss.py index 21571e0981065a0a1e2a5db03e91b4df0ea55d9a..a8054295b41c1f6d0008c4f0a9fadb6f04c647fc 100644 --- a/python/paddle/fluid/tests/unittests/test_bce_loss.py +++ b/python/paddle/fluid/tests/unittests/test_bce_loss.py @@ -19,93 +19,189 @@ import unittest from op_test import OpTest +def test_static_layer(place, + input_np, + label_np, + reduction='mean', + weight_np=None): + prog = paddle.static.Program() + startup_prog = paddle.static.Program() + with paddle.static.program_guard(prog, startup_prog): + input = paddle.data(name='input', shape=input_np.shape, dtype='float64') + label = paddle.data(name='label', shape=label_np.shape, dtype='float64') + if weight_np is not None: + weight = paddle.data( + name='weight', shape=weight_np.shape, dtype='float64') + bce_loss = paddle.nn.loss.BCELoss( + weight=weight, reduction=reduction) + else: + bce_loss = paddle.nn.loss.BCELoss(reduction=reduction) + res = bce_loss(input, label) + exe = paddle.static.Executor(place) + static_result = exe.run(prog, + feed={"input": input_np, + "label": label_np} + if weight_np is None else { + "input": input_np, + "label": label_np, + "weight": weight_np + }, + fetch_list=[res]) + return static_result + + +def test_static_functional(place, + input_np, + label_np, + reduction='mean', + weight_np=None): + prog = paddle.static.Program() + startup_prog = paddle.static.Program() + with paddle.static.program_guard(prog, startup_prog): + input = paddle.data(name='input', shape=input_np.shape, dtype='float64') + label = paddle.data(name='label', shape=label_np.shape, dtype='float64') + if weight_np is not None: + weight = paddle.data( + name='weight', shape=weight_np.shape, dtype='float64') + res = paddle.nn.functional.binary_cross_entropy( + input, label, weight=weight, reduction=reduction) + else: + res = paddle.nn.functional.binary_cross_entropy( + input, label, reduction=reduction) + exe = paddle.static.Executor(place) + static_result = exe.run(prog, + feed={"input": input_np, + "label": label_np} + if weight_np is None else { + "input": input_np, + "label": label_np, + "weight": weight_np + }, + fetch_list=[res]) + return static_result + + +def test_dygraph_layer(place, + input_np, + label_np, + reduction='mean', + weight_np=None): + paddle.disable_static() + if weight_np is not 
None: + weight = paddle.to_tensor(weight_np) + bce_loss = paddle.nn.loss.BCELoss(weight=weight, reduction=reduction) + else: + bce_loss = paddle.nn.loss.BCELoss(reduction=reduction) + dy_res = bce_loss(paddle.to_tensor(input_np), paddle.to_tensor(label_np)) + dy_result = dy_res.numpy() + paddle.enable_static() + return dy_result + + +def test_dygraph_functional(place, + input_np, + label_np, + reduction='mean', + weight_np=None): + paddle.disable_static() + input = paddle.to_tensor(input_np) + label = paddle.to_tensor(label_np) + + if weight_np is not None: + weight = paddle.to_tensor(weight_np) + dy_res = paddle.nn.functional.binary_cross_entropy( + input, label, weight=weight, reduction=reduction) + else: + dy_res = paddle.nn.functional.binary_cross_entropy( + input, label, reduction=reduction) + dy_result = dy_res.numpy() + paddle.enable_static() + return dy_result + + +def calc_bceloss(input_np, label_np, reduction='mean', weight_np=None): + if weight_np is None: + expected = -1 * (label_np * np.log(input_np) + + (1. - label_np) * np.log(1. - input_np)) + else: + expected = -1 * weight_np * (label_np * np.log(input_np) + + (1. - label_np) * np.log(1. - input_np)) + + if reduction == 'mean': + expected = np.mean(expected) + elif reduction == 'sum': + expected = np.sum(expected) + else: + expected = expected + + return expected + + class TestBCELoss(unittest.TestCase): def test_BCELoss(self): - input_np = np.random.random(size=(20, 30)).astype(np.float64) - label_np = np.random.random(size=(20, 30)).astype(np.float64) - prog = fluid.Program() - startup_prog = fluid.Program() + input_np = np.random.uniform(0.1, 0.8, size=(20, 30)).astype(np.float64) + label_np = np.random.randint(0, 2, size=(20, 30)).astype(np.float64) places = [fluid.CPUPlace()] if fluid.core.is_compiled_with_cuda(): places.append(fluid.CUDAPlace(0)) reductions = ['sum', 'mean', 'none'] for place in places: - for red in reductions: - with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[None, 30], dtype='float64') - label = fluid.data( - name='label', shape=[None, 30], dtype='float64') - bce_loss = paddle.nn.loss.BCELoss(reduction=red) - res = bce_loss(input, label) - - exe = fluid.Executor(place) - static_result = exe.run( - prog, - feed={"input": input_np, - "label": label_np}, - fetch_list=[res]) - - with fluid.dygraph.guard(): - bce_loss = paddle.nn.loss.BCELoss(reduction=red) - dy_res = bce_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) - dy_result = dy_res.numpy() - - expected = -1 * (label_np * np.log(input_np) + - (1. - label_np) * np.log(1. 
- input_np)) - if red == 'mean': - expected = np.mean(expected) - elif red == 'sum': - expected = np.sum(expected) - else: - expected = expected + for reduction in reductions: + static_result = test_static_layer(place, input_np, label_np, + reduction) + dy_result = test_dygraph_layer(place, input_np, label_np, + reduction) + expected = calc_bceloss(input_np, label_np, reduction) self.assertTrue(np.allclose(static_result, expected)) self.assertTrue(np.allclose(static_result, dy_result)) self.assertTrue(np.allclose(dy_result, expected)) + static_functional = test_static_functional(place, input_np, + label_np, reduction) + dy_functional = test_dygraph_functional(place, input_np, + label_np, reduction) + self.assertTrue(np.allclose(static_functional, expected)) + self.assertTrue(np.allclose(static_functional, dy_functional)) + self.assertTrue(np.allclose(dy_functional, expected)) def test_BCELoss_weight(self): - input_np = np.random.random(size=(2, 3, 4, 10)).astype(np.float64) - label_np = np.random.random(size=(2, 3, 4, 10)).astype(np.float64) + input_np = np.random.uniform( + 0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float64) + label_np = np.random.randint( + 0, 2, size=(2, 3, 4, 10)).astype(np.float64) weight_np = np.random.random(size=(3, 4, 10)).astype(np.float64) - prog = fluid.Program() - startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[None, 3, 4, 10], dtype='float64') - label = fluid.data( - name='label', shape=[None, 3, 4, 10], dtype='float64') - weight = fluid.data( - name='weight', shape=[3, 4, 10], dtype='float64') - bce_loss = paddle.nn.loss.BCELoss(weight=weight) - res = bce_loss(input, label) - - exe = fluid.Executor(place) - static_result = exe.run(prog, - feed={ - "input": input_np, - "label": label_np, - "weight": weight_np - }, - fetch_list=[res]) - - with fluid.dygraph.guard(): - bce_loss = paddle.nn.loss.BCELoss( - weight=fluid.dygraph.to_variable(weight_np)) - dy_res = bce_loss( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) - dy_result = dy_res.numpy() - - expected = np.mean(-1 * weight_np * - (label_np * np.log(input_np) + - (1. - label_np) * np.log(1. 
- input_np))) - self.assertTrue(np.allclose(static_result, expected)) - self.assertTrue(np.allclose(static_result, dy_result)) - self.assertTrue(np.allclose(dy_result, expected)) + for reduction in ['sum', 'mean', 'none']: + static_result = test_static_layer( + place, input_np, label_np, reduction, weight_np=weight_np) + dy_result = test_dygraph_layer( + place, input_np, label_np, reduction, weight_np=weight_np) + expected = calc_bceloss( + input_np, label_np, reduction, weight_np=weight_np) + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + static_functional = test_static_functional( + place, input_np, label_np, reduction, weight_np=weight_np) + dy_functional = test_dygraph_functional( + place, input_np, label_np, reduction, weight_np=weight_np) + self.assertTrue(np.allclose(static_functional, expected)) + self.assertTrue(np.allclose(static_functional, dy_functional)) + self.assertTrue(np.allclose(dy_functional, expected)) + + def test_BCELoss_error(self): + paddle.disable_static() + self.assertRaises( + ValueError, paddle.nn.loss.BCELoss, reduction="unsupport reduction") + input = paddle.to_tensor([[0.1, 0.3]], dtype='float32') + label = paddle.to_tensor([[0.0, 1.0]], dtype='float32') + self.assertRaises( + ValueError, + paddle.nn.functional.binary_cross_entropy, + input=input, + label=label, + reduction="unsupport reduction") + paddle.enable_static() def bce_loss(input, label): diff --git a/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py b/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..5ba13a6da01c7dbf8b0e854df43f11b19a4ebd4c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py @@ -0,0 +1,260 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
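+
+# The numpy reference below, calc_bce_with_logits_loss, uses the numerically stable
+# form of binary cross entropy with logits,
+#     loss = max(x, 0) - x * z + log(1 + exp(-|x|)),
+# which equals -[z * log(sigmoid(x)) + (1 - z) * log(1 - sigmoid(x))] but avoids
+# overflow for large |x|. pos_weight rescales the positive (label) term via
+# ((pos_weight - 1) * z + 1), weight rescales the loss element-wise, and the result
+# is reduced according to 'mean', 'sum' or 'none', matching what the tests assert.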
+ +import paddle +import paddle.fluid as fluid +import numpy as np +import unittest +from op_test import OpTest + + +def call_bce_layer(logit, label, weight=None, reduction='mean', + pos_weight=None): + bce_logit_loss = paddle.nn.loss.BCEWithLogitsLoss( + weight=weight, reduction=reduction, pos_weight=pos_weight) + res = bce_logit_loss(logit, label) + return res + + +def call_bce_functional(logit, + label, + weight=None, + reduction='mean', + pos_weight=None): + res = paddle.nn.functional.binary_cross_entropy_with_logits( + logit, label, weight=weight, reduction=reduction, pos_weight=pos_weight) + return res + + +def test_static(place, + logit_np, + label_np, + weight_np=None, + reduction='mean', + pos_weight_np=None, + functional=False): + paddle.enable_static() + prog = paddle.static.Program() + startup_prog = paddle.static.Program() + with paddle.static.program_guard(prog, startup_prog): + logit = paddle.data(name='logit', shape=logit_np.shape, dtype='float64') + label = paddle.data(name='label', shape=label_np.shape, dtype='float64') + feed_dict = {"logit": logit_np, "label": label_np} + + pos_weight = None + weight = None + if pos_weight_np is not None: + pos_weight = paddle.data( + name='pos_weight', shape=pos_weight_np.shape, dtype='float64') + feed_dict["pos_weight"] = pos_weight_np + if weight_np is not None: + weight = paddle.data( + name='weight', shape=weight_np.shape, dtype='float64') + feed_dict["weight"] = weight_np + if functional: + res = call_bce_functional(logit, label, weight, reduction, + pos_weight) + else: + res = call_bce_layer(logit, label, weight, reduction, pos_weight) + exe = paddle.static.Executor(place) + static_result = exe.run(prog, feed=feed_dict, fetch_list=[res]) + return static_result + + +def test_dygraph(place, + logit_np, + label_np, + weight_np=None, + reduction='mean', + pos_weight_np=None, + functional=False): + paddle.disable_static() + logit = paddle.to_tensor(logit_np) + label = paddle.to_tensor(label_np) + weight = None + pos_weight = None + if weight_np is not None: + weight = paddle.to_tensor(weight_np) + if pos_weight_np is not None: + pos_weight = paddle.to_tensor(pos_weight_np) + if functional: + dy_res = call_bce_functional(logit, label, weight, reduction, + pos_weight) + else: + dy_res = call_bce_layer(logit, label, weight, reduction, pos_weight) + dy_result = dy_res.numpy() + paddle.enable_static() + return dy_result + + +def calc_bce_with_logits_loss(logit_np, + label_np, + reduction='mean', + weight_np=None, + pos_weight=None): + expected = np.maximum( + logit_np, + 0) - logit_np * label_np + np.log(1 + np.exp(-np.abs(logit_np))) + if pos_weight is not None: + expected = expected * ((pos_weight - 1) * label_np + 1) + if weight_np is not None: + expected = weight_np * expected + + if reduction == 'mean': + expected = np.mean(expected) + elif reduction == 'sum': + expected = np.sum(expected) + else: + expected = expected + + return expected + + +class TestBCEWithLogitsLoss(unittest.TestCase): + def test_BCEWithLogitsLoss(self): + logit_np = np.random.uniform(0.1, 0.8, size=(20, 30)).astype(np.float64) + label_np = np.random.randint(0, 2, size=(20, 30)).astype(np.float64) + places = [fluid.CPUPlace()] + if fluid.core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + reductions = ['sum', 'mean', 'none'] + for place in places: + for reduction in reductions: + static_result = test_static( + place, logit_np, label_np, reduction=reduction) + dy_result = test_dygraph( + place, logit_np, label_np, reduction=reduction) + expected = 
calc_bce_with_logits_loss(logit_np, label_np, + reduction) + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + static_functional = test_static( + place, + logit_np, + label_np, + reduction=reduction, + functional=True) + dy_functional = test_dygraph( + place, + logit_np, + label_np, + reduction=reduction, + functional=True) + self.assertTrue(np.allclose(static_functional, expected)) + self.assertTrue(np.allclose(static_functional, dy_functional)) + self.assertTrue(np.allclose(dy_functional, expected)) + + def test_BCEWithLogitsLoss_weight(self): + logit_np = np.random.uniform( + 0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float64) + label_np = np.random.randint( + 0, 2, size=(2, 3, 4, 10)).astype(np.float64) + weight_np = np.random.random(size=(2, 3, 4, 10)).astype(np.float64) + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + for reduction in ['sum', 'mean', 'none']: + static_result = test_static( + place, + logit_np, + label_np, + weight_np=weight_np, + reduction=reduction) + dy_result = test_dygraph( + place, + logit_np, + label_np, + weight_np=weight_np, + reduction=reduction) + expected = calc_bce_with_logits_loss( + logit_np, label_np, reduction, weight_np=weight_np) + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + static_functional = test_static( + place, + logit_np, + label_np, + weight_np=weight_np, + reduction=reduction, + functional=True) + dy_functional = test_dygraph( + place, + logit_np, + label_np, + weight_np=weight_np, + reduction=reduction, + functional=True) + self.assertTrue(np.allclose(static_functional, expected)) + self.assertTrue(np.allclose(static_functional, dy_functional)) + self.assertTrue(np.allclose(dy_functional, expected)) + + def test_BCEWithLogitsLoss_pos_weight(self): + logit_np = np.random.uniform( + 0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float64) + label_np = np.random.randint( + 0, 2, size=(2, 3, 4, 10)).astype(np.float64) + pos_weight_np = np.random.random(size=(3, 4, 10)).astype(np.float64) + weight_np = np.random.random(size=(2, 3, 4, 10)).astype(np.float64) + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + reduction = "mean" + static_result = test_static(place, logit_np, label_np, weight_np, + reduction, pos_weight_np) + dy_result = test_dygraph(place, logit_np, label_np, weight_np, + reduction, pos_weight_np) + expected = calc_bce_with_logits_loss(logit_np, label_np, reduction, + weight_np, pos_weight_np) + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + static_functional = test_static( + place, + logit_np, + label_np, + weight_np, + reduction, + pos_weight_np, + functional=True) + dy_functional = test_dygraph( + place, + logit_np, + label_np, + weight_np, + reduction, + pos_weight_np, + functional=True) + self.assertTrue(np.allclose(static_functional, expected)) + self.assertTrue(np.allclose(static_functional, dy_functional)) + self.assertTrue(np.allclose(dy_functional, expected)) + + def test_BCEWithLogitsLoss_error(self): + paddle.disable_static() + self.assertRaises( + ValueError, + paddle.nn.BCEWithLogitsLoss, + reduction="unsupport reduction") + logit = paddle.to_tensor([[0.1, 0.3]], dtype='float32') + label = 
paddle.to_tensor([[0.0, 1.0]], dtype='float32') + self.assertRaises( + ValueError, + paddle.nn.functional.binary_cross_entropy_with_logits, + logit=logit, + label=label, + reduction="unsupport reduction") + paddle.enable_static() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py new file mode 100644 index 0000000000000000000000000000000000000000..12a29de80426639ab3a9d2b879bb88a461ba2ab4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py @@ -0,0 +1,76 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import paddle +from op_test import OpTest +import numpy as np + + +def output_hist(out): + hist, _ = np.histogram(out, bins=2) + hist = hist.astype("float32") + hist /= float(out.size) + prob = 0.5 * np.ones((2)) + return hist, prob + + +class TestBernoulliOp(OpTest): + def setUp(self): + self.op_type = "bernoulli" + self.inputs = {"X": np.random.uniform(size=(1000, 784))} + self.init_attrs() + self.outputs = {"Out": np.zeros((1000, 784)).astype("float32")} + + def init_attrs(self): + self.attrs = {} + self.output_hist = output_hist + + def test_check_output(self): + self.check_output_customized(self.verify_output) + + def verify_output(self, outs): + hist, prob = self.output_hist(np.array(outs[0])) + self.assertTrue( + np.allclose( + hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + + +class TestBernoulliApi(unittest.TestCase): + def test_dygraph(self): + paddle.disable_static() + x = paddle.rand([1024, 1024]) + out = paddle.bernoulli(x) + paddle.enable_static() + hist, prob = output_hist(out.numpy()) + self.assertTrue( + np.allclose( + hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + + def test_static(self): + x = paddle.rand([1024, 1024]) + out = paddle.bernoulli(x) + exe = paddle.static.Executor(paddle.CPUPlace()) + out = exe.run(paddle.static.default_main_program(), + fetch_list=[out.name]) + hist, prob = output_hist(out[0]) + self.assertTrue( + np.allclose( + hist, prob, rtol=0, atol=0.01), "hist: " + str(hist)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_api.py b/python/paddle/fluid/tests/unittests/test_bilinear_api.py new file mode 100644 index 0000000000000000000000000000000000000000..24eae4797de85f371ed62e78c85b160f698ee9eb --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_bilinear_api.py @@ -0,0 +1,65 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +from op_test import OpTest + +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +import numpy as np + + +class TestBilinearAPI(unittest.TestCase): + def test_api(self): + with fluid.program_guard(fluid.default_startup_program(), + fluid.default_main_program()): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + + data1 = fluid.data(name='X1', shape=[5, 5], dtype='float32') + data2 = fluid.data(name='X2', shape=[5, 4], dtype='float32') + + layer1 = np.random.random((5, 5)).astype('float32') + layer2 = np.random.random((5, 4)).astype('float32') + + bilinear = paddle.nn.Bilinear( + in1_features=5, in2_features=4, out_features=1000) + ret = bilinear(data1, data2) + + exe.run(fluid.default_startup_program()) + ret_fetch = exe.run(feed={'X1': layer1, + 'X2': layer2}, + fetch_list=[ret.name]) + self.assertEqual(ret_fetch[0].shape, (5, 1000)) + + +class TestBilinearAPIDygraph(unittest.TestCase): + def test_api(self): + paddle.disable_static() + layer1 = np.random.random((5, 5)).astype('float32') + layer2 = np.random.random((5, 4)).astype('float32') + bilinear = paddle.nn.Bilinear( + in1_features=5, in2_features=4, out_features=1000) + ret = bilinear(paddle.to_tensor(layer1), paddle.to_tensor(layer2)) + self.assertEqual(ret.shape, [5, 1000]) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py index 5cc8e2ba15d260b988ee66a5711aed42ca04c10b..cc2b1165ec304a63671b48d4702142ea38c9a2c1 100644 --- a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py +++ b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py @@ -65,7 +65,7 @@ def batch_bipartite_match(distance, lod, match_type=None, dist_threshold=None): """Bipartite Matching algorithm for batch input. Arg: distance (numpy.array) : The distance of two entries with shape [M, N]. - lod (list of int): The offsets of each input in this batch. + lod (list of int): The length of each input in this batch. 
""" n = len(lod) m = distance.shape[1] @@ -73,6 +73,7 @@ def batch_bipartite_match(distance, lod, match_type=None, dist_threshold=None): match_dist = np.zeros((n, m), dtype=np.float32) cur_offset = 0 for i in range(n): + if lod[i] == 0: continue bipartite_match(distance[cur_offset:(cur_offset + lod[i]), :], match_indices[i, :], match_dist[i, :]) if match_type == 'per_prediction': @@ -155,5 +156,22 @@ class TestBipartiteMatchOpWithPerPredictionType(OpTest): self.check_output() +class TestBipartiteMatchOpWithEmptyLoD(OpTest): + def setUp(self): + self.op_type = 'bipartite_match' + lod = [[5, 6, 0, 12]] + dist = np.random.random((23, 217)).astype('float32') + match_indices, match_dist = batch_bipartite_match(dist, lod[0]) + + self.inputs = {'DistMat': (dist, lod)} + self.outputs = { + 'ColToRowMatchIndices': match_indices, + 'ColToRowMatchDist': match_dist, + } + + def test_check_output(self): + self.check_output() + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_op.py index f3e6c079eedc8effc948a44e08a5dcdcae8d3081..ab08a0aacbf08768ffff43974ee9a7c7dd4a7288 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py @@ -100,5 +100,45 @@ class TestDygraph(unittest.TestCase): out = paddle.cholesky(x, upper=False) +class TestCholeskySingularAPI(unittest.TestCase): + def setUp(self): + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place, with_out=False): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[4, 4], dtype="float64") + result = paddle.cholesky(input) + + input_np = np.zeros([4, 4]).astype("float64") + + exe = fluid.Executor(place) + try: + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + except fluid.core.EnforceNotMet as ex: + print("The mat is singular") + pass + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + input_np = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[10, 11, 12], [13, 14, 15], + [16, 17, 18]]]).astype("float64") + input = fluid.dygraph.to_variable(input_np) + try: + result = paddle.cholesky(input) + except fluid.core.EnforceNotMet as ex: + print("The mat is singular") + pass + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_chunk_op.py b/python/paddle/fluid/tests/unittests/test_chunk_op.py new file mode 100644 index 0000000000000000000000000000000000000000..043b326fbd98769f96688ef2eeaf23c53978c94d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_chunk_op.py @@ -0,0 +1,138 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import numpy as np +from paddle.fluid import Program, program_guard +from paddle import fluid +import paddle + + +class TestChunkOpError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + # The type of axis in chunk_op should be int or Variable. + def test_axis_type(): + x1 = paddle.data(shape=[4], dtype='float16', name='x3') + paddle.chunk(x=x1, chunks=2, axis=3.2) + + self.assertRaises(TypeError, test_axis_type) + + # The type of axis in chunk op should be int or Variable. + def test_axis_variable_type(): + x2 = paddle.data(shape=[4], dtype='float16', name='x9') + x3 = paddle.data(shape=[1], dtype='float16', name='x10') + paddle.chunk(input=x2, chunks=2, axis=x3) + + self.assertRaises(TypeError, test_axis_variable_type) + + # The type of num_or_sections in chunk_op should be int, tuple or list. + def test_chunks_type(): + x4 = paddle.data(shape=[4], dtype='float16', name='x4') + paddle.chunk(input=x4, chunks=2.1, axis=3) + + self.assertRaises(TypeError, test_chunks_type) + + def test_axis_type_tensor(): + x5 = paddle.data(shape=[4], dtype='float16', name='x6') + paddle.chunk(input=x5, chunks=2, axis=3.2) + + self.assertRaises(TypeError, test_axis_type_tensor) + + +class API_TestChunk(unittest.TestCase): + def test_out(self): + with fluid.program_guard(fluid.Program(), fluid.Program()): + data1 = paddle.data('data1', shape=[4, 6, 6], dtype='float64') + data2 = paddle.data('data2', shape=[1], dtype='int32') + x0, x1, x2 = paddle.chunk(data1, chunks=3, axis=data2) + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) + input1 = np.random.random([4, 6, 6]).astype('float64') + input2 = np.array([2]).astype('int32') + r0, r1, r2, = exe.run(feed={"data1": input1, + "data2": input2}, + fetch_list=[x0, x1, x2]) + ex_x0, ex_x1, ex_x2 = np.array_split(input1, 3, axis=2) + self.assertTrue(np.allclose(ex_x0, r0)) + self.assertTrue(np.allclose(ex_x1, r1)) + self.assertTrue(np.allclose(ex_x2, r2)) + + +class API_TestChunk1(unittest.TestCase): + def test_out(self): + with fluid.program_guard(fluid.Program(), fluid.Program()): + data1 = paddle.data('data1', shape=[4, 6, 6], dtype='float64') + x0, x1, x2 = paddle.chunk(data1, chunks=3, axis=2) + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) + input1 = np.random.random([4, 6, 6]).astype('float64') + r0, r1, r2, = exe.run(feed={"data1": input1}, + fetch_list=[x0, x1, x2]) + ex_x0, ex_x1, ex_x2 = np.array_split(input1, 3, axis=2) + self.assertTrue(np.allclose(ex_x0, r0)) + self.assertTrue(np.allclose(ex_x1, r1)) + self.assertTrue(np.allclose(ex_x2, r2)) + + +class API_TestDygraphChunk(unittest.TestCase): + def test_out1(self): + with fluid.dygraph.guard(): + input_1 = np.random.random([4, 6, 6]).astype("int32") + # input is a variable which shape is [4, 6, 6] + input = fluid.dygraph.to_variable(input_1) + x0, x1, x2 = paddle.chunk(input, chunks=3, axis=1) + x0_out = x0.numpy() + x1_out = x1.numpy() + x2_out = x2.numpy() + ex_x0, ex_x1, ex_x2 = np.array_split(input_1, 3, axis=1) + self.assertTrue(np.allclose(ex_x0, x0_out)) + self.assertTrue(np.allclose(ex_x1, x1_out)) + self.assertTrue(np.allclose(ex_x2, x2_out)) + + def test_out2(self): + with fluid.dygraph.guard(): + input_1 = np.random.random([4, 6, 6]).astype("bool") + # input is a variable which shape is [4, 6, 6] + input = fluid.dygraph.to_variable(input_1) + x0, x1, x2 = paddle.chunk(input, chunks=3, axis=1) + x0_out = 
x0.numpy() + x1_out = x1.numpy() + x2_out = x2.numpy() + ex_x0, ex_x1, ex_x2 = np.array_split(input_1, 3, axis=1) + self.assertTrue(np.allclose(ex_x0, x0_out)) + self.assertTrue(np.allclose(ex_x1, x1_out)) + self.assertTrue(np.allclose(ex_x2, x2_out)) + + def test_axis_tensor_input(self): + with fluid.dygraph.guard(): + input_1 = np.random.random([4, 6, 6]).astype("int32") + # input is a variable which shape is [4, 6, 6] + input = fluid.dygraph.to_variable(input_1) + num1 = paddle.full(shape=[1], fill_value=1, dtype='int32') + x0, x1, x2 = paddle.chunk(input, chunks=3, axis=num1) + x0_out = x0.numpy() + x1_out = x1.numpy() + x2_out = x2.numpy() + ex_x0, ex_x1, ex_x2 = np.array_split(input_1, 3, axis=1) + self.assertTrue(np.allclose(ex_x0, x0_out)) + self.assertTrue(np.allclose(ex_x1, x1_out)) + self.assertTrue(np.allclose(ex_x2, x2_out)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_clamp.py b/python/paddle/fluid/tests/unittests/test_clamp.py deleted file mode 100644 index d8d7fe01f8de8686724ea8ebc00491269f2cc0bd..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_clamp.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function -import paddle.tensor as tensor -import paddle.fluid as fluid -import numpy as np -import unittest - - -class TestClampAPI(unittest.TestCase): - def test_dygraph_clamp(self): - in1 = np.array([[1.2, 3.5], [4.5, 6.4]]).astype('float32') - with fluid.dygraph.guard(): - x1 = fluid.dygraph.to_variable(in1) - out1 = tensor.clamp(x1, min=3.5, max=5.0) - out2 = tensor.clamp(x1, min=2.5) - self.assertTrue( - np.allclose( - out1.numpy(), in1.clip( - min=3.5, max=5.0))) - self.assertTrue(np.allclose(out2.numpy(), in1.clip(min=2.5))) - - def test_clamp(self): - data_shape = [1, 9, 9, 4] - data = np.random.random(data_shape).astype('float32') - images = fluid.data(name='image', shape=data_shape, dtype='float32') - min = fluid.data(name='min', shape=[1], dtype='float32') - max = fluid.data(name='max', shape=[1], dtype='float32') - - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - exe = fluid.Executor(place) - - out_1 = tensor.clamp(images, min=min, max=max) - out_2 = tensor.clamp(images, min=0.2, max=0.9) - out_3 = tensor.clamp(images, min=0.3) - out_4 = tensor.clamp(images, max=0.7) - out_5 = tensor.clamp(images, min=min) - out_6 = tensor.clamp(images, max=max) - - res1, res2, res3, res4, res5, res6 = exe.run( - fluid.default_main_program(), - feed={ - "image": data, - "min": np.array([0.2]).astype('float32'), - "max": np.array([0.8]).astype('float32') - }, - fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6]) - - self.assertTrue(np.allclose(res1, data.clip(0.2, 0.8))) - self.assertTrue(np.allclose(res2, data.clip(0.2, 0.9))) - self.assertTrue(np.allclose(res3, data.clip(min=0.3))) - self.assertTrue(np.allclose(res4, data.clip(max=0.7))) - self.assertTrue(np.allclose(res5, data.clip(min=0.2))) - self.assertTrue(np.allclose(res6, data.clip(max=0.8))) - - -class TestClampError(unittest.TestCase): - def test_errors(self): - x1 = fluid.layers.data(name='x1', shape=[1], dtype="int16") - x2 = fluid.layers.data(name='x2', shape=[1], dtype="int8") - self.assertRaises(TypeError, tensor.clamp, x=x1, min=0.2, max=0.8) - self.assertRaises(TypeError, tensor.clamp, x=x2, min=0.2, max=0.8) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_clip_op.py b/python/paddle/fluid/tests/unittests/test_clip_op.py index 33bbd4c8830d689bed513b9ce4084c3d00a923a8..93b31f052aae14546effef1696d2719dfff6727b 100644 --- a/python/paddle/fluid/tests/unittests/test_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_clip_op.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard from op_test import OpTest @@ -109,5 +110,64 @@ class TestClipOpError(unittest.TestCase): self.assertRaises(TypeError, test_dtype) +class TestClipAPI(unittest.TestCase): + def test_clip(self): + data_shape = [1, 9, 9, 4] + data = np.random.random(data_shape).astype('float32') + images = fluid.data(name='image', shape=data_shape, dtype='float32') + min = fluid.data(name='min', shape=[1], dtype='float32') + max = fluid.data(name='max', shape=[1], dtype='float32') + + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + exe = fluid.Executor(place) + + out_1 = paddle.clip(images, min=min, max=max) + out_2 = paddle.clip(images, min=0.2, max=0.9) + out_3 = paddle.clip(images, min=0.3) + out_4 = paddle.clip(images, max=0.7) + out_5 = paddle.clip(images, 
min=min) + out_6 = paddle.clip(images, max=max) + + res1, res2, res3, res4, res5, res6 = exe.run( + fluid.default_main_program(), + feed={ + "image": data, + "min": np.array([0.2]).astype('float32'), + "max": np.array([0.8]).astype('float32') + }, + fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6]) + + self.assertTrue(np.allclose(res1, data.clip(0.2, 0.8))) + self.assertTrue(np.allclose(res2, data.clip(0.2, 0.9))) + self.assertTrue(np.allclose(res3, data.clip(min=0.3))) + self.assertTrue(np.allclose(res4, data.clip(max=0.7))) + self.assertTrue(np.allclose(res5, data.clip(min=0.2))) + self.assertTrue(np.allclose(res6, data.clip(max=0.8))) + + def test_clip_dygraph(self): + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + paddle.disable_static(place) + data_shape = [1, 9, 9, 4] + data = np.random.random(data_shape).astype('float32') + images = paddle.to_variable(data, dtype='float32') + + out_1 = paddle.clip(images, min=0.2, max=0.8) + out_2 = paddle.clip(images, min=0.2, max=0.9) + + self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8))) + self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9))) + + def test_errors(self): + paddle.enable_static() + x1 = fluid.data(name='x1', shape=[1], dtype="int16") + x2 = fluid.data(name='x2', shape=[1], dtype="int8") + x3 = fluid.data(name='x3', shape=[1], dtype="float32") + self.assertRaises(TypeError, paddle.clip, x=x1, min=0.2, max=0.8) + self.assertRaises(TypeError, paddle.clip, x=x2, min=0.2, max=0.8) + self.assertRaises(Exception, paddle.clip, x=x3) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_collective_base.py b/python/paddle/fluid/tests/unittests/test_collective_base.py index 3f3a5642abc242e994844d0aac1b79cbf664e4d4..512b2967e02fd01e67f416c2fd9222ae8589d8d8 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_base.py @@ -241,6 +241,15 @@ class TestDistBase(unittest.TestCase): need_result = input2 self.assertTrue(np.allclose(tr0_out, need_result)) self.assertTrue(np.allclose(tr1_out, need_result)) + elif col_type == "reduce": + need_result = input1 + input2 + self.assertTrue(np.allclose(tr1_out, need_result)) + elif col_type == "scatter": + need_result = input2 + need_result1 = need_result[0:need_result.shape[0] // 2] + need_result2 = need_result[need_result.shape[0] // 2:] + self.assertTrue(np.allclose(tr0_out, need_result1)) + self.assertTrue(np.allclose(tr1_out, need_result2)) elif col_type == "allreduce": need_result = input1 + input2 self.assertTrue( diff --git a/python/paddle/fluid/tests/unittests/test_collective_reduce.py b/python/paddle/fluid/tests/unittests/test_collective_reduce.py new file mode 100644 index 0000000000000000000000000000000000000000..36837d6a227febd02e6ef1e2aeb905de19ca8acc --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_collective_reduce.py @@ -0,0 +1,34 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np + +from test_collective_base import TestDistBase + + +class TestCReduceOp(TestDistBase): + def _setup_config(self): + pass + + def test_reduce(self): + self.check_with_place("collective_reduce_op.py", "reduce") + + def test_reduce_calc_stream(self): + self.check_with_place("collective_reduce_op_calc_stream.py", "reduce") + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_collective_scatter.py b/python/paddle/fluid/tests/unittests/test_collective_scatter.py new file mode 100644 index 0000000000000000000000000000000000000000..7fe3ce73359559c0f9b4e0e3990032ce693aab8a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_collective_scatter.py @@ -0,0 +1,31 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np + +from test_collective_base import TestDistBase + + +class TestCScatterOp(TestDistBase): + def _setup_config(self): + pass + + def test_scatter(self): + self.check_with_place("collective_scatter_op.py", "scatter") + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index b277047500fb8082bd7a93998f424902bd563be6..30207340a27db0c1d00ab982cbac716e4b639c7e 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -25,7 +25,7 @@ import numpy import paddle import paddle.fluid as fluid -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet diff --git a/python/paddle/fluid/tests/unittests/test_communicator_sync.py b/python/paddle/fluid/tests/unittests/test_communicator_sync.py index e6db5c4d8c1357c4b6bde87101a29502c14c37d3..c0044d9d620796057cce0e3a51b2dec2878a0e17 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_sync.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_sync.py @@ -21,7 +21,7 @@ import os import paddle import paddle.fluid as fluid -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index 99d0c77fce50ffdfae8a3ec11fce42ea7942c5e6..cfad50409802d4f3d35c9da3b22597c681da91b1 100644 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -93,8 +93,9 @@ def create_paddle_case(op_type, callback): def test_broadcast_api_1(self): with 
program_guard(Program(), Program()): - x = paddle.nn.data(name='x', shape=[1, 2, 1, 3], dtype='int32') - y = paddle.nn.data(name='y', shape=[1, 2, 3], dtype='int32') + x = paddle.static.data( + name='x', shape=[1, 2, 1, 3], dtype='int32') + y = paddle.static.data(name='y', shape=[1, 2, 3], dtype='int32') op = eval("paddle.%s" % (self.op_type)) out = op(x, y) exe = paddle.static.Executor(self.place) diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..da527b26bf0608da5a648d92b492ff27cf2802f0 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py @@ -0,0 +1,208 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import paddle +from paddle import fluid, nn +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F +import paddle.fluid.initializer as I +import unittest + + +class Conv1dTestCase(unittest.TestCase): + def __init__(self, + methodName='runTest', + batch_size=4, + spartial_shape=(16, ), + num_channels=6, + num_filters=8, + filter_size=3, + padding=0, + padding_mode="zeros", + stride=1, + dilation=1, + groups=1, + no_bias=False, + dtype="float32", + data_format="NCL"): + super(Conv1dTestCase, self).__init__(methodName) + self.batch_size = batch_size + self.num_channels = num_channels + self.num_filters = num_filters + self.spartial_shape = spartial_shape + self.filter_size = filter_size + self.data_format = data_format + self.channel_last = (self.data_format == "NHWC") + + self.padding = padding + self.padding_mode = padding_mode + self.stride = stride + self.dilation = dilation + self.groups = groups + self.no_bias = no_bias + self.dtype = dtype + + def setUp(self): + input_shape = (self.batch_size, self.num_channels + ) + self.spartial_shape if not self.channel_last else ( + self.batch_size, ) + self.spartial_shape + ( + self.num_channels, ) + self.input = np.random.randn(*input_shape).astype(self.dtype) + + if isinstance(self.filter_size, int): + filter_size = [self.filter_size] + else: + filter_size = self.filter_size + self.weight_shape = weight_shape = (self.num_filters, self.num_channels + // self.groups) + tuple(filter_size) + self.weight = np.random.uniform( + -1, 1, size=weight_shape).astype(self.dtype) + if not self.no_bias: + self.bias = np.random.uniform( + -1, 1, size=(self.num_filters, )).astype(self.dtype) + else: + self.bias = None + + def functional(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + input_shape = (-1, self.num_channels, + -1) if not self.channel_last else ( + -1, -1, self.num_channels) + x_var = fluid.data("input", input_shape, dtype=self.dtype) + w_var = fluid.data( + "weight", self.weight_shape, dtype=self.dtype) + b_var = fluid.data( + "bias", (self.num_filters, ), dtype=self.dtype) + y_var = 
F.conv1d( + x_var, + w_var, + b_var if not self.no_bias else None, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) + feed_dict = {"input": self.input, "weight": self.weight} + if self.bias is not None: + feed_dict["bias"] = self.bias + exe = fluid.Executor(place) + exe.run(start) + y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var]) + return y_np + + def paddle_nn_layer(self): + x_var = paddle.to_tensor(self.input) + conv = nn.Conv1d( + self.num_channels, + self.num_filters, + self.filter_size, + padding=self.padding, + padding_mode=self.padding_mode, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) + conv.weight.set_value(self.weight) + if not self.no_bias: + conv.bias.set_value(self.bias) + y_var = conv(x_var) + y_np = y_var.numpy() + return y_np + + def _test_equivalence(self, place): + result1 = self.functional(place) + with dg.guard(place): + result2 = self.paddle_nn_layer() + np.testing.assert_array_almost_equal(result1, result2) + + def runTest(self): + place = fluid.CPUPlace() + self._test_equivalence(place) + + if fluid.core.is_compiled_with_cuda(): + place = fluid.CUDAPlace(0) + self._test_equivalence(place) + + +class Conv1dErrorTestCase(Conv1dTestCase): + def runTest(self): + place = fluid.CPUPlace() + with dg.guard(place): + with self.assertRaises(ValueError): + self.paddle_nn_layer() + + +def add_cases(suite): + suite.addTest(Conv1dTestCase(methodName='runTest')) + suite.addTest(Conv1dTestCase(methodName='runTest', stride=[1], dilation=2)) + suite.addTest(Conv1dTestCase(methodName='runTest', stride=2, dilation=(1))) + suite.addTest( + Conv1dTestCase( + methodName='runTest', padding="same", no_bias=True)) + suite.addTest( + Conv1dTestCase( + methodName='runTest', filter_size=3, padding='valid')) + suite.addTest( + Conv1dTestCase( + methodName='runTest', padding=2, data_format='NLC')) + suite.addTest(Conv1dTestCase(methodName='runTest', padding=[1])) + suite.addTest(Conv1dTestCase(methodName='runTest', padding=2)) + suite.addTest(Conv1dTestCase(methodName='runTest')) + suite.addTest( + Conv1dTestCase( + methodName='runTest', groups=2, padding="valid")) + suite.addTest( + Conv1dTestCase( + methodName='runTest', + num_filters=6, + num_channels=3, + groups=3, + padding="valid", + data_format='NLC')) + + +def add_error_cases(suite): + suite.addTest( + Conv1dErrorTestCase( + methodName='runTest', padding_mode="reflect", padding="valid")) + suite.addTest( + Conv1dErrorTestCase( + methodName='runTest', data_format="VALID")) + suite.addTest( + Conv1dErrorTestCase( + methodName='runTest', padding_mode="VALID")) + suite.addTest( + Conv1dErrorTestCase( + methodName='runTest', num_channels=5, groups=2)) + suite.addTest( + Conv1dErrorTestCase( + methodName='runTest', num_filters=8, num_channels=15, groups=3)) + suite.addTest( + Conv1dErrorTestCase( + methodName='runTest', padding=[1, 2, 3, 4, 5])) + + +def load_tests(loader, standard_tests, pattern): + suite = unittest.TestSuite() + add_cases(suite) + add_error_cases(suite) + return suite + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..73227dd3610376d85fcfc70bb2653dfd927427fd --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py @@ -0,0 +1,229 @@ +# 
Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import paddle +from paddle import fluid, nn +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F +import paddle.fluid.initializer as I +import unittest + + +class ConvTranspose1dTestCase(unittest.TestCase): + def __init__(self, + methodName='runTest', + batch_size=4, + spartial_shape=16, + in_channels=6, + out_channels=8, + filter_size=3, + output_size=None, + padding=0, + output_padding=0, + stride=1, + dilation=1, + groups=1, + no_bias=False, + data_format="NCL", + dtype="float32"): + super(ConvTranspose1dTestCase, self).__init__(methodName) + self.batch_size = batch_size + self.in_channels = in_channels + self.out_channels = out_channels + self.spartial_shape = spartial_shape + self.filter_size = filter_size + self.output_size = output_size + + self.padding = padding + self.output_padding = output_padding + self.stride = stride + self.dilation = dilation + self.groups = groups + self.no_bias = no_bias + self.data_format = data_format + self.dtype = dtype + + def setUp(self): + + self.channel_last = False if self.data_format == "NCL" else True + input_shape = (self.batch_size, self.in_channels, + self.spartial_shape) if not self.channel_last else ( + self.batch_size, + self.spartial_shape, + self.in_channels, ) + self.input = np.random.randn(*input_shape).astype(self.dtype) + + if isinstance(self.filter_size, int): + filter_size = [self.filter_size] + else: + filter_size = self.filter_size + self.weight_shape = weight_shape = (self.in_channels, self.out_channels + // self.groups) + tuple(filter_size) + self.weight = np.random.uniform( + -1, 1, size=weight_shape).astype(self.dtype) + if not self.no_bias: + self.bias = np.random.uniform( + -1, 1, size=(self.out_channels, )).astype(self.dtype) + else: + self.bias = None + + def functional(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + input_shape = (-1, self.in_channels, + -1) if not self.channel_last else ( + -1, -1, self.in_channels) + x_var = fluid.data("input", input_shape, dtype=self.dtype) + w_var = fluid.data( + "weight", self.weight_shape, dtype=self.dtype) + b_var = fluid.data( + "bias", (self.out_channels, ), dtype=self.dtype) + y_var = F.conv_transpose1d( + x_var, + w_var, + None if self.no_bias else b_var, + output_size=self.output_size, + padding=self.padding, + output_padding=self.output_padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) + feed_dict = {"input": self.input, "weight": self.weight} + if self.bias is not None: + feed_dict["bias"] = self.bias + exe = fluid.Executor(place) + exe.run(start) + y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var]) + return y_np + + def paddle_nn_layer(self): + x_var = paddle.to_tensor(self.input) + conv = nn.ConvTranspose1d( + self.in_channels, + self.out_channels, + self.filter_size, + 
padding=self.padding, + output_padding=self.output_padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format) + conv.weight.set_value(self.weight) + if not self.no_bias: + conv.bias.set_value(self.bias) + y_var = conv(x_var, output_size=self.output_size) + y_np = y_var.numpy() + return y_np + + def _test_equivalence(self, place): + result1 = self.functional(place) + with dg.guard(place): + result2 = self.paddle_nn_layer() + np.testing.assert_array_almost_equal(result1, result2) + + def runTest(self): + place = fluid.CPUPlace() + self._test_equivalence(place) + + if fluid.core.is_compiled_with_cuda(): + place = fluid.CUDAPlace(0) + self._test_equivalence(place) + + +class ConvTranspose1dErrorTestCase(ConvTranspose1dTestCase): + def runTest(self): + place = fluid.CPUPlace() + with dg.guard(place): + with self.assertRaises(ValueError): + self.paddle_nn_layer() + + +def add_cases(suite): + suite.addTest(ConvTranspose1dTestCase(methodName='runTest')) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', stride=[2], no_bias=True, dilation=2)) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', + filter_size=(3), + output_size=[36], + stride=[2], + dilation=2)) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', stride=2, dilation=(2))) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', padding="valid")) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', padding='valid')) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', filter_size=1, padding=3)) + suite.addTest(ConvTranspose1dTestCase(methodName='runTest', padding=[2])) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', data_format="NLC")) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', groups=2, padding="valid")) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', + out_channels=6, + in_channels=3, + groups=3, + padding="valid")) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', + data_format="NLC", + spartial_shape=16, + output_size=18)) + suite.addTest( + ConvTranspose1dTestCase( + methodName='runTest', data_format="NLC", stride=3, + output_padding=2)) + + +def add_error_cases(suite): + suite.addTest( + ConvTranspose1dErrorTestCase( + methodName='runTest', data_format="not_valid")) + suite.addTest( + ConvTranspose1dErrorTestCase( + methodName='runTest', in_channels=5, groups=2)) + suite.addTest( + ConvTranspose1dErrorTestCase( + methodName='runTest', stride=2, output_padding=3)) + suite.addTest( + ConvTranspose1dErrorTestCase( + methodName='runTest', output_size="not_valid")) + + +def load_tests(loader, standard_tests, pattern): + suite = unittest.TestSuite() + add_cases(suite) + add_error_cases(suite) + return suite + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py index 64653ce2e7b8630030094b4004ecb17d56d3ff43..6bfe2aca530ddea6b49f12ad34dd9672e2a99ab5 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py @@ -20,6 +20,10 @@ import paddle.fluid.initializer as I import unittest +def _reverse_repeat_list(t, n): + return list(x for x in reversed(t) for _ in range(n)) + + class Conv2DTestCase(unittest.TestCase): def __init__(self, methodName='runTest', @@ -29,12 +33,11 @@ class Conv2DTestCase(unittest.TestCase): 
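[Editor's aside, not part of the patch] The _reverse_repeat_list helper added to test_conv2d_layer.py above builds the flat padding list that the F.pad pre-padding branch consumes when padding_mode is 'reflect', 'replicate' or 'circular': it reverses the per-dimension pairs and repeats each entry. A minimal pure-Python sketch of what it computes, restating the helper for illustration only:

def _reverse_repeat_list(t, n):
    # same helper as in the patch: reverse t, then repeat each element n times
    return list(x for x in reversed(t) for _ in range(n))

# convert_to_list(1, 2, 'padding') yields [1, 1]; an asymmetric pair shows the ordering
assert _reverse_repeat_list([1, 2], 2) == [2, 2, 1, 1]
assert _reverse_repeat_list([1, 1], 2) == [1, 1, 1, 1]
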
num_filters=8, filter_size=3, padding=0, + padding_mode='zeros', stride=1, dilation=1, groups=1, - act=None, no_bias=False, - use_cudnn=True, data_format="NCHW", dtype="float32"): super(Conv2DTestCase, self).__init__(methodName) @@ -45,12 +48,16 @@ class Conv2DTestCase(unittest.TestCase): self.filter_size = filter_size self.padding = padding + if padding_mode in {'reflect', 'replicate', 'circular'}: + _paired_padding = fluid.layers.utils.convert_to_list(padding, 2, + 'padding') + self._reversed_padding_repeated_twice = _reverse_repeat_list( + _paired_padding, 2) + self.padding_mode = padding_mode self.stride = stride self.dilation = dilation self.groups = groups - self.act = act self.no_bias = no_bias - self.use_cudnn = use_cudnn self.data_format = data_format self.dtype = dtype @@ -91,19 +98,27 @@ class Conv2DTestCase(unittest.TestCase): bias_attr = False else: bias_attr = I.NumpyArrayInitializer(self.bias) + if self.padding_mode != 'zeros': + x_var = F.pad(x_var, + self._reversed_padding_repeated_twice, + mode=self.padding_mode, + data_format=self.data_format) + padding = 0 + else: + padding = self.padding + y_var = fluid.layers.conv2d( x_var, self.num_filters, self.filter_size, - padding=self.padding, + padding=padding, stride=self.stride, dilation=self.dilation, groups=self.groups, param_attr=weight_attr, bias_attr=bias_attr, - use_cudnn=self.use_cudnn, - act=self.act, data_format=self.data_format) + feed_dict = {"input": self.input} exe = fluid.Executor(place) exe.run(start) @@ -122,16 +137,24 @@ class Conv2DTestCase(unittest.TestCase): "weight", self.weight_shape, dtype=self.dtype) b_var = fluid.data( "bias", (self.num_filters, ), dtype=self.dtype) + + if self.padding_mode != 'zeros': + x_var = F.pad(x_var, + self._reversed_padding_repeated_twice, + mode=self.padding_mode, + data_format=self.data_format) + padding = 0 + else: + padding = self.padding + y_var = F.conv2d( x_var, w_var, b_var if not self.no_bias else None, - padding=self.padding, + padding=padding, stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: @@ -143,18 +166,16 @@ class Conv2DTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = dg.to_variable(self.input) - conv = nn.Conv2D( + conv = nn.Conv2d( self.num_channels, self.num_filters, self.filter_size, padding=self.padding, + padding_mode=self.padding_mode, stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, - data_format=self.data_format, - dtype=self.dtype) + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) @@ -198,7 +219,7 @@ def add_cases(suite): methodName='runTest', stride=2, dilation=(2, 1))) suite.addTest( Conv2DTestCase( - methodName='runTest', padding="same", no_bias=True, act="sigmoid")) + methodName='runTest', padding="same", no_bias=True)) suite.addTest( Conv2DTestCase( methodName='runTest', filter_size=(3, 3), padding='valid')) @@ -222,15 +243,28 @@ def add_cases(suite): num_filters=6, num_channels=3, groups=3, - use_cudnn=False, - act="sigmoid", padding="valid")) + suite.addTest( + Conv2DTestCase( + methodName='runTest', + filter_size=(3, 3), + padding=1, + padding_mode='reflect')) + suite.addTest( + Conv2DTestCase( + methodName='runTest', + filter_size=(3, 3), + padding=1, + padding_mode='replicate')) + suite.addTest( + Conv2DTestCase( + 
methodName='runTest', + filter_size=(3, 3), + padding=1, + padding_mode='circular')) def add_error_cases(suite): - suite.addTest( - Conv2DErrorTestCase( - methodName='runTest', use_cudnn="not_valid")) suite.addTest( Conv2DErrorTestCase( methodName='runTest', num_channels=5, groups=2)) diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py index 989836d5993af5620a7b5fbd86c07b028e419fc4..ba450b345b8a309f5d7ff1e7a5c149809f55f46c 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py @@ -29,13 +29,12 @@ class Conv2DTransposeTestCase(unittest.TestCase): num_filters=8, filter_size=3, output_size=None, + output_padding=0, padding=0, stride=1, dilation=1, groups=1, - act=None, no_bias=False, - use_cudnn=True, data_format="NCHW", dtype="float32"): super(Conv2DTransposeTestCase, self).__init__(methodName) @@ -45,14 +44,13 @@ class Conv2DTransposeTestCase(unittest.TestCase): self.spartial_shape = spartial_shape self.filter_size = filter_size self.output_size = output_size + self.output_padding = output_padding self.padding = padding self.stride = stride self.dilation = dilation self.groups = groups - self.act = act self.no_bias = no_bias - self.use_cudnn = use_cudnn self.data_format = data_format self.dtype = dtype @@ -93,6 +91,7 @@ class Conv2DTransposeTestCase(unittest.TestCase): bias_attr = False else: bias_attr = I.NumpyArrayInitializer(self.bias) + y_var = fluid.layers.conv2d_transpose( x_var, self.num_filters, @@ -104,8 +103,6 @@ class Conv2DTransposeTestCase(unittest.TestCase): groups=self.groups, param_attr=weight_attr, bias_attr=bias_attr, - use_cudnn=self.use_cudnn, - act=self.act, data_format=self.data_format) feed_dict = {"input": self.input} exe = fluid.Executor(place) @@ -125,17 +122,22 @@ class Conv2DTransposeTestCase(unittest.TestCase): "weight", self.weight_shape, dtype=self.dtype) b_var = fluid.data( "bias", (self.num_filters, ), dtype=self.dtype) - y_var = F.conv2d_transpose( + + if self.output_padding != 0: + output_size = None + else: + output_size = self.output_size + + y_var = F.conv_transpose2d( x_var, w_var, None if self.no_bias else b_var, - output_size=self.output_size, + output_size=output_size, padding=self.padding, + output_padding=self.output_padding, stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: @@ -147,32 +149,38 @@ class Conv2DTransposeTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = dg.to_variable(self.input) - conv = nn.Conv2DTranspose( + + if self.output_padding != 0: + output_size = None + else: + output_size = self.output_size + + conv = nn.ConvTranspose2d( self.num_channels, self.num_filters, self.filter_size, - output_size=self.output_size, padding=self.padding, + output_padding=self.output_padding, stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, - data_format=self.data_format, - dtype=self.dtype) + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) - y_var = conv(x_var) + y_var = conv(x_var, output_size) y_np = y_var.numpy() return y_np def _test_equivalence(self, place): place = fluid.CPUPlace() + result1 = self.fluid_layer(place) result2 = 
self.functional(place) + with dg.guard(place): result3 = self.paddle_nn_layer() + np.testing.assert_array_almost_equal(result1, result2) np.testing.assert_array_almost_equal(result2, result3) @@ -194,7 +202,7 @@ class Conv2DTransposeErrorTestCase(Conv2DTransposeTestCase): def add_cases(suite): - suite.addTest(Conv2DTransposeTestCase(methodName='runTest', act="relu")) + suite.addTest(Conv2DTransposeTestCase(methodName='runTest')) suite.addTest( Conv2DTransposeTestCase( methodName='runTest', stride=[1, 2], no_bias=True, dilation=2)) @@ -211,9 +219,6 @@ def add_cases(suite): suite.addTest( Conv2DTransposeTestCase( methodName='runTest', padding="valid")) - suite.addTest( - Conv2DTransposeTestCase( - methodName='runTest', padding='valid')) suite.addTest( Conv2DTransposeTestCase( methodName='runTest', filter_size=1, padding=(2, 3))) @@ -240,15 +245,22 @@ def add_cases(suite): num_filters=6, num_channels=3, groups=3, - use_cudnn=False, - act="sigmoid", padding="valid")) + suite.addTest( + Conv2DTransposeTestCase( + methodName='runTest', + num_filters=6, + num_channels=3, + spartial_shape=(7, 7), + filter_size=[5, 5], + groups=1, + padding=2, + stride=2, + output_size=[14, 14], + output_padding=[1, 1], )) def add_error_cases(suite): - suite.addTest( - Conv2DTransposeErrorTestCase( - methodName='runTest', use_cudnn="not_valid")) suite.addTest( Conv2DTransposeErrorTestCase( methodName='runTest', num_channels=5, groups=2)) diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py index f4418150e8a69d795ff544073b6ba6dd7431e44b..913db51da500b6c324abfab61744dfc1947bf7a5 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py @@ -77,8 +77,13 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): output_size = attrs['output_size'] out_h = output_size[0] + pad_h_0 + pad_h_1 out_w = output_size[1] + pad_w_0 + pad_w_1 - - out = np.zeros((in_n, out_c, out_h, out_w), dtype=input_.dtype) + out_pad_h = 0 + out_pad_w = 0 + if 'output_padding' in attrs: + out_pad_h = attrs['output_padding'][0] + out_pad_w = attrs['output_padding'][1] + out = np.zeros( + (in_n, out_c, out_h + out_pad_h, out_w + out_pad_w), dtype=input_.dtype) for n in range(in_n): for i in range(in_h): @@ -99,7 +104,8 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): out[n, g * f_out_c + k, i1:i2:dilations[0], j1:j2: dilations[1]] += tmp_out - out = out[:, :, pad_h_0:out_h - pad_h_1, pad_w_0:out_w - pad_w_1] + out = out[:, :, pad_h_0:out_h - pad_h_1 + out_pad_h, pad_w_0:out_w - pad_w_1 + + out_pad_w] if attrs['data_format'] == 'NHWC': out = np.transpose(out, [0, 2, 3, 1]) return out @@ -114,6 +120,7 @@ class TestConv2dTransposeOp(OpTest): self.use_cudnn = False self.use_mkldnn = False self.output_size = None + self.output_padding = [] self.data_format = "NCHW" self.pad = [0, 0] self.padding_algorithm = "EXPLICIT" @@ -138,6 +145,9 @@ class TestConv2dTransposeOp(OpTest): if self.output_size is not None: self.attrs['output_size'] = self.output_size + if len(self.output_padding) > 0: + self.attrs['output_padding'] = self.output_padding + output = conv2dtranspose_forward_naive(input_, filter_, self.attrs).astype(self.dtype) @@ -290,6 +300,18 @@ class TestWithEvenUpsample(TestConv2dTransposeOp): self.filter_size = [f_c, 6, 5, 5] +class TestWithEvenUpsampleOutputPadding(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [2, 2] + self.stride = [2, 2] 
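[Editor's aside, not part of the patch] The naive reference above (conv2dtranspose_forward_naive) now enlarges the output by output_padding before the final crop, and the ConvTranspose2d layer case added earlier in this diff (7x7 input, 5x5 kernel, stride 2, padding 2, output_padding 1, output_size=[14, 14]) is consistent with the usual transposed-convolution size formula. A small self-checking sketch of that arithmetic; the helper name is illustrative, not from the patch:

def conv_transpose_out_size(in_size, kernel, stride=1, padding=0,
                            dilation=1, output_padding=0):
    # standard transposed-convolution output size for one spatial dimension
    return ((in_size - 1) * stride - 2 * padding
            + dilation * (kernel - 1) + 1 + output_padding)

# 7 -> 14 with kernel 5, stride 2, padding 2, output_padding 1,
# matching the output_size=[14, 14] layer test case above.
assert conv_transpose_out_size(7, 5, stride=2, padding=2, output_padding=1) == 14
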
+ self.groups = 1 + self.dilations = [1, 1] + self.output_padding = [1, 1] + self.input_size = [2, 3, 7, 7] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 5, 5] + + class Test_NHWC(TestConv2dTransposeOp): def init_test_case(self): self.pad = [0, 0] @@ -375,6 +397,19 @@ class TestWithEvenUpsample_NHWC(TestConv2dTransposeOp): self.data_format = 'NHWC' +class TestWithEvenUpsample_NHWC_output_padding(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [2, 2] + self.stride = [2, 2] + self.groups = 1 + self.dilations = [1, 1] + self.output_padding = [1, 1] + self.input_size = [2, 7, 7, 3] # NHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 5, 5] + self.data_format = 'NHWC' + + # ------------ test_cudnn ------------ @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py index cf582c6210b76c6546de6d09d9219dbf4005bb17..56355a1c95e0396d0dec53cae02c3a99bf874013 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py @@ -32,9 +32,7 @@ class Conv3DTestCase(unittest.TestCase): stride=1, dilation=1, groups=1, - act=None, no_bias=False, - use_cudnn=True, data_format="NCDHW", dtype="float32"): super(Conv3DTestCase, self).__init__(methodName) @@ -48,9 +46,7 @@ class Conv3DTestCase(unittest.TestCase): self.stride = stride self.dilation = dilation self.groups = groups - self.act = act self.no_bias = no_bias - self.use_cudnn = use_cudnn self.data_format = data_format self.dtype = dtype @@ -101,8 +97,6 @@ class Conv3DTestCase(unittest.TestCase): groups=self.groups, param_attr=weight_attr, bias_attr=bias_attr, - use_cudnn=self.use_cudnn, - act=self.act, data_format=self.data_format) feed_dict = {"input": self.input} exe = fluid.Executor(place) @@ -130,8 +124,6 @@ class Conv3DTestCase(unittest.TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: @@ -143,7 +135,7 @@ class Conv3DTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = dg.to_variable(self.input) - conv = nn.Conv3D( + conv = nn.Conv3d( self.num_channels, self.num_filters, self.filter_size, @@ -151,10 +143,7 @@ class Conv3DTestCase(unittest.TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, - data_format=self.data_format, - dtype=self.dtype) + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) @@ -225,15 +214,10 @@ def add_cases(suite): num_filters=6, num_channels=3, groups=3, - use_cudnn=False, - act="sigmoid", padding="valid")) def add_error_cases(suite): - suite.addTest( - Conv3DErrorTestCase( - methodName='runTest', use_cudnn="not_valid")) suite.addTest( Conv3DErrorTestCase( methodName='runTest', num_channels=5, groups=2)) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py index acaf33467dbfc1c580ab3a36f08d0c2a26d7c239..e30f0cd3ecd0b872efa53c85e0666e4a6fb00a88 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py @@ -33,9 +33,7 @@ class 
Conv3DTransposeTestCase(unittest.TestCase): stride=1, dilation=1, groups=1, - act=None, no_bias=False, - use_cudnn=True, data_format="NCDHW", dtype="float32"): super(Conv3DTransposeTestCase, self).__init__(methodName) @@ -50,9 +48,7 @@ class Conv3DTransposeTestCase(unittest.TestCase): self.stride = stride self.dilation = dilation self.groups = groups - self.act = act self.no_bias = no_bias - self.use_cudnn = use_cudnn self.data_format = data_format self.dtype = dtype @@ -104,8 +100,6 @@ class Conv3DTransposeTestCase(unittest.TestCase): groups=self.groups, param_attr=weight_attr, bias_attr=bias_attr, - use_cudnn=self.use_cudnn, - act=self.act, data_format=self.data_format) feed_dict = {"input": self.input} exe = fluid.Executor(place) @@ -125,7 +119,7 @@ class Conv3DTransposeTestCase(unittest.TestCase): "weight", self.weight_shape, dtype=self.dtype) b_var = fluid.data( "bias", (self.num_filters, ), dtype=self.dtype) - y_var = F.conv3d_transpose( + y_var = F.conv_transpose3d( x_var, w_var, None if self.no_bias else b_var, @@ -134,8 +128,6 @@ class Conv3DTransposeTestCase(unittest.TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, data_format=self.data_format) feed_dict = {"input": self.input, "weight": self.weight} if self.bias is not None: @@ -147,23 +139,19 @@ class Conv3DTransposeTestCase(unittest.TestCase): def paddle_nn_layer(self): x_var = dg.to_variable(self.input) - conv = nn.Conv3DTranspose( + conv = nn.ConvTranspose3d( self.num_channels, self.num_filters, self.filter_size, - output_size=self.output_size, padding=self.padding, stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - use_cudnn=self.use_cudnn, - data_format=self.data_format, - dtype=self.dtype) + data_format=self.data_format) conv.weight.set_value(self.weight) if not self.no_bias: conv.bias.set_value(self.bias) - y_var = conv(x_var) + y_var = conv(x_var, self.output_size) y_np = y_var.numpy() return y_np @@ -194,7 +182,7 @@ class Conv3DTransposeErrorTestCase(Conv3DTransposeTestCase): def add_cases(suite): - suite.addTest(Conv3DTransposeTestCase(methodName='runTest', act="tanh")) + suite.addTest(Conv3DTransposeTestCase(methodName='runTest')) suite.addTest( Conv3DTransposeTestCase( methodName='runTest', stride=[1, 2, 1], dilation=2, no_bias=True)) @@ -240,15 +228,10 @@ def add_cases(suite): num_filters=6, num_channels=3, groups=3, - use_cudnn=False, - act="sigmoid", padding="valid")) def add_error_cases(suite): - suite.addTest( - Conv3DTransposeErrorTestCase( - methodName='runTest', use_cudnn="not_valid")) suite.addTest( Conv3DTransposeErrorTestCase( methodName='runTest', num_channels=5, groups=2)) diff --git a/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py b/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py new file mode 100644 index 0000000000000000000000000000000000000000..1e25613fa63da440f71f23841095f153e61735e9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py @@ -0,0 +1,140 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import paddle.fluid.core as core + +from paddle.fluid import Program, program_guard, Executor, default_main_program + + +class TestCosineSimilarityAPI(unittest.TestCase): + def setUp(self): + self.places = [paddle.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + + def _get_numpy_out(self, x1, x2, axis=1, eps=1e-8): + w12 = np.sum(x1 * x2, axis=axis) + w1 = np.sum(x1 * x1, axis=axis) + w2 = np.sum(x2 * x2, axis=axis) + n12 = np.sqrt(np.clip(w1 * w2, eps * eps, None)) + cos_sim = w12 / n12 + return cos_sim + + def check_static_result(self, place): + paddle.enable_static() + + with program_guard(Program(), Program()): + shape = [10, 15] + axis = 1 + eps = 1e-8 + np.random.seed(0) + np_x1 = np.random.rand(*shape).astype(np.float32) + np_x2 = np.random.rand(*shape).astype(np.float32) + + x1 = paddle.data(name="x1", shape=shape) + x2 = paddle.data(name="x2", shape=shape) + result = F.cosine_similarity(x1, x2, axis=axis, eps=eps) + exe = Executor(place) + fetches = exe.run(default_main_program(), + feed={"x1": np_x1, + "x2": np_x2}, + fetch_list=[result]) + + np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) + self.assertTrue(np.allclose(fetches[0], np_out)) + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph_1(self): + paddle.disable_static() + + shape = [10, 15] + axis = 1 + eps = 1e-8 + np.random.seed(1) + np_x1 = np.random.rand(*shape).astype(np.float32) + np_x2 = np.random.rand(*shape).astype(np.float32) + np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) + + tesnor_x1 = paddle.to_variable(np_x1) + tesnor_x2 = paddle.to_variable(np_x2) + y = F.cosine_similarity(tesnor_x1, tesnor_x2, axis=axis, eps=eps) + + self.assertTrue(np.allclose(y.numpy(), np_out)) + + def test_dygraph_2(self): + paddle.disable_static() + + shape = [12, 13] + axis = 0 + eps = 1e-6 + np.random.seed(1) + np_x1 = np.random.rand(*shape).astype(np.float32) + np_x2 = np.random.rand(*shape).astype(np.float32) + np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) + + tesnor_x1 = paddle.to_variable(np_x1) + tesnor_x2 = paddle.to_variable(np_x2) + y = F.cosine_similarity(tesnor_x1, tesnor_x2, axis=axis, eps=eps) + + self.assertTrue(np.allclose(y.numpy(), np_out)) + + def test_dygraph_3(self): + paddle.disable_static() + + shape1 = [10, 12, 10] + shape2 = [10, 1, 10] + axis = 2 + eps = 1e-6 + np.random.seed(1) + np_x1 = np.random.rand(*shape1).astype(np.float32) + np_x2 = np.random.rand(*shape2).astype(np.float32) + np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) + + tesnor_x1 = paddle.to_variable(np_x1) + tesnor_x2 = paddle.to_variable(np_x2) + y = F.cosine_similarity(tesnor_x1, tesnor_x2, axis=axis, eps=eps) + + self.assertTrue(np.allclose(y.numpy(), np_out)) + + def test_dygraph_4(self): + paddle.disable_static() + + shape1 = [23, 12, 1] + shape2 = [23, 1, 10] + axis = 2 + eps = 1e-6 + np.random.seed(1) + np_x1 = 
np.random.rand(*shape1).astype(np.float32) + np_x2 = np.random.rand(*shape2).astype(np.float32) + np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) + + cos_sim_func = nn.CosineSimilarity(axis=axis, eps=eps) + tesnor_x1 = paddle.to_variable(np_x1) + tesnor_x2 = paddle.to_variable(np_x2) + y = cos_sim_func(tesnor_x1, tesnor_x2) + + self.assertTrue(np.allclose(y.numpy(), np_out)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py index 7f667d6b71c7f52f6d5afb42045c2da0cc45587b..4982cd195820811b9a8ec3fe6d01955234032120 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py @@ -535,5 +535,443 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) +class FuncCrossEntropyLoss(unittest.TestCase): + #1 + def test_cross_entropy_loss_1d_with_weight_mean(self): + input_np = np.random.random([100, 200]).astype(np.float64) + label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) + weight_np = np.random.random([200]).astype(np.float64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float64') + label = fluid.data(name='label', shape=[100], dtype='int64') + weight = fluid.data(name='weight', shape=[200], dtype='float64') + ret = paddle.nn.functional.cross_entropy( + input, label, weight=weight) + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + "weight": weight_np + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + weight=fluid.dygraph.to_variable(weight_np)) + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_1d( + input_np, label_np, weight=weight_np)[0] + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #2 + def test_cross_entropy_loss_1d_with_weight_sum(self): + input_np = np.random.random([100, 200]).astype(np.float64) + label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) + weight_np = np.random.random([200]).astype(np.float64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float64') + label = fluid.data(name='label', shape=[100], dtype='int64') + weight = fluid.data(name='weight', shape=[200], dtype='float64') + ret = paddle.nn.functional.cross_entropy( + input, label, weight=weight, reduction='sum') + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + "weight": weight_np + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + weight=fluid.dygraph.to_variable(weight_np), + 
reduction='sum') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_1d( + input_np, label_np, weight=weight_np, reduction='sum')[0] + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #3 + def test_cross_entropy_loss_1d_with_weight_none(self): + input_np = np.random.random([100, 200]).astype(np.float64) + label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) + weight_np = np.random.random([200]).astype(np.float64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float64') + label = fluid.data(name='label', shape=[100], dtype='int64') + weight = fluid.data(name='weight', shape=[200], dtype='float64') + ret = paddle.nn.functional.cross_entropy( + input, label, weight=weight, reduction='none') + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + "weight": weight_np + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + weight=fluid.dygraph.to_variable(weight_np), + reduction='none') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_1d( + input_np, label_np, weight=weight_np, reduction='none') + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #4 + def test_cross_entropy_loss_1d_mean(self): + input_np = np.random.random([100, 200]).astype(np.float64) + label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float64') + label = fluid.data(name='label', shape=[100], dtype='int64') + ret = paddle.nn.functional.cross_entropy(input, label) + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={'input': input_np, + 'label': label_np}, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_1d(input_np, label_np)[0] + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #5 + def test_cross_entropy_loss_1d_sum(self): + input_np = np.random.random([100, 200]).astype(np.float64) + label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float64') + label = fluid.data(name='label', shape=[100], dtype='int64') + ret = 
paddle.nn.functional.cross_entropy( + input, label, reduction='sum') + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={'input': input_np, + 'label': label_np}, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + reduction='sum') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_1d(input_np, label_np, reduction='sum')[0] + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #6 + def test_cross_entropy_loss_1d_none(self): + input_np = np.random.random([100, 200]).astype(np.float64) + label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float64') + label = fluid.data(name='label', shape=[100], dtype='int64') + ret = paddle.nn.functional.cross_entropy( + input, label, reduction='none') + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={'input': input_np, + 'label': label_np}, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + reduction='none') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_1d(input_np, label_np, reduction='none') + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #7 + def test_cross_entropy_loss_2d_with_weight_none(self): + input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) + label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) + weight_np = np.random.random(size=(3, )).astype(np.float64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data( + name='input', shape=[5, 3, 5, 5], dtype='float64') + label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + weight = fluid.data(name='weight', shape=[3], dtype='float64') + ret = paddle.nn.functional.cross_entropy( + input, label, weight=weight, reduction='none') + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + "weight": weight_np + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + weight=fluid.dygraph.to_variable(weight_np), + reduction='none') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_2d( + input_np, label_np, weight=weight_np, reduction='none') + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #8 + def test_cross_entropy_loss_2d_with_weight_mean(self): + input_np = 
np.random.random(size=(5, 3, 5, 5)).astype(np.float64) + label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) + weight_np = np.random.random(size=(3, )).astype(np.float64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data( + name='input', shape=[5, 3, 5, 5], dtype='float64') + label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + weight = fluid.data(name='weight', shape=[3], dtype='float64') + ret = paddle.nn.functional.cross_entropy( + input, label, weight=weight, reduction='mean') + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + "weight": weight_np + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + weight=fluid.dygraph.to_variable(weight_np), + reduction='mean') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_2d( + input_np, label_np, weight=weight_np, reduction='mean')[0] + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #9 + def test_cross_entropy_loss_2d_with_weight_sum(self): + input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) + label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) + weight_np = np.random.random(size=(3, )).astype(np.float64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data( + name='input', shape=[5, 3, 5, 5], dtype='float64') + label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + weight = fluid.data(name='weight', shape=[3], dtype='float64') + ret = paddle.nn.functional.cross_entropy( + input, label, weight=weight, reduction='sum') + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + "weight": weight_np + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + weight=fluid.dygraph.to_variable(weight_np), + reduction='sum') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_2d( + input_np, label_np, weight=weight_np, reduction='sum')[0] + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #10 + def test_cross_entropy_loss_2d_none(self): + input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) + label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data( + name='input', shape=[5, 3, 5, 5], dtype='float64') + label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + ret = paddle.nn.functional.cross_entropy( + input, label, reduction='none') + + exe 
= fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + reduction='none') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_2d(input_np, label_np, reduction='none') + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #11 + def test_cross_entropy_loss_2d_mean(self): + input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) + label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data( + name='input', shape=[5, 3, 5, 5], dtype='float64') + label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + ret = paddle.nn.functional.cross_entropy( + input, label, reduction='mean') + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + reduction='mean') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_2d( + input_np, label_np, reduction='mean')[0] + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + #12 + def test_cross_entropy_loss_2d_sum(self): + input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) + label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data( + name='input', shape=[5, 3, 5, 5], dtype='float64') + label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + ret = paddle.nn.functional.cross_entropy( + input, label, reduction='sum') + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + dy_ret = paddle.nn.functional.cross_entropy( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np), + reduction='sum') + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = cross_entropy_loss_2d(input_np, label_np, reduction='sum')[0] + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_cumsum_op.py b/python/paddle/fluid/tests/unittests/test_cumsum_op.py index 57024e8ae5cd5dd2fbb231269fda50b5ef6e7a47..ad121fac8cc045e67cf116d2cf9cedd6ac9bef99 100644 --- a/python/paddle/fluid/tests/unittests/test_cumsum_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_cumsum_op.py @@ -54,7 +54,7 @@ class TestCumsumOp(unittest.TestCase): def run_static(self, use_gpu=False): with fluid.program_guard(fluid.Program()): data_np = np.random.random((100, 100)).astype(np.float32) - x = paddle.nn.data('X', [100, 100]) + x = paddle.static.data('X', [100, 100]) y = paddle.cumsum(x) y2 = paddle.cumsum(x, axis=0) y3 = paddle.cumsum(x, axis=-1) @@ -100,7 +100,7 @@ class TestCumsumOp(unittest.TestCase): def test_name(self): with fluid.program_guard(fluid.Program()): - x = paddle.nn.data('x', [3, 4]) + x = paddle.static.data('x', [3, 4]) y = paddle.cumsum(x, name='out') self.assertTrue('out' in y.name) diff --git a/python/paddle/fluid/tests/unittests/test_data.py b/python/paddle/fluid/tests/unittests/test_data.py index 22dc72048e429ed257e9d7d1213b6cb7dcafbf1a..8070148f8b36dd7dab7711abaf25994acebc7e6f 100644 --- a/python/paddle/fluid/tests/unittests/test_data.py +++ b/python/paddle/fluid/tests/unittests/test_data.py @@ -16,9 +16,11 @@ from __future__ import print_function import unittest +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard +import paddle.fluid.core as core class TestApiDataError(unittest.TestCase): @@ -53,5 +55,49 @@ class TestApiDataError(unittest.TestCase): self.assertRaises(TypeError, test_shape_type) +class TestApiStaticDataError(unittest.TestCase): + def test_fluid_dtype(self): + with program_guard(Program(), Program()): + x1 = paddle.static.data(name="x1", shape=[2, 25]) + self.assertEqual(x1.dtype, core.VarDesc.VarType.FP32) + + x2 = paddle.static.data(name="x2", shape=[2, 25], dtype="bool") + self.assertEqual(x2.dtype, core.VarDesc.VarType.BOOL) + + paddle.set_default_dtype("float64") + x3 = paddle.static.data(name="x3", shape=[2, 25]) + self.assertEqual(x3.dtype, core.VarDesc.VarType.FP64) + + def test_fluid_data(self): + with program_guard(Program(), Program()): + + # 1. The type of 'name' in fluid.data must be str. + def test_name_type(): + paddle.static.data(name=1, shape=[2, 25], dtype="bool") + + self.assertRaises(TypeError, test_name_type) + + # 2. The type of 'shape' in fluid.data must be list or tuple. + def test_shape_type(): + paddle.static.data(name='data1', shape=2, dtype="bool") + + self.assertRaises(TypeError, test_shape_type) + + def test_layers_data(self): + with program_guard(Program(), Program()): + + # 1. The type of 'name' in layers.data must be str. + def test_name_type(): + paddle.static.data(name=1, shape=[2, 25], dtype="bool") + + self.assertRaises(TypeError, test_name_type) + + # 2. The type of 'shape' in layers.data must be list or tuple. 
+ def test_shape_type(): + paddle.static.data(name='data1', shape=2, dtype="bool") + + self.assertRaises(TypeError, test_shape_type) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_data_norm_op.py b/python/paddle/fluid/tests/unittests/test_data_norm_op.py index c766cf17f422205521641ae44ab2060b4ab6e81c..cefef9ff9183e34d1ae7ae3e9a2f88969bf094a6 100644 --- a/python/paddle/fluid/tests/unittests/test_data_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_data_norm_op.py @@ -271,7 +271,7 @@ class TestDataNormOpWithEnableScaleAndShift(OpTest): self.use_mkldnn = False epsilon = 0.00001 slot_dim = -1 - enable_scale_and_shitf = True + enable_scale_and_shift = True x_shape = [2, 50] scale_shape = [50] tp = np.float32 @@ -319,6 +319,63 @@ class TestDataNormOpWithEnableScaleAndShift(OpTest): self.check_grad(['X'], 'Y', no_grad_set=set([])) +class TestDataNormOpWithoutEnableScaleAndShift(OpTest): + """ + test class for data norm op + test forward and backward + """ + + def setUp(self): + """ + init data norm op test env + """ + self.op_type = 'data_norm' + self.use_mkldnn = False + epsilon = 0.00001 + slot_dim = -1 + enable_scale_and_shift = True + x_shape = [2, 50] + scale_shape = [50] + tp = np.float32 + + x_val = np.random.uniform(-1, 1, x_shape).astype(tp) + batch_size = np.ones(scale_shape).astype(tp) + batch_size *= 1e4 + batch_sum = np.zeros(scale_shape).astype(tp) + batch_square_sum = np.ones(scale_shape).astype(tp) + batch_square_sum *= 1e4 + scale_w = np.ones(scale_shape).astype(tp) + bias = np.zeros(scale_shape).astype(tp) + + y = np.array(x_val) + + mean = np.zeros(x_shape).astype(tp) + scale = np.ones(x_shape).astype(tp) + + self.inputs = { + "X": x_val, + "BatchSize": batch_size, + "BatchSum": batch_sum, + "BatchSquareSum": batch_square_sum, + "scale_w": scale_w, + "bias": bias + } + self.outputs = {"Y": y, "Means": mean, "Scales": scale} + self.attrs = {"epsilon": epsilon, "use_mkldnn": self.use_mkldnn} + + def test_check_output(self): + """ + test check forward, check output + """ + self.check_output() + + def test_check_grad(self): + """ + test check backward, check grad + """ + self.check_grad(['X'], 'Y', no_grad_set=set([])) + + class TestDataNormOpWithEnableScaleAndShift_1(OpTest): """ test class for data norm op @@ -333,7 +390,7 @@ class TestDataNormOpWithEnableScaleAndShift_1(OpTest): self.use_mkldnn = False epsilon = 0.00001 slot_dim = 1 - enable_scale_and_shitf = True + enable_scale_and_shift = True x_shape = [2, 50] scale_shape = [50] tp = np.float32 diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py index f8cb6170be945ed628440b5a068f1acd0ac26503..a16f21c0f97c0902dd6c26561ed3f707b28ff947 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py @@ -122,14 +122,8 @@ class TestBase(unittest.TestCase): label = item['label'] assert image.shape() == [BATCH_SIZE, 784] assert label.shape() == [BATCH_SIZE, 1] - if ps[i]._equals(fluid.CPUPlace()): - assert image._place()._equals(fluid.CPUPlace()) - assert label._place()._equals(fluid.CPUPlace()) - else: - assert image._place()._equals( - fluid.CUDAPinnedPlace()) - assert label._place()._equals( - fluid.CUDAPinnedPlace()) + assert image._place()._equals(ps[i]) + assert label._place()._equals(ps[i]) L, = exe.run(program=prog, feed=d, fetch_list=[loss], diff --git 
a/python/paddle/fluid/tests/unittests/test_default_dtype.py b/python/paddle/fluid/tests/unittests/test_default_dtype.py new file mode 100644 index 0000000000000000000000000000000000000000..057933fc7a735c2732cd651e83e99ddfa747b8a8 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_default_dtype.py @@ -0,0 +1,61 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np +from paddle.framework import set_default_dtype, get_default_dtype +import paddle +import paddle.fluid as fluid +from paddle.fluid.dygraph import Linear +import paddle.fluid.core as core +from paddle import to_variable + + +class TestDefaultType(unittest.TestCase): + def check_default(self): + self.assertEqual("float32", get_default_dtype()) + + def test_api(self): + self.check_default() + + set_default_dtype("float64") + self.assertEqual("float64", get_default_dtype()) + + set_default_dtype("float32") + self.assertEqual("float32", get_default_dtype()) + + set_default_dtype("float16") + self.assertEqual("float16", get_default_dtype()) + + set_default_dtype(np.float64) + self.assertEqual("float64", get_default_dtype()) + + set_default_dtype(np.float32) + self.assertEqual("float32", get_default_dtype()) + + set_default_dtype(np.float16) + self.assertEqual("float16", get_default_dtype()) + + +class TestRaiseError(unittest.TestCase): + def test_error(self): + self.assertRaises(TypeError, set_default_dtype, "int32") + self.assertRaises(TypeError, set_default_dtype, np.int32) + self.assertRaises(TypeError, set_default_dtype, "int64") + self.assertRaises(TypeError, set_default_dtype, np.int64) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_device.py b/python/paddle/fluid/tests/unittests/test_device.py new file mode 100644 index 0000000000000000000000000000000000000000..0ab56f9244f93266b90f3316bc2c2be5623e0ee7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_device.py @@ -0,0 +1,83 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
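[Editor's aside, not part of the patch] The new test_default_dtype.py above pins down the set_default_dtype/get_default_dtype contract, and the earlier TestApiStaticDataError.test_fluid_dtype shows that paddle.static.data created without an explicit dtype follows that default. A condensed sketch of the behaviour those tests assert; it requires paddle and is illustrative only:

import numpy as np
from paddle.framework import set_default_dtype, get_default_dtype

set_default_dtype("float64")
assert get_default_dtype() == "float64"

# numpy dtypes are accepted and normalized to their string names
set_default_dtype(np.float16)
assert get_default_dtype() == "float16"

# only floating-point dtypes are valid; integer dtypes raise TypeError
try:
    set_default_dtype("int32")
except TypeError:
    pass

set_default_dtype("float32")  # restore the default assumed elsewhere in the suite
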
+ +from __future__ import print_function + +import unittest +from op_test import OpTest + +import numpy as np +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.framework as framework +import warnings +import paddle + + +class TestStaticDeviceManage(unittest.TestCase): + def test_cpu_device(self): + paddle.set_device('cpu') + out1 = paddle.zeros(shape=[1, 3], dtype='float32') + out2 = paddle.ones(shape=[1, 3], dtype='float32') + out3 = paddle.concat(x=[out1, out2], axis=0) + exe = paddle.fluid.Executor() + exe.run(paddle.fluid.default_startup_program()) + res = exe.run(fetch_list=[out3]) + device = paddle.get_device() + self.assertEqual(isinstance(exe.place, core.CPUPlace), True) + self.assertEqual(device, "cpu") + + def test_gpu_device(self): + if core.is_compiled_with_cuda(): + out1 = paddle.zeros(shape=[1, 3], dtype='float32') + out2 = paddle.ones(shape=[1, 3], dtype='float32') + out3 = paddle.concat(x=[out1, out2], axis=0) + paddle.set_device('gpu:0') + exe = paddle.fluid.Executor() + exe.run(paddle.fluid.default_startup_program()) + res = exe.run(fetch_list=[out3]) + device = paddle.get_device() + self.assertEqual(isinstance(exe.place, core.CUDAPlace), True) + self.assertEqual(device, "gpu:0") + + +class TestImperativeDeviceManage(unittest.TestCase): + def test_cpu(self): + with fluid.dygraph.guard(): + paddle.set_device('cpu') + out1 = paddle.zeros(shape=[1, 3], dtype='float32') + out2 = paddle.ones(shape=[1, 3], dtype='float32') + out3 = paddle.concat(x=[out1, out2], axis=0) + device = paddle.get_device() + self.assertEqual( + isinstance(framework._current_expected_place(), core.CPUPlace), + True) + self.assertEqual(device, "cpu") + + def test_gpu(self): + if core.is_compiled_with_cuda(): + with fluid.dygraph.guard(): + paddle.set_device('gpu:0') + out1 = paddle.zeros(shape=[1, 3], dtype='float32') + out2 = paddle.ones(shape=[1, 3], dtype='float32') + out3 = paddle.concat(x=[out1, out2], axis=0) + device = paddle.get_device() + self.assertEqual( + isinstance(framework._current_expected_place(), + core.CUDAPlace), True) + self.assertEqual(device, "gpu:0") + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_diag.py b/python/paddle/fluid/tests/unittests/test_diag.py index b6566676d2533aad5272fe61dbedbc1d55ea213b..8bf40459902e09f19a5badce62084841a0a23619 100644 --- a/python/paddle/fluid/tests/unittests/test_diag.py +++ b/python/paddle/fluid/tests/unittests/test_diag.py @@ -17,11 +17,173 @@ from __future__ import print_function import unittest import numpy as np from op_test import OpTest +import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid import Program, program_guard +class TestDiagV2Op(OpTest): + def setUp(self): + self.op_type = "diag_v2" + self.x = np.random.rand(10, 10) + self.offset = 0 + self.padding_value = 0.0 + self.out = np.diag(self.x, self.offset) + + self.init_config() + self.inputs = {'X': self.x} + self.attrs = { + 'offset': self.offset, + 'padding_value': self.padding_value + } + self.outputs = {'Out': self.out} + + def test_check_output(self): + self.check_output() + + def init_config(self): + pass + + +class TestDiagV2OpCase1(TestDiagV2Op): + def init_config(self): + self.offset = 1 + self.out = np.diag(self.x, self.offset) + + +class TestDiagV2OpCase2(TestDiagV2Op): + def init_config(self): + self.offset = -1 + self.out = np.diag(self.x, self.offset) + + +class TestDiagV2OpCase3(TestDiagV2Op): + def init_config(self): + self.x = 
np.random.randint(-10, 10, size=(10, 10)) + self.out = np.diag(self.x, self.offset) + + +class TestDiagV2OpCase4(TestDiagV2Op): + def init_config(self): + self.x = np.random.rand(100) + self.padding_value = 8 + n = self.x.size + self.out = self.padding_value * np.ones((n, n)) + np.diag( + self.x, self.offset) - np.diag(self.padding_value * np.ones(n)) + + +class TestDiagV2Error(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + + def test_diag_v2_type(): + x = [1, 2, 3] + output = paddle.diag(x) + + self.assertRaises(TypeError, test_diag_v2_type) + + +class TestDiagV2API(unittest.TestCase): + def setUp(self): + self.input_np = np.random.random(size=(10, 10)).astype(np.float32) + self.expected0 = np.diag(self.input_np) + self.expected1 = np.diag(self.input_np, k=1) + self.expected2 = np.diag(self.input_np, k=-1) + + self.input_np2 = np.random.rand(100) + self.offset = 0 + self.padding_value = 8 + n = self.input_np2.size + self.expected3 = self.padding_value * np.ones( + (n, n)) + np.diag(self.input_np2, self.offset) - np.diag( + self.padding_value * np.ones(n)) + + self.input_np3 = np.random.randint(-10, 10, size=(100)).astype(np.int64) + self.padding_value = 8.0 + n = self.input_np3.size + self.expected4 = self.padding_value * np.ones( + (n, n)) + np.diag(self.input_np3, self.offset) - np.diag( + self.padding_value * np.ones(n)) + + self.padding_value = -8 + self.expected5 = self.padding_value * np.ones( + (n, n)) + np.diag(self.input_np3, self.offset) - np.diag( + self.padding_value * np.ones(n)) + + def run_imperative(self): + x = paddle.to_tensor(self.input_np) + y = paddle.diag(x) + self.assertTrue(np.allclose(y.numpy(), self.expected0)) + + y = paddle.diag(x, offset=1) + self.assertTrue(np.allclose(y.numpy(), self.expected1)) + + y = paddle.diag(x, offset=-1) + self.assertTrue(np.allclose(y.numpy(), self.expected2)) + + x = paddle.to_tensor(self.input_np2) + y = paddle.diag(x, padding_value=8) + self.assertTrue(np.allclose(y.numpy(), self.expected3)) + + x = paddle.to_tensor(self.input_np3) + y = paddle.diag(x, padding_value=8.0) + self.assertTrue(np.allclose(y.numpy(), self.expected4)) + + y = paddle.diag(x, padding_value=-8) + self.assertTrue(np.allclose(y.numpy(), self.expected5)) + + def run_static(self, use_gpu=False): + x = paddle.data(name='input', shape=[10, 10], dtype='float32') + x2 = paddle.data(name='input2', shape=[100], dtype='float64') + x3 = paddle.data(name='input3', shape=[100], dtype='int64') + result0 = paddle.diag(x) + result1 = paddle.diag(x, offset=1) + result2 = paddle.diag(x, offset=-1) + result3 = paddle.diag(x, name='aaa') + result4 = paddle.diag(x2, padding_value=8) + result5 = paddle.diag(x3, padding_value=8.0) + result6 = paddle.diag(x3, padding_value=-8) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + res0, res1, res2, res4, res5, res6 = exe.run( + feed={ + "input": self.input_np, + "input2": self.input_np2, + 'input3': self.input_np3 + }, + fetch_list=[result0, result1, result2, result4, result5, result6]) + + self.assertTrue(np.allclose(res0, self.expected0)) + self.assertTrue(np.allclose(res1, self.expected1)) + self.assertTrue(np.allclose(res2, self.expected2)) + self.assertTrue('aaa' in result3.name) + self.assertTrue(np.allclose(res4, self.expected3)) + self.assertTrue(np.allclose(res5, self.expected4)) + self.assertTrue(np.allclose(res6, self.expected5)) + + def test_cpu(self): + 
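+        # Check paddle.diag on CPU in both dygraph (run_imperative) and
+        # static-graph (run_static) modes against the numpy expectations.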
paddle.disable_static(place=paddle.fluid.CPUPlace()) + self.run_imperative() + paddle.enable_static() + + with fluid.program_guard(fluid.Program()): + self.run_static() + + def test_gpu(self): + if not fluid.core.is_compiled_with_cuda(): + return + + paddle.disable_static(place=paddle.fluid.CUDAPlace(0)) + self.run_imperative() + paddle.enable_static() + + with fluid.program_guard(fluid.Program()): + self.run_static(use_gpu=True) + + class TestDiagOp(OpTest): def setUp(self): self.op_type = "diag" diff --git a/python/paddle/fluid/tests/unittests/test_directory_migration.py b/python/paddle/fluid/tests/unittests/test_directory_migration.py index 4dc2c92ad918c269d5277da0c13d4a96182a253d..bc858828058079e7d54d3c753807725ce654a778 100644 --- a/python/paddle/fluid/tests/unittests/test_directory_migration.py +++ b/python/paddle/fluid/tests/unittests/test_directory_migration.py @@ -26,8 +26,8 @@ import paddle class TestDirectory(unittest.TestCase): def get_import_command(self, module): paths = module.split('.') - if len(paths) <= 1: - return module + if len(paths) == 1: + return 'import {}'.format(module) package = '.'.join(paths[:-1]) func = paths[-1] cmd = 'from {} import {}'.format(package, func) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index 28bd637726ebe6693296c2c585bed69cb1f1ab24..9df55a6b873e28a6e479fd05b31074802eb19bb7 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -17,7 +17,7 @@ import time import unittest import paddle -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): @@ -55,7 +55,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -100,7 +100,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py index 9cd35f1754ff7f81f918635d6bffc566228b0543..59ca41a11e325cfb66a3a3eaadb4eca6f9764212 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py @@ -15,7 +15,7 @@ import unittest import paddle import os -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker import time @@ -55,7 +55,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True strategy.a_sync_configs = {"k_steps": 100} - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = 
paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index c8130d62c304b9c1a9aa6623378456806adf3da2..e0993e022e1b9570773634ec829b088c5ff145ea 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -16,7 +16,7 @@ import unittest import paddle import os import paddle.distributed.fleet as fleet -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker import time @@ -47,7 +47,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = False - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py index f72850f949715caa7282e6ca85f148a75ebc1074..beb0069eb770f25d7834749ff9c188e5252e13c0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py @@ -31,10 +31,11 @@ import time import tempfile import unittest +import paddle import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker from paddle.distributed.fleet.base.util_factory import fleet_util -from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet +from paddle.distributed.fleet import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory __all__ = ['FleetDistRunnerBase', 'TestFleetBase', 'runtime_main'] @@ -56,7 +57,7 @@ class FleetDistRunnerBase(object): if args.role.upper() == "PSERVER": role = role_maker.UserDefinedRoleMaker( is_collective=False, - init_gloo=True, + init_gloo=False, path=args.gloo_path, current_id=args.current_id, role=role_maker.Role.SERVER, @@ -65,7 +66,7 @@ class FleetDistRunnerBase(object): else: role = role_maker.UserDefinedRoleMaker( is_collective=False, - init_gloo=True, + init_gloo=False, path=args.gloo_path, current_id=args.current_id, role=role_maker.Role.WORKER, @@ -75,21 +76,23 @@ class FleetDistRunnerBase(object): return role def build_strategy(self, args): - self.strategy = None + self.strategy = paddle.distributed.fleet.DistributedStrategy() + self.strategy.a_sync = False if args.mode == "async": - self.strategy = StrategyFactory.create_async_strategy() - elif args.mode == "sync": - self.strategy = StrategyFactory.create_sync_strategy() - elif args.mode == "half_async": - self.strategy = StrategyFactory.create_half_async_strategy() + self.strategy = paddle.distributed.fleet.DistributedStrategy() + self.strategy.a_sync = True elif args.mode == "geo": - self.strategy = StrategyFactory.create_geo_strategy( - args.geo_sgd_need_push_nums) + self.strategy = paddle.distributed.fleet.DistributedStrategy() + self.strategy.a_sync = True + self.strategy.a_sync_configs = { + "k_steps": args.geo_sgd_need_push_nums + } self.dump_param = os.getenv("dump_param", "").split(",") self.dump_fields = 
os.getenv("dump_fields", "").split(",") self.dump_fields_path = os.getenv("dump_fields_path", "") debug = int(os.getenv("Debug", "0")) - if debug: + # TODO(update strategy to support dump params) + if False: #debug: self.strategy.set_debug_opt({ "dump_param": self.dump_param, "dump_fields": self.dump_fields, @@ -122,7 +125,7 @@ class FleetDistRunnerBase(object): staircase=True)) else: optimizer = fluid.optimizer.SGD(LEARNING_RATE) - optimizer = fleet.distributed_optimizer(optimizer, strategy) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) def run_pserver(self, args): @@ -157,7 +160,13 @@ class TestFleetBase(unittest.TestCase): def _setup_config(self): raise NotImplementedError("tests should have _setup_config implemented") + def tearDown(self): + t = time.time() - self.startTime + print('%s: %.3f' % (self.__class__.__name__, t)) + def setUp(self): + self.startTime = time.time() + self._mode = "sync" self._reader = "pyreader" self._trainers = 2 @@ -278,6 +287,23 @@ class TestFleetBase(unittest.TestCase): tr0_ret = tr0.returncode tr1_ret = tr0.returncode + if tr0_ret != 0: + print( + "========================Error tr0_err begin===========================" + ) + os.system("cat {}".format(tempfile.gettempdir() + "/tr0_err.log")) + print( + "========================Error tr0_err end===========================" + ) + + if tr1_ret != 0: + print( + "========================Error tr1_err begin===========================" + ) + os.system("cat {}".format(tempfile.gettempdir() + "/tr1_err.log")) + print( + "========================Error tr1_err end===========================" + ) self.assertEqual(tr0_ret, 0, "something wrong in tr0, please check") self.assertEqual(tr1_ret, 0, "something wrong in tr1, please check") diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py index 18629c4f996a6d068339bd6cad494e8e8d21123f..b506f179143412e2bdb5d9eda511d90a0a3eea6d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py @@ -22,7 +22,7 @@ from test_dist_fleet_base import TestFleetBase class TestDistMnistSync2x2(TestFleetBase): def _setup_config(self): - self._mode = "async" + self._mode = "sync" self._reader = "pyreader" def check_with_place(self, @@ -123,7 +123,7 @@ class TestDistMnistAsyncDataset2x2(TestFleetBase): class TestDistCtrHalfAsync2x2(TestFleetBase): def _setup_config(self): - self._mode = "half_async" + self._mode = "async" self._reader = "pyreader" def check_with_place(self, @@ -156,5 +156,40 @@ class TestDistCtrHalfAsync2x2(TestFleetBase): "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) +class TestDistCtrPsGpuPyreaderAsync2x2(TestFleetBase): + def _setup_config(self): + self._mode = "async" + self._reader = "pyreader" + + def check_with_place(self, + model_file, + delta=1e-3, + check_error_log=False, + need_envs={}): + required_envs = { + "PATH": os.getenv("PATH", ""), + "PYTHONPATH": os.getenv("PYTHONPATH", ""), + "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), + "FLAGS_rpc_deadline": "30000", # 5sec to fail fast + "http_proxy": "", + "FLAGS_communicator_send_queue_size": "2", + "FLAGS_communicator_max_merge_var_num": "2", + "CPU_NUM": "2", + "SAVE_MODEL": "1" + } + + required_envs.update(need_envs) + + if check_error_log: + required_envs["GLOG_v"] = "3" + required_envs["GLOG_logtostderr"] = "1" + + tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) + + 
def test_dist_train(self): + self.check_with_place( + "dist_fleet_ctr_ps_gpu.py", delta=1e-5, check_error_log=True) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py new file mode 100644 index 0000000000000000000000000000000000000000..b4bc0d8dadce44c8f711189466f34fb5cd76f39f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py @@ -0,0 +1,190 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +import shutil +import tempfile +import unittest +import subprocess +import time +import paddle.fluid as fluid +#import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet +from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +from test_dist_fleet_base import TestFleetBase + +#from dist_simnet_bow import train_network + + +class TestDistGloo_2x2(TestFleetBase): + def _setup_config(self): + self._mode = "sync" + self._reader = "pyreader" + self._path = "./tmp4" + if (os.path.exists(self._path)): + shutil.rmtree(self._path) + # if not os.path.exists(self._path): + # os.mkdir(self._path) + + def _start_pserver(self, cmd, required_envs): + #env.update(required_envs) + ps0_cmd = cmd + ps1_cmd = cmd + + ps0_pipe = open(tempfile.gettempdir() + "/ps0_err.log", "wb+") + ps1_pipe = open(tempfile.gettempdir() + "/ps1_err.log", "wb+") + + required_envs["POD_IP"] = "127.0.0.1" + required_envs["PADDLE_PSERVER_ID"] = "0" + required_envs["PADDLE_PORT"] = "36011" + ps0_proc = subprocess.Popen( + ps0_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=ps0_pipe, + env=required_envs) + print("PADDLE_PSERVER_ID=0:") + print(required_envs) + required_envs["PADDLE_PSERVER_ID"] = "1" + required_envs["PADDLE_PORT"] = "36012" + ps1_proc = subprocess.Popen( + ps1_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=ps1_pipe, + env=required_envs) + print("PADDLE_PSERVER_ID=1:") + print(required_envs) + return ps0_proc, ps1_proc, ps0_pipe, ps1_pipe + + def _start_trainer(self, cmd, required_envs): + #env.update(required_envs) + + tr0_cmd = cmd + tr1_cmd = cmd + + tr0_pipe = open(tempfile.gettempdir() + "/tr0_err.log", "wb+") + tr1_pipe = open(tempfile.gettempdir() + "/tr1_err.log", "wb+") + required_envs["PADDLE_TRAINER_ID"] = "0" + tr0_proc = subprocess.Popen( + tr0_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=tr0_pipe, + env=required_envs) + print("PADDLE_TRAINER_ID=0:") + print(required_envs) + required_envs["PADDLE_TRAINER_ID"] = "1" + tr1_proc = subprocess.Popen( + tr1_cmd.strip().split(" "), + stdout=subprocess.PIPE, + stderr=tr1_pipe, + env=required_envs) + print("PADDLE_TRAINER_ID=1:") + print(required_envs) + 
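+        # Hand both trainer processes and their stderr log handles back to
+        # the caller, which waits on them in _run_cluster.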
return tr0_proc, tr1_proc, tr0_pipe, tr1_pipe + + def _run_cluster(self, model, envs): + env = {'GRAD_CLIP': str(self._grad_clip_mode)} + python_path = self._python_interp + if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': + envs['COVERAGE_FILE'] = os.getenv('COVERAGE_FILE', '') + python_path += " -m coverage run --branch -p" + env.update(envs) + + tr_cmd = "{0} {1}".format(python_path, model) + + ps_cmd = "{0} {1}".format(python_path, model) + + # Run dist train to compare with local results + env["TRAINING_ROLE"] = "PSERVER" + ps0, ps1, ps0_pipe, ps1_pipe = self._start_pserver(ps_cmd, env) + print(ps_cmd) + env["TRAINING_ROLE"] = "TRAINER" + tr0, tr1, tr0_pipe, tr1_pipe = self._start_trainer(tr_cmd, env) + + # Wait until trainer process terminate + while True: + stat0 = tr0.poll() + time.sleep(0.1) + if stat0 is not None: + break + + while True: + stat1 = tr1.poll() + time.sleep(0.1) + if stat1 is not None: + break + + tr0_out, tr0_err = tr0.communicate() + tr1_out, tr1_err = tr1.communicate() + + tr0_ret = tr0.returncode + tr1_ret = tr0.returncode + + self.assertEqual(tr0_ret, 0, "something wrong in tr0, please check") + self.assertEqual(tr1_ret, 0, "something wrong in tr1, please check") + + # close trainer file + tr0_pipe.close() + tr1_pipe.close() + ps0_pipe.close() + ps1_pipe.close() + + ps0.terminate() + ps1.terminate() + + return 0, 0 + + def check_with_place(self, + model_file, + delta=1e-3, + check_error_log=False, + need_envs={}): + required_envs = { + "PATH": os.getenv("PATH", ""), + "PYTHONPATH": os.getenv("PYTHONPATH", ""), + "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), + "FLAGS_rpc_deadline": "5000", # 5sec to fail fast + "http_proxy": "", + "CPU_NUM": "2", + #PSERVER + "PADDLE_PSERVERS_IP_PORT_LIST": "127.0.0.1:36011,127.0.0.1:36012", + #"PADDLE_PSERVER_PORT_ARRAY":"(36011 36012)", + "PADDLE_PSERVER_NUMS": "2", + "PADDLE_TRAINER_ID": "0", + #TRAINER + "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:36013,127.0.0.1:36014", + "PADDLE_TRAINERS_NUM": "2", + "PADDLE_PSERVER_ID": "0", + #GLOO FLAG + "PADDLE_WITH_GLOO": "1", + } + + required_envs.update(need_envs) + + if check_error_log: + required_envs["GLOG_v"] = "3" + required_envs["GLOG_logtostderr"] = "1" + + tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) + + def test_dist_train(self): + print("path is not delete", os.path.exists("./tmp4")) + self.check_with_place( + "dist_fleet_debug_gloo.py", delta=1e-5, check_error_log=True) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index 5fcf5d894b2ee7a7d795008fcb4edef927827dc3..e7b10be2349cce755267297025ca8520b6d494ee 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -21,7 +21,7 @@ import shutil import paddle import paddle.fluid as fluid -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet # For Net @@ -165,7 +165,7 @@ class TestPSPassWithBow(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(loss) diff --git 
a/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py index 761d57408b9a8f9e52419331bfb0bca5b0135c30..1062123948481a4164a12a4bed818b964923006f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py @@ -113,8 +113,8 @@ class TranspilerAsyncLRDecayTest(unittest.TestCase): ["listen_and_serv"]) # block1: sum,cast,scale,floor,fill_constant,elementwise_pow,scale self.assertEqual([op.type for op in pserver.blocks[1].ops], [ - "sum", "cast", "scale", "floor", "fill_constant", "elementwise_pow", - "scale" + "sum", "cast", "fill_constant", "elementwise_div", "floor", + "fill_constant", "elementwise_pow", "scale" ]) # block1~2: optimize pass diff --git a/python/paddle/fluid/tests/unittests/test_distribution.py b/python/paddle/fluid/tests/unittests/test_distribution.py new file mode 100644 index 0000000000000000000000000000000000000000..4ccaa3266e087a38ac38667c62487e69c6bb6bf6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_distribution.py @@ -0,0 +1,725 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import unittest +import paddle +from paddle import fluid +from paddle.fluid import layers +from paddle.distribution import * +import math + + +class DistributionNumpy(): + def sample(self): + raise NotImplementedError + + def entropy(self): + raise NotImplementedError + + def kl_divergence(self, other): + raise NotImplementedError + + def log_prob(self, value): + raise NotImplementedError + + def probs(self, value): + raise NotImplementedError + + +class UniformNumpy(DistributionNumpy): + def __init__(self, low, high): + self.low = np.array(low).astype('float32') + self.high = np.array(high).astype('float32') + + def sample(self, shape): + shape = tuple(shape) + (self.low + self.high).shape + return self.low + (np.random.uniform(size=shape) * + (self.high - self.low)) + + def log_prob(self, value): + lb = np.less(self.low, value).astype('float32') + ub = np.less(value, self.high).astype('float32') + return np.log(lb * ub) - np.log(self.high - self.low) + + def probs(self, value): + lb = np.less(self.low, value).astype('float32') + ub = np.less(value, self.high).astype('float32') + return (lb * ub) / (self.high - self.low) + + def entropy(self): + return np.log(self.high - self.low) + + +class NormalNumpy(DistributionNumpy): + def __init__(self, loc, scale): + self.loc = np.array(loc).astype('float32') + self.scale = np.array(scale).astype('float32') + + def sample(self, shape): + shape = tuple(shape) + (self.loc + self.scale).shape + return self.loc + (np.random.randn(*shape) * self.scale) + + def log_prob(self, value): + var = self.scale * self.scale + log_scale = np.log(self.scale) + return -((value - self.loc) * (value - self.loc)) / ( + 2. * var) - log_scale - math.log(math.sqrt(2. 
* math.pi)) + + def probs(self, value): + var = self.scale * self.scale + return np.exp(-1. * ((value - self.loc) * (value - self.loc)) / + (2. * var)) / (math.sqrt(2 * math.pi) * self.scale) + + def entropy(self): + return 0.5 + 0.5 * np.log(np.array(2. * math.pi).astype( + 'float32')) + np.log(self.scale) + + def kl_divergence(self, other): + var_ratio = (self.scale / other.scale) + var_ratio = var_ratio * var_ratio + t1 = ((self.loc - other.loc) / other.scale) + t1 = (t1 * t1) + return 0.5 * (var_ratio + t1 - 1 - np.log(var_ratio)) + + +class DistributionTest(unittest.TestCase): + def setUp(self, use_gpu=False): + self.use_gpu = use_gpu + if not use_gpu: + place = fluid.CPUPlace() + self.gpu_id = -1 + else: + place = fluid.CUDAPlace(0) + self.gpu_id = 0 + self.executor = fluid.Executor(place) + + def build_normal_common_net(self, batch_size, dims, loc_float, scale_float, + other_loc_float, other_scale_float, scale_np, + other_scale_np, loc_np, other_loc_np, loc, + scale, other_loc, other_scale, values): + normal_int = Normal(int(loc_float), int(scale_float)) + normal_float = Normal(loc_float, scale_float) + other_normal_float = Normal(other_loc_float, other_scale_float) + + normal_float_np_broadcast = Normal(loc_float, scale_np) + other_normal_float_np_broadcast = Normal(other_loc_float, + other_scale_np) + + normal_np = Normal(loc_np, scale_np) + other_normal_np = Normal(other_loc_np, other_scale_np) + + normal_variable = Normal(loc, scale) + other_normal_variable = Normal(other_loc, other_scale) + + sample_int = normal_int.sample([batch_size, dims]) + sample_float = normal_float.sample([batch_size, dims]) + sample_float_np_broadcast = normal_float_np_broadcast.sample( + [batch_size, dims]) + sample_np = normal_np.sample([batch_size, dims]) + sample_variable = normal_variable.sample([batch_size, dims]) + + entropy_int = normal_int.entropy() + entropy_float = normal_float.entropy() + entropy_float_np_broadcast = normal_float_np_broadcast.entropy() + entropy_np = normal_np.entropy() + entropy_variable = normal_variable.entropy() + + lp_float_np_broadcast = normal_float_np_broadcast.log_prob(values) + lp_np = normal_np.log_prob(values) + lp_variable = normal_variable.log_prob(values) + + p_float_np_broadcast = normal_float_np_broadcast.probs(values) + p_np = normal_np.probs(values) + p_variable = normal_variable.probs(values) + + kl_float = normal_float.kl_divergence(other_normal_float) + kl_float_np_broadcast = normal_float_np_broadcast.kl_divergence( + other_normal_float_np_broadcast) + kl_np = normal_np.kl_divergence(other_normal_np) + kl_variable = normal_variable.kl_divergence(other_normal_variable) + + fetch_list = [ + sample_int, sample_float, sample_float_np_broadcast, sample_np, + sample_variable, entropy_int, entropy_float, + entropy_float_np_broadcast, entropy_np, entropy_variable, + lp_float_np_broadcast, lp_np, lp_variable, p_float_np_broadcast, + p_np, p_variable, kl_float, kl_float_np_broadcast, kl_np, + kl_variable + ] + return fetch_list + + def build_normal_static(self, test_program, batch_size, dims, loc_float, + scale_float, other_loc_float, other_scale_float, + scale_np, other_scale_np, loc_np, other_loc_np, + values_np): + with fluid.program_guard(test_program): + loc = layers.data(name='loc', shape=[dims], dtype='float32') + scale = layers.data(name='scale', shape=[dims], dtype='float32') + + other_loc = layers.data( + name='other_loc', shape=[dims], dtype='float32') + other_scale = layers.data( + name='other_scale', shape=[dims], dtype='float32') + + values = 
layers.data(name='values', shape=[dims], dtype='float32') + + fetch_list = self.build_normal_common_net( + batch_size, dims, loc_float, scale_float, other_loc_float, + other_scale_float, scale_np, other_scale_np, loc_np, + other_loc_np, loc, scale, other_loc, other_scale, values) + + feed_vars = { + 'loc': loc_np, + 'scale': scale_np, + 'other_loc': other_loc_np, + 'other_scale': other_scale_np, + 'values': values_np + } + return feed_vars, fetch_list + + def build_normal_dygraph(self, batch_size, dims, loc_float, scale_float, + other_loc_float, other_scale_float, scale_np, + other_scale_np, loc_np, other_loc_np, values_np): + loc = paddle.to_tensor(loc_np) + scale = paddle.to_tensor(scale_np) + other_loc = paddle.to_tensor(other_loc_np) + other_scale = paddle.to_tensor(other_scale_np) + values = paddle.to_tensor(values_np) + + fetch_list = self.build_normal_common_net( + batch_size, dims, loc_float, scale_float, other_loc_float, + other_scale_float, scale_np, other_scale_np, loc_np, other_loc_np, + loc, scale, other_loc, other_scale, values) + fetch_list_numpy = [t.numpy() for t in fetch_list] + return fetch_list_numpy + + def get_normal_random_input(self, batch_size, dims): + loc_np = np.random.randn(batch_size, dims).astype('float32') + other_loc_np = np.random.randn(batch_size, dims).astype('float32') + + loc_float = (np.random.ranf() - 0.5) * 4 + scale_float = (np.random.ranf() - 0.5) * 4 + while scale_float < 0: + scale_float = (np.random.ranf() - 0.5) * 4 + + other_loc_float = (np.random.ranf() - 0.5) * 4 + other_scale_float = (np.random.ranf() - 0.5) * 4 + while other_scale_float < 0: + other_scale_float = (np.random.ranf() - 0.5) * 4 + + scale_np = np.random.randn(batch_size, dims).astype('float32') + other_scale_np = np.random.randn(batch_size, dims).astype('float32') + values_np = np.random.randn(batch_size, dims).astype('float32') + + while not np.all(scale_np > 0): + scale_np = np.random.randn(batch_size, dims).astype('float32') + while not np.all(other_scale_np > 0): + other_scale_np = np.random.randn(batch_size, dims).astype('float32') + return [ + loc_np, other_loc_np, loc_float, scale_float, other_loc_float, + other_scale_float, scale_np, other_scale_np, values_np + ] + + def compare_normal_with_numpy(self, + data_list, + output_list, + batch_size=2, + dims=3, + tolerance=1e-6): + loc_np, other_loc_np, loc_float, scale_float, other_loc_float, other_scale_float, scale_np, other_scale_np, values_np = data_list + + np_normal_int = NormalNumpy(int(loc_float), int(scale_float)) + np_normal_float = NormalNumpy(loc_float, scale_float) + np_other_normal_float = NormalNumpy(other_loc_float, other_scale_float) + np_normal_float_np_broadcast = NormalNumpy(loc_float, scale_np) + np_other_normal_float_np_broadcast = NormalNumpy(other_loc_float, + other_scale_np) + np_normal = NormalNumpy(loc_np, scale_np) + np_other_normal = NormalNumpy(other_loc_np, other_scale_np) + + gt_sample_int = np_normal_int.sample([batch_size, dims]) + gt_sample_float = np_normal_float.sample([batch_size, dims]) + gt_sample_float_np_broadcast = np_normal_float_np_broadcast.sample( + [batch_size, dims]) + gt_sample_np = np_normal.sample([batch_size, dims]) + gt_entropy_int = np_normal_int.entropy() + gt_entropy_float = np_normal_float.entropy() + gt_entropy_float_np_broadcast = np_normal_float_np_broadcast.entropy() + gt_entropy = np_normal.entropy() + gt_lp_float_np_broadcast = np_normal_float_np_broadcast.log_prob( + values_np) + gt_lp = np_normal.log_prob(values_np) + gt_p_float_np_broadcast = 
np_normal_float_np_broadcast.probs(values_np) + gt_p = np_normal.probs(values_np) + gt_kl_float = np_normal_float.kl_divergence(np_other_normal_float) + gt_kl_float_np_broadcast = np_normal_float_np_broadcast.kl_divergence( + np_other_normal_float_np_broadcast) + gt_kl = np_normal.kl_divergence(np_other_normal) + + [ + output_sample_int, output_sample_float, + output_sample_float_np_broadcast, output_sample_np, + output_sample_variable, output_entropy_int, output_entropy_float, + output_entropy_float_np_broadcast, output_entropy_np, + output_entropy_variable, output_lp_float_np_broadcast, output_lp_np, + output_lp_variable, output_p_float_np_broadcast, output_p_np, + output_p_variable, output_kl_float, output_kl_float_np_broadcast, + output_kl_np, output_kl_variable + ] = output_list + + np.testing.assert_allclose( + output_sample_int.shape, + gt_sample_int.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_sample_float.shape, + gt_sample_float.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_sample_float_np_broadcast.shape, + gt_sample_float_np_broadcast.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_sample_np.shape, + gt_sample_np.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_sample_variable.shape, + gt_sample_np.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_entropy_int, gt_entropy_int, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_entropy_float, + gt_entropy_float, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_entropy_float_np_broadcast, + gt_entropy_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_entropy_np, gt_entropy, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_entropy_variable, gt_entropy, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_lp_float_np_broadcast, + gt_lp_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_lp_np, gt_lp, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_lp_variable, gt_lp, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_p_float_np_broadcast, + gt_p_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_p_np, gt_p, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_p_variable, gt_p, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_kl_float, gt_kl_float, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_kl_float_np_broadcast, + gt_kl_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_kl_np, gt_kl, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_kl_variable, gt_kl, rtol=tolerance, atol=tolerance) + + def test_normal_distribution_static(self, + batch_size=2, + dims=3, + tolerance=1e-6): + test_program = fluid.Program() + data_list = self.get_normal_random_input(batch_size, dims) + loc_np, other_loc_np, loc_float, scale_float, other_loc_float, other_scale_float, scale_np, other_scale_np, values_np = data_list + + feed_vars, fetch_list = self.build_normal_static( + test_program, batch_size, dims, loc_float, scale_float, + other_loc_float, other_scale_float, scale_np, other_scale_np, + loc_np, other_loc_np, values_np) + self.executor.run(fluid.default_startup_program()) + + output_list = 
self.executor.run(program=test_program, + feed=feed_vars, + fetch_list=fetch_list) + + self.compare_normal_with_numpy(data_list, output_list, batch_size, dims, + tolerance) + + def test_normal_distribution_dygraph(self, + batch_size=2, + dims=3, + tolerance=1e-6): + paddle.disable_static() + data_list = self.get_normal_random_input(batch_size, dims) + loc_np, other_loc_np, loc_float, scale_float, other_loc_float, other_scale_float, scale_np, other_scale_np, values_np = data_list + + output_list = self.build_normal_dygraph( + batch_size, dims, loc_float, scale_float, other_loc_float, + other_scale_float, scale_np, other_scale_np, loc_np, other_loc_np, + values_np) + + self.compare_normal_with_numpy(data_list, output_list, batch_size, dims, + tolerance) + paddle.enable_static() + + def build_uniform_common_net(self, batch_size, dims, low_float, high_float, + high_np, low_np, values_np, low, high, values): + uniform_int = Uniform(int(low_float), int(high_float)) + uniform_float = Uniform(low_float, high_float) + uniform_float_np_broadcast = Uniform(low_float, high_np) + uniform_np = Uniform(low_np, high_np) + uniform_variable = Uniform(low, high) + + sample_int = uniform_int.sample([batch_size, dims]) + sample_float = uniform_float.sample([batch_size, dims]) + sample_float_np_broadcast = uniform_float_np_broadcast.sample( + [batch_size, dims]) + sample_np = uniform_np.sample([batch_size, dims]) + sample_variable = uniform_variable.sample([batch_size, dims]) + + entropy_int = uniform_int.entropy() + entropy_float = uniform_float.entropy() + entropy_float_np_broadcast = uniform_float_np_broadcast.entropy() + entropy_np = uniform_np.entropy() + entropy_variable = uniform_variable.entropy() + + lp_float_np_broadcast = uniform_float_np_broadcast.log_prob(values) + lp_np = uniform_np.log_prob(values) + lp_variable = uniform_variable.log_prob(values) + + p_float_np_broadcast = uniform_float_np_broadcast.probs(values) + p_np = uniform_np.probs(values) + p_variable = uniform_variable.probs(values) + + fetch_list = [ + sample_int, sample_float, sample_float_np_broadcast, sample_np, + sample_variable, entropy_int, entropy_float, + entropy_float_np_broadcast, entropy_np, entropy_variable, + lp_float_np_broadcast, lp_np, lp_variable, p_float_np_broadcast, + p_np, p_variable + ] + return fetch_list + + def build_uniform_static(self, test_program, batch_size, dims, low_float, + high_float, high_np, low_np, values_np): + with fluid.program_guard(test_program): + low = layers.data(name='low', shape=[dims], dtype='float32') + high = layers.data(name='high', shape=[dims], dtype='float32') + + values = layers.data(name='values', shape=[dims], dtype='float32') + + fetch_list = self.build_uniform_common_net( + batch_size, dims, low_float, high_float, high_np, low_np, + values_np, low, high, values) + + feed_vars = {'low': low_np, 'high': high_np, 'values': values_np} + return feed_vars, fetch_list + + def build_uniform_dygraph(self, batch_size, dims, low_float, high_float, + high_np, low_np, values_np): + low = paddle.to_tensor(low_np) + high = paddle.to_tensor(high_np) + values = paddle.to_tensor(values_np) + + fetch_list = self.build_uniform_common_net(batch_size, dims, low_float, + high_float, high_np, low_np, + values_np, low, high, values) + fetch_list_numpy = [t.numpy() for t in fetch_list] + return fetch_list_numpy + + def compare_uniform_with_numpy(self, + data_list, + output_list, + batch_size=2, + dims=3, + tolerance=1e-6): + [low_np, low_float, high_float, high_np, values_np] = data_list + + 
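+        # Ground-truth values come from the UniformNumpy reference above; for
+        # the random samples only the shapes are compared, while entropy,
+        # log_prob and probs are compared element-wise.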
np_uniform_int = UniformNumpy(int(low_float), int(high_float)) + np_uniform_float = UniformNumpy(low_float, high_float) + np_uniform_float_np_broadcast = UniformNumpy(low_float, high_np) + np_uniform = UniformNumpy(low_np, high_np) + + gt_sample_int = np_uniform_int.sample([batch_size, dims]) + gt_sample_float = np_uniform_float.sample([batch_size, dims]) + gt_sample_float_np_broadcast = np_uniform_float_np_broadcast.sample( + [batch_size, dims]) + gt_sample_np = np_uniform.sample([batch_size, dims]) + gt_entropy_int = np_uniform_int.entropy() + gt_entropy_float = np_uniform_float.entropy() + gt_entropy_float_np_broadcast = np_uniform_float_np_broadcast.entropy() + gt_entropy = np_uniform.entropy() + gt_lp_float_np_broadcast = np_uniform_float_np_broadcast.log_prob( + values_np) + gt_lp = np_uniform.log_prob(values_np) + gt_p_float_np_broadcast = np_uniform_float_np_broadcast.probs(values_np) + gt_p = np_uniform.probs(values_np) + + [ + output_sample_int, output_sample_float, + output_sample_float_np_broadcast, output_sample_np, + output_sample_variable, output_entropy_int, output_entropy_float, + output_entropy_float_np_broadcast, output_entropy_np, + output_entropy_variable, output_lp_float_np_broadcast, output_lp_np, + output_lp_variable, output_p_float_np_broadcast, output_p_np, + output_p_variable + ] = output_list + + np.testing.assert_allclose( + output_sample_int.shape, + gt_sample_int.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_sample_float.shape, + gt_sample_float.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_sample_float_np_broadcast.shape, + gt_sample_float_np_broadcast.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_sample_np.shape, + gt_sample_np.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_sample_variable.shape, + gt_sample_np.shape, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_entropy_int, gt_entropy_int, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_entropy_float, + gt_entropy_float, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_entropy_float_np_broadcast, + gt_entropy_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_entropy_np, gt_entropy, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_entropy_variable, gt_entropy, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_lp_float_np_broadcast, + gt_lp_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_lp_np, gt_lp, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_lp_variable, gt_lp, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_p_float_np_broadcast, + gt_p_float_np_broadcast, + rtol=tolerance, + atol=tolerance) + np.testing.assert_allclose( + output_p_np, gt_p, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose( + output_p_variable, gt_p, rtol=tolerance, atol=tolerance) + + def test_uniform_distribution_static(self, + batch_size=2, + dims=3, + tolerance=1e-6): + test_program = fluid.Program() + + low_np = np.random.randn(batch_size, dims).astype('float32') + low_float = np.random.uniform(-2, 1) + high_float = np.random.uniform(1, 3) + high_np = np.random.uniform(-5.0, 5.0, + (batch_size, dims)).astype('float32') + values_np = np.random.randn(batch_size, dims).astype('float32') + + data_list = [low_np, low_float, 
high_float, high_np, values_np] + + feed_vars, fetch_list = self.build_uniform_static( + test_program, batch_size, dims, low_float, high_float, high_np, + low_np, values_np) + + self.executor.run(fluid.default_startup_program()) + + # result calculated by paddle + output_list = self.executor.run(program=test_program, + feed=feed_vars, + fetch_list=fetch_list) + self.compare_uniform_with_numpy(data_list, output_list, batch_size, + dims, tolerance) + + def test_uniform_distribution_dygraph(self, + batch_size=2, + dims=3, + tolerance=1e-6): + paddle.disable_static() + + low_np = np.random.randn(batch_size, dims).astype('float32') + low_float = np.random.uniform(-2, 1) + high_float = np.random.uniform(1, 3) + high_np = np.random.uniform(-5.0, 5.0, + (batch_size, dims)).astype('float32') + values_np = np.random.randn(batch_size, dims).astype('float32') + + data_list = [low_np, low_float, high_float, high_np, values_np] + output_list = self.build_uniform_dygraph( + batch_size, dims, low_float, high_float, high_np, low_np, values_np) + + self.compare_uniform_with_numpy(data_list, output_list, batch_size, + dims, tolerance) + paddle.enable_static() + + +class DistributionTestError(unittest.TestCase): + def test_distribution_error(self): + distribution = Distribution() + + self.assertRaises(NotImplementedError, distribution.sample) + self.assertRaises(NotImplementedError, distribution.entropy) + + normal = Normal(0.0, 1.0) + self.assertRaises(NotImplementedError, distribution.kl_divergence, + normal) + + value_npdata = np.array([0.8], dtype="float32") + value_tensor = layers.create_tensor(dtype="float32") + self.assertRaises(NotImplementedError, distribution.log_prob, + value_tensor) + self.assertRaises(NotImplementedError, distribution.probs, value_tensor) + + def test_normal_error(self): + normal = Normal(0.0, 1.0) + + value = [1.0, 2.0] + # type of value must be variable + self.assertRaises(TypeError, normal.log_prob, value) + + value = [1.0, 2.0] + # type of value must be variable + self.assertRaises(TypeError, normal.probs, value) + + shape = 1.0 + # type of shape must be list + self.assertRaises(TypeError, normal.sample, shape) + + seed = 1.0 + # type of seed must be int + self.assertRaises(TypeError, normal.sample, [2, 3], seed) + + normal_other = Uniform(1.0, 2.0) + # type of other must be an instance of Normal + self.assertRaises(TypeError, normal.kl_divergence, normal_other) + + def test_uniform_error(self): + uniform = Uniform(0.0, 1.0) + + value = [1.0, 2.0] + # type of value must be variable + self.assertRaises(TypeError, uniform.log_prob, value) + + value = [1.0, 2.0] + # type of value must be variable + self.assertRaises(TypeError, uniform.probs, value) + + shape = 1.0 + # type of shape must be list + self.assertRaises(TypeError, uniform.sample, shape) + + seed = 1.0 + # type of seed must be int + self.assertRaises(TypeError, uniform.sample, [2, 3], seed) + + +class DistributionTestName(unittest.TestCase): + def get_prefix(self, string): + return (string.split('.')[0]) + + def test_normal_name(self): + name = 'test_normal' + normal1 = Normal(0.0, 1.0, name=name) + self.assertEqual(normal1.name, name) + + normal2 = Normal(0.0, 1.0) + self.assertEqual(normal2.name, 'Normal') + + paddle.enable_static() + + sample = normal1.sample([2]) + self.assertEqual(self.get_prefix(sample.name), name + '_sample') + + entropy = normal1.entropy() + self.assertEqual(self.get_prefix(entropy.name), name + '_entropy') + + value_npdata = np.array([0.8], dtype="float32") + value_tensor = 
layers.create_tensor(dtype="float32") + layers.assign(value_npdata, value_tensor) + + lp = normal1.log_prob(value_tensor) + self.assertEqual(self.get_prefix(lp.name), name + '_log_prob') + + p = normal1.probs(value_tensor) + self.assertEqual(self.get_prefix(p.name), name + '_probs') + + kl = normal1.kl_divergence(normal2) + self.assertEqual(self.get_prefix(kl.name), name + '_kl_divergence') + + def test_uniform_name(self): + name = 'test_uniform' + uniform1 = Uniform(0.0, 1.0, name=name) + self.assertEqual(uniform1.name, name) + + uniform2 = Uniform(0.0, 1.0) + self.assertEqual(uniform2.name, 'Uniform') + + paddle.enable_static() + + sample = uniform1.sample([2]) + self.assertEqual(self.get_prefix(sample.name), name + '_sample') + + entropy = uniform1.entropy() + self.assertEqual(self.get_prefix(entropy.name), name + '_entropy') + + value_npdata = np.array([0.8], dtype="float32") + value_tensor = layers.create_tensor(dtype="float32") + layers.assign(value_npdata, value_tensor) + + lp = uniform1.log_prob(value_tensor) + self.assertEqual(self.get_prefix(lp.name), name + '_log_prob') + + p = uniform1.probs(value_tensor) + self.assertEqual(self.get_prefix(p.name), name + '_probs') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index cc3910d1b0c828f572f5e618b7aa9c55ecb93987..d18c8e25974441a6989b18a0fe13bac91251de9d 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -18,6 +18,7 @@ import unittest import numpy as np import paddle.fluid.core as core from op_test import OpTest, skip_check_grad_ci +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard @@ -236,5 +237,501 @@ class TestDropoutOpError(unittest.TestCase): self.assertRaises(TypeError, test_dtype) +class TestDropoutFAPI(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[40, 40], dtype="float32") + res1 = paddle.nn.functional.dropout(x=input, p=0., training=False) + res2 = paddle.nn.functional.dropout( + x=input, p=0., axis=0, training=True, mode='upscale_in_train') + res3 = paddle.nn.functional.dropout( + x=input, p=0., axis=0, training=True, mode='downscale_in_infer') + res4 = paddle.nn.functional.dropout( + x=input, p=0., axis=0, training=False, mode='upscale_in_train') + res5 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=0, + training=False, + mode='downscale_in_infer') + res6 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=[0, 1], + training=True, + mode='upscale_in_train') + res7 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=[0, 1], + training=True, + mode='downscale_in_infer') + res8 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=[0, 1], + training=False, + mode='upscale_in_train') + res9 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=[0, 1], + training=False, + mode='downscale_in_infer') + res10 = paddle.nn.functional.dropout(x=input, p=1., training=True) + + in_np = np.random.random([40, 40]).astype("float32") + res_np = in_np + res_np2 = np.zeros_like(in_np) + + exe = fluid.Executor(place) + res_list = [res1, res2, res3, res4, res5, res6, res7, res8, res9] + for res in 
res_list: + fetches = exe.run(fluid.default_main_program(), + feed={"input": in_np}, + fetch_list=[res]) + self.assertTrue(np.allclose(fetches[0], res_np)) + fetches2 = exe.run(fluid.default_main_program(), + feed={"input": in_np}, + fetch_list=[res10]) + self.assertTrue(np.allclose(fetches2[0], res_np2)) + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + in_np = np.random.random([40, 40]).astype("float32") + res_np = in_np + res_np2 = np.zeros_like(in_np) + input = fluid.dygraph.to_variable(in_np) + + res1 = paddle.nn.functional.dropout( + x=input, p=0., training=False) + res2 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=0, + training=True, + mode='upscale_in_train') + res3 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=0, + training=True, + mode='downscale_in_infer') + res4 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=0, + training=False, + mode='upscale_in_train') + res5 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=0, + training=False, + mode='downscale_in_infer') + res6 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=[0, 1], + training=True, + mode='upscale_in_train') + res7 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=[0, 1], + training=True, + mode='downscale_in_infer') + res8 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=[0, 1], + training=False, + mode='upscale_in_train') + res9 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=[0, 1], + training=False, + mode='downscale_in_infer') + res10 = paddle.nn.functional.dropout( + x=input, p=1., training=True) + + res_list = [res1, res2, res3, res4, res5, res6, res7, res8, res9] + for res in res_list: + self.assertTrue(np.allclose(res.numpy(), res_np)) + self.assertTrue(np.allclose(res10.numpy(), res_np2)) + + +class TestDropoutFAPIError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + + def test_Variable(): + # the input of dropout must be Variable. + x1 = fluid.create_lod_tensor( + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + paddle.nn.functional.dropout(x1, p=0.5) + + self.assertRaises(TypeError, test_Variable) + + def test_Variable2(): + # the input of dropout must be Variable. + x1 = fluid.create_lod_tensor( + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + paddle.nn.functional.dropout(x1, p=0.5, axis=0) + + self.assertRaises(TypeError, test_Variable2) + + def test_dtype(): + # the input dtype of dropout must be float32 or float64 + # float16 only can be set on GPU place + xr = fluid.data(name='xr', shape=[3, 4, 5, 6], dtype="int32") + paddle.nn.functional.dropout(xr, p=0.5) + + self.assertRaises(TypeError, test_dtype) + + def test_pdtype(): + # p should be int or float + x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") + paddle.nn.functional.dropout(x2, p='0.5') + + self.assertRaises(TypeError, test_pdtype) + + def test_pvalue(): + # p should be 0.<=p<=1. 
+ x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") + paddle.nn.functional.dropout(x2, p=1.2) + + self.assertRaises(ValueError, test_pvalue) + + def test_mode(): + # mode should be 'downscale_in_infer' or 'upscale_in_train' + x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") + paddle.nn.functional.dropout(x2, mode='abc') + + self.assertRaises(ValueError, test_mode) + + def test_axis(): + # axis should be int or list + x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") + paddle.nn.functional.dropout(x2, axis=1.2) + + self.assertRaises(TypeError, test_axis) + + def test_axis_max(): + # maximum of axis should less than dimensions of x + x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") + paddle.nn.functional.dropout(x2, axis=[0, 5]) + + self.assertRaises(ValueError, test_axis_max) + + def test_axis_len(): + # length of axis should not greater than dimensions of x + x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") + paddle.nn.functional.dropout(x2, axis=[0, 1, 2, 3, 4]) + + self.assertRaises(ValueError, test_axis_len) + + +class TestDropoutCAPI(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + input_np = np.random.random([40, 40]).astype("float32") + result_np = input_np + input = fluid.dygraph.to_variable(input_np) + m = paddle.nn.Dropout(p=0.) + m.eval() + result = m(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + +class TestDropout2dFAPI(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data( + name="input", shape=[2, 3, 4, 5], dtype="float32") + res1 = paddle.nn.functional.dropout2d( + x=input, p=0., training=False, data_format='NCHW') + res2 = paddle.nn.functional.dropout2d( + x=input, p=0., training=False, data_format='NHWC') + + in_np = np.random.random([2, 3, 4, 5]).astype("float32") + res_np = in_np + + exe = fluid.Executor(place) + res_list = [res1, res2] + for res in res_list: + fetches = exe.run(fluid.default_main_program(), + feed={"input": in_np}, + fetch_list=[res]) + self.assertTrue(np.allclose(fetches[0], res_np)) + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + in_np = np.random.random([2, 3, 4, 5]).astype("float32") + res_np = in_np + input = fluid.dygraph.to_variable(in_np) + + res1 = paddle.nn.functional.dropout2d( + x=input, p=0., training=False, data_format='NCHW') + res2 = paddle.nn.functional.dropout2d( + x=input, p=0., training=False, data_format='NHWC') + + res_list = [res1, res2] + for res in res_list: + self.assertTrue(np.allclose(res.numpy(), res_np)) + + +class TestDropout2dFAPIError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + + def test_xdim(): + # dimentions of x should be 4 + x = fluid.data(name='x1', shape=[2, 3, 4, 5, 6], dtype="int32") + paddle.nn.functional.dropout2d(x) + + self.assertRaises(ValueError, test_xdim) + + def test_dataformat(): + # data_format should be 'NCHW' or 'NHWC' + x = fluid.data(name='x2', shape=[2, 3, 4, 5], 
dtype="int32") + paddle.nn.functional.dropout2d(x, data_format='CNHW') + + self.assertRaises(ValueError, test_dataformat) + + +class TestDropout2DCAPI(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 4, 5]).astype("float32") + result_np = input_np + input = fluid.dygraph.to_variable(input_np) + m = paddle.nn.Dropout2D(p=0.) + m.eval() + result = m(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + +class TestDropout3dFAPI(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data( + name="input", shape=[2, 3, 4, 5, 6], dtype="float32") + res1 = paddle.nn.functional.dropout3d( + x=input, p=0., training=False, data_format='NCDHW') + res2 = paddle.nn.functional.dropout3d( + x=input, p=0., training=False, data_format='NDHWC') + + in_np = np.random.random([2, 3, 4, 5, 6]).astype("float32") + res_np = in_np + + exe = fluid.Executor(place) + res_list = [res1, res2] + for res in res_list: + fetches = exe.run(fluid.default_main_program(), + feed={"input": in_np}, + fetch_list=[res]) + self.assertTrue(np.allclose(fetches[0], res_np)) + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + in_np = np.random.random([2, 3, 4, 5, 6]).astype("float32") + res_np = in_np + input = fluid.dygraph.to_variable(in_np) + + res1 = paddle.nn.functional.dropout3d( + x=input, p=0., training=False, data_format='NCDHW') + res2 = paddle.nn.functional.dropout3d( + x=input, p=0., training=False, data_format='NDHWC') + + res_list = [res1, res2] + for res in res_list: + self.assertTrue(np.allclose(res.numpy(), res_np)) + + +class TestDropout3dFAPIError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + + def test_xdim(): + # dimensions of x should be 5 + x = fluid.data(name='x1', shape=[2, 3, 4, 5], dtype="int32") + paddle.nn.functional.dropout3d(x) + + self.assertRaises(ValueError, test_xdim) + + def test_dataformat(): + # data_format should be 'NCDHW' or 'NDHWC' + x = fluid.data(name='x2', shape=[2, 3, 4, 5, 6], dtype="int32") + paddle.nn.functional.dropout3d(x, data_format='CNDHW') + + self.assertRaises(ValueError, test_dataformat) + + +class TestDropout3DCAPI(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 4, 5, 6]).astype("float32") + result_np = input_np + input = fluid.dygraph.to_variable(input_np) + m = paddle.nn.Dropout3D(p=0.) 
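+ # With p=0. and eval() dropout is effectively disabled, so the layer is
+ # expected to act as an identity mapping on input_np.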
+ m.eval() + result = m(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + +class TestAlphaDropoutFAPI(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[40, 40], dtype="float32") + res1 = paddle.nn.functional.alpha_dropout(x=input, p=0.) + res2 = paddle.nn.functional.alpha_dropout( + x=input, p=0., training=False) + + in_np = np.random.random([40, 40]).astype("float32") + res_np = in_np + + exe = fluid.Executor(place) + res_list = [res1, res2] + for res in res_list: + fetches = exe.run(fluid.default_main_program(), + feed={"input": in_np}, + fetch_list=[res]) + self.assertTrue(np.allclose(fetches[0], res_np)) + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + in_np = np.random.random([40, 40]).astype("float32") + res_np = in_np + input = fluid.dygraph.to_variable(in_np) + + res1 = paddle.nn.functional.alpha_dropout(x=input, p=0.) + res2 = paddle.nn.functional.alpha_dropout( + x=input, p=0., training=False) + + res_list = [res1, res2] + for res in res_list: + self.assertTrue(np.allclose(res.numpy(), res_np)) + + +class TestAlphaDropoutFAPIError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + + def test_Variable(): + # the input of dropout must be Variable. + x1 = fluid.create_lod_tensor( + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + paddle.nn.functional.alpha_dropout(x1, p=0.5) + + self.assertRaises(TypeError, test_Variable) + + def test_dtype(): + # the input dtype of dropout must be float32 or float64 + xr = fluid.data(name='xr', shape=[3, 4, 5, 6], dtype="int32") + paddle.nn.functional.alpha_dropout(xr) + + self.assertRaises(TypeError, test_dtype) + + def test_pdtype(): + # p should be int or float + x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") + paddle.nn.functional.alpha_dropout(x2, p='0.5') + + self.assertRaises(TypeError, test_pdtype) + + def test_pvalue(): + # p should be 0.<=p<=1. + x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") + paddle.nn.functional.alpha_dropout(x2, p=1.2) + + self.assertRaises(ValueError, test_pvalue) + + +class TestAlphaDropoutCAPI(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + input_np = np.random.random([40, 40]).astype("float32") + result_np = input_np + input = fluid.dygraph.to_variable(input_np) + m = paddle.nn.AlphaDropout(p=0.) + m.eval() + result = m(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py b/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..466226c53fabbd315acd19c6421f210d0ca225c1 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py @@ -0,0 +1,183 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy +import collections +from functools import reduce +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.nn.utils import weight_norm, remove_weight_norm + + +class TestDygraphWeightNorm(unittest.TestCase): + def setUp(self): + self.init_test_case() + self.set_data() + + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 3, 3]], ) + self.dim = None + + def set_data(self): + self.data = collections.OrderedDict() + for desc in self.data_desc: + data_name = desc[0] + data_shape = desc[1] + data_value = numpy.random.random( + size=[self.batch_size] + data_shape).astype('float32') + self.data[data_name] = data_value + + def norm_except_dim(self, w, dim=None): + shape = w.shape + ndims = len(shape) + shape_numel = reduce(lambda x, y: x * y, shape) + if dim == -1: + return numpy.linalg.norm(w, axis=None, keepdims=True) + elif dim == 0: + tile_shape = list(w.shape) + tile_shape[0] = 1 + w_matrix = numpy.reshape(w, (shape[0], shape_numel // shape[0])) + return numpy.linalg.norm(w_matrix, axis=1, keepdims=True) + elif dim == (ndims - 1): + w_matrix = numpy.reshape(w, (shape_numel // shape[-1], shape[-1])) + return numpy.linalg.norm(w_matrix, axis=0, keepdims=True) + else: + perm = list(range(ndims)) + perm_ori = list(range(ndims)) + perm[0] = dim + perm[dim] = 0 + p_transposed = numpy.transpose(w, perm) + return self.norm_except_dim(p_transposed, 0) + + def weight_normalize(self, w, dim=None): + shape = w.shape + ndims = len(shape) + shape_numel = reduce(lambda x, y: x * y, shape) + v = w + g = self.norm_except_dim(w, dim) + g_mul = g + + if dim == -1: + v_norm = v / (numpy.linalg.norm(v, axis=None, keepdims=True)) + elif dim == 0: + w_matrix = numpy.reshape(w, (shape[0], shape_numel // shape[0])) + v_norm = v / numpy.linalg.norm(w_matrix, axis=1) + v_norm = numpy.reshape(v_norm, shape) + g = numpy.squeeze(g, axis=1) + elif dim == (ndims - 1): + w_matrix = numpy.reshape(w, (shape_numel // shape[-1], shape[-1])) + v_norm = v / numpy.linalg.norm(w_matrix, axis=0, keepdims=True) + v_norm = numpy.reshape(v_norm, shape) + else: + perm = list(range(ndims)) + perm[0] = dim + perm[dim] = 0 + p_transposed = numpy.transpose(v, perm) + transposed_shape = p_transposed.shape + transposed_shape_numel = reduce(lambda x, y: x * y, + transposed_shape) + p_matrix = numpy.reshape( + p_transposed, (p_transposed.shape[0], + transposed_shape_numel // p_transposed.shape[0])) + v_norm = v / numpy.expand_dims( + numpy.expand_dims( + numpy.linalg.norm( + p_matrix, axis=1, keepdims=True), axis=0), + axis=(ndims - 1)) + v_norm = numpy.reshape(v_norm, transposed_shape) + v_norm = numpy.transpose(v_norm, perm) + g = numpy.squeeze(g, axis=1) + if dim == 1: + eaxis = 2 + elif dim == 2: + eaxis = 1 + g_mul = numpy.expand_dims( + numpy.expand_dims( + numpy.expand_dims( + g, axis=0), axis=eaxis), + axis=(ndims - 1)) + w = g_mul * v_norm + return g, v + + 
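+ # A small sanity sketch of the dim=0 branch above (hypothetical values, not
+ # part of the test data): the per-row norm g and the normalized direction
+ # reconstruct the original weight, i.e. g * (w / g) == w.
+ #   w = numpy.arange(1, 7, dtype='float32').reshape(2, 3)
+ #   g = numpy.linalg.norm(w.reshape(2, -1), axis=1, keepdims=True)
+ #   v_norm = w / g
+ #   assert numpy.allclose(g * v_norm, w)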
def test_check_output(self): + fluid.enable_imperative() + linear = paddle.nn.Conv2d(2, 3, 3) + before_weight = linear.weight.numpy() + if self.dim == None: + self.dim = -1 + wn = weight_norm(linear, dim=self.dim) + outputs = [] + for name, data in self.data.items(): + output = linear(fluid.dygraph.to_variable(data)) + outputs.append(output.numpy()) + after_weight = linear.weight + self.actual_outputs = [linear.weight_g.numpy(), linear.weight_v.numpy()] + + expect_output = self.weight_normalize(before_weight, self.dim) + + for expect, actual in zip(expect_output, self.actual_outputs): + self.assertTrue( + numpy.allclose( + numpy.array(actual), expect, atol=0.001)) + + +class TestDygraphWeightNormCase1(TestDygraphWeightNorm): + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 3, 3]], ) + self.dim = 0 + + +class TestDygraphWeightNormCase2(TestDygraphWeightNorm): + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 3, 3]], ) + self.dim = 1 + + +class TestDygraphWeightNormCase3(TestDygraphWeightNorm): + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 3, 3]], ) + self.dim = 3 + + +class TestDygraphRemoveWeightNorm(unittest.TestCase): + def setUp(self): + self.init_test_case() + + def init_test_case(self): + self.batch_size = 3 + self.data_desc = (['x', [2, 3, 3]], ) + self.dim = None + + def test_check_output(self): + fluid.enable_imperative() + linear = paddle.nn.Conv2d(2, 3, 3) + before_weight = linear.weight + wn = weight_norm(linear, dim=self.dim) + rwn = remove_weight_norm(linear) + after_weight = linear.weight + self.assertTrue( + numpy.allclose( + before_weight.numpy(), after_weight.numpy(), atol=0.001)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py index 6eeb355a6ba3a9c20156ebfd1389d50e92a5a0f5..c941d7c5f34352ac0e762403d0e7e3f0238cbe36 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py @@ -397,7 +397,7 @@ class TestAddOp(unittest.TestCase): y_1 = paddle.add(x, y, name='add_res') self.assertEqual(('add_res' in y_1.name), True) - def test_alpha(self): + def test_declarative(self): with fluid.program_guard(fluid.Program()): def gen_data(): @@ -408,33 +408,12 @@ class TestAddOp(unittest.TestCase): x = fluid.data(name="x", shape=[3], dtype='float32') y = fluid.data(name="y", shape=[3], dtype='float32') - z = paddle.add(x, y, alpha=10) + z = paddle.add(x, y) place = fluid.CPUPlace() exe = fluid.Executor(place) z_value = exe.run(feed=gen_data(), fetch_list=[z.name]) - z_expected = np.array([12., 53., 24.]) - self.assertEqual((z_value == z_expected).all(), True) - - def test_alpha_gpu(self): - if not fluid.core.is_compiled_with_cuda(): - return - with fluid.program_guard(fluid.Program()): - - def gen_data(): - return { - "x": np.array([2, 3, 4]).astype('float32'), - "y": np.array([1, 5, 2]).astype('float32') - } - - x = fluid.data(name="x", shape=[3], dtype='float32') - y = fluid.data(name="y", shape=[3], dtype='float32') - z = paddle.add(x, y, alpha=-0.5) - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - z_value = exe.run(feed=gen_data(), fetch_list=[z.name]) - z_expected = np.array([1.5, 0.5, 3.]) + z_expected = np.array([3., 8., 6.]) self.assertEqual((z_value == z_expected).all(), True) def test_dygraph(self): @@ -443,9 +422,9 @@ class 
TestAddOp(unittest.TestCase): np_y = np.array([1, 5, 2]).astype('float64') x = fluid.dygraph.to_variable(np_x) y = fluid.dygraph.to_variable(np_y) - z = paddle.add(x, y, alpha=-0.5) + z = paddle.add(x, y) np_z = z.numpy() - z_expected = np.array([1.5, 0.5, 3.]) + z_expected = np.array([3., 8., 6.]) self.assertEqual((np_z == z_expected).all(), True) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py index de0fc591b664728387ccb988f3611fe034989627..9ebaf8ff9438be8c8a57815be0798b861d05caaf 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py @@ -240,25 +240,124 @@ class TestElementwiseDivBroadcast(unittest.TestCase): self.assertEqual((out_result == (2 / x)).all(), True) -class TestDivOp(unittest.TestCase): - def test_name(self): - with fluid.program_guard(fluid.Program()): - x = fluid.data(name="x", shape=[2, 3], dtype="float32") - y = fluid.data(name='y', shape=[2, 3], dtype='float32') +class TestDivideAPI(unittest.TestCase): + def setUp(self): + paddle.set_default_dtype("float64") + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place): + # rule 1 + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = np.array([1, 2, 3]) + self.assertRaises(TypeError, paddle.divide, x=x, y=y) + + # rule 2: both the inputs are not Tensor + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = 2 + y = 4 + res = paddle.divide(x, y) + exe = fluid.Executor(place) + np_z = exe.run(fluid.default_main_program(), + feed={}, + fetch_list=[res]) + self.assertEqual(np_z[0] == 0.5, True) + + # rule 3: + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = fluid.data(name="y", shape=[3], dtype="float32") + self.assertRaises(TypeError, paddle.divide, x=x, y=y) + + # rule 4: x is Tensor, y is scalar + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = 2 + exe = fluid.Executor(place) + res = x / y + np_z = exe.run(fluid.default_main_program(), + feed={"x": np.array([2, 3, 4]).astype('float64')}, + fetch_list=[res]) + z_expected = np.array([1., 1.5, 2.]) + self.assertEqual((np_z[0] == z_expected).all(), True) + + # rule 5: y is Tensor, x is scalar + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = 2 + exe = fluid.Executor(place) + res = y / x + np_z = exe.run(fluid.default_main_program(), + feed={"x": np.array([2, 8, 4]).astype('float64')}, + fetch_list=[res]) + z_expected = np.array([1., 0.25, 0.5]) + self.assertEqual((np_z[0] == z_expected).all(), True) + + # rule 6: y is Tensor, x is Tensor + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = fluid.data(name="y", shape=[3], dtype="float64") + exe = fluid.Executor(place) + res = x / y + np_z = exe.run(fluid.default_main_program(), + feed={ + "x": np.array([2, 3, 4]).astype('float64'), + "y": np.array([1, 5, 2]).astype('float64') + }, + fetch_list=[res]) + z_expected = np.array([2., 0.6, 2.]) + self.assertEqual((np_z[0] == z_expected).all(), True) - y_1 = paddle.div(x, y, name='div_res') - self.assertEqual(('div_res' in y_1.name), True) + def 
test_static(self): + for place in self.places: + self.check_static_result(place=place) def test_dygraph(self): - with fluid.dygraph.guard(): - np_x = np.array([2, 3, 4]).astype('float64') - np_y = np.array([1, 5, 2]).astype('float64') - x = fluid.dygraph.to_variable(np_x) - y = fluid.dygraph.to_variable(np_y) - z = paddle.div(x, y) - np_z = z.numpy() - z_expected = np.array([2., 0.6, 2.]) - self.assertEqual((np_z == z_expected).all(), True) + for place in self.places: + with fluid.dygraph.guard(place): + # rule 1 : avoid numpy.ndarray + np_x = np.array([2, 3, 4]) + np_y = np.array([1, 5, 2]) + x = paddle.to_tensor(np_x) + self.assertRaises(TypeError, paddle.divide, x=x, y=np_y) + + # rule 2: both the inputs are not Tensor + z = paddle.divide(3, 2) + self.assertEqual(z.numpy()[0] == 1.5, True) + + # rule 3: both the inputs are Tensor + np_x = np.array([2, 3, 4]) + np_y = np.array([1, 5, 2]) + x = paddle.to_tensor(np_x, dtype="float32") + y = paddle.to_tensor(np_y, dtype="float64") + self.assertRaises(TypeError, paddle.divide, x=x, y=y) + + # rule 4: x is Tensor, y is scalar + np_x = np.array([2, 3, 4]) + x = paddle.to_tensor(np_x, dtype="int32") + y = 2 + z = x / y + z_expected = np.array([1., 1.5, 2.]) + self.assertEqual((z_expected == z.numpy()).all(), True) + + # rule 5: y is Tensor, x is scalar + np_x = np.array([2, 1, 4]) + x = paddle.to_tensor(np_x, dtype="int32") + y = 2 + z = y / x + z_expected = np.array([1., 2., 0.5]) + self.assertEqual((z_expected == z.numpy()).all(), True) + + # rule 6: y is Tensor, x is Tensor + np_x = np.array([2, 3, 4]) + np_y = np.array([1, 5, 2]) + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + z = x / y + z_expected = np.array([2., 0.6, 2.]) + self.assertEqual((z_expected == z.numpy()).all(), True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py index 104e896b6e440f5657a90e0ce741b49f72ba75c6..4fe085ce854726676bc1b1bef650419b3ebbfc86 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py @@ -15,6 +15,8 @@ from __future__ import print_function import unittest import numpy as np +import paddle +import paddle.fluid as fluid import paddle.fluid.core as core from op_test import OpTest @@ -56,6 +58,13 @@ class TestElementwiseModOp(OpTest): pass +class TestElementwiseModOpInverse(TestElementwiseModOp): + def init_input_output(self): + self.x = np.random.uniform(0, 10000, [10]).astype(self.dtype) + self.y = np.random.uniform(0, 1000, [10, 10]).astype(self.dtype) + self.out = np.floor_divide(self.x, self.y) + + class TestElementwiseModOp_scalar(TestElementwiseModOp): def init_input_output(self): scale_x = random.randint(0, 100000000) @@ -65,5 +74,125 @@ class TestElementwiseModOp_scalar(TestElementwiseModOp): self.out = np.floor_divide(self.x, self.y) +class TestFloorDivideAPI(unittest.TestCase): + def setUp(self): + paddle.set_default_dtype("float64") + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place): + # rule 1 + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = np.array([1, 2, 3]) + self.assertRaises(TypeError, paddle.floor_divide, x=x, y=y) + + # rule 2: both the inputs are not Tensor + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = 2 + 
y = 4 + res = paddle.floor_divide(x, y) + exe = fluid.Executor(place) + np_z = exe.run(fluid.default_main_program(), + feed={}, + fetch_list=[res]) + self.assertEqual(np_z[0] == 0., True) + + # rule 3: + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = fluid.data(name="y", shape=[3], dtype="float32") + self.assertRaises(TypeError, paddle.floor_divide, x=x, y=y) + + # rule 4: x is Tensor, y is scalar + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = 2 + exe = fluid.Executor(place) + res = x // y + np_z = exe.run(fluid.default_main_program(), + feed={"x": np.array([2, 3, 4]).astype('float64')}, + fetch_list=[res]) + z_expected = np.array([1., 1., 2.]) + self.assertEqual((np_z[0] == z_expected).all(), True) + + # rule 5: y is Tensor, x is scalar + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = 2 + exe = fluid.Executor(place) + res = y // x + np_z = exe.run(fluid.default_main_program(), + feed={"x": np.array([2, 8, 4]).astype('float64')}, + fetch_list=[res]) + z_expected = np.array([1., 0., 0.]) + self.assertEqual((np_z[0] == z_expected).all(), True) + + # rule 6: y is Tensor, x is Tensor + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = fluid.data(name="y", shape=[3], dtype="float64") + exe = fluid.Executor(place) + res = x // y + np_z = exe.run(fluid.default_main_program(), + feed={ + "x": np.array([2, 3, 4]).astype('float64'), + "y": np.array([1, 5, 2]).astype('float64') + }, + fetch_list=[res]) + z_expected = np.array([2., 0., 2.]) + self.assertEqual((np_z[0] == z_expected).all(), True) + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + # rule 1 : avoid numpy.ndarray + np_x = np.array([2, 3, 4]) + np_y = np.array([1, 5, 2]) + x = paddle.to_tensor(np_x) + self.assertRaises(TypeError, paddle.floor_divide, x=x, y=np_y) + + # rule 2: both the inputs are not Tensor + z = paddle.floor_divide(3, 2) + self.assertEqual(z.numpy()[0] == 1., True) + + # rule 3: both the inputs are Tensor + np_x = np.array([2, 3, 4]) + np_y = np.array([1, 5, 2]) + x = paddle.to_tensor(np_x, dtype="float32") + y = paddle.to_tensor(np_y, dtype="float64") + self.assertRaises(TypeError, paddle.floor_divide, x=x, y=y) + + # rule 4: x is Tensor, y is scalar + np_x = np.array([2, 3, 4]) + x = paddle.to_tensor(np_x, dtype="int32") + y = 2 + z = x // y + z_expected = np.array([1, 1, 2]) + self.assertEqual((z_expected == z.numpy()).all(), True) + + # rule 5: y is Tensor, x is scalar + np_x = np.array([2, 1, 4]) + x = paddle.to_tensor(np_x, dtype="int32") + y = 2 + z = y // x + z_expected = np.array([1, 2, 0]) + self.assertEqual((z_expected == z.numpy()).all(), True) + + # rule 6: y is Tensor, x is Tensor + np_x = np.array([2, 3, 4]) + np_y = np.array([1, 5, 2]) + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + z = x // y + z_expected = np.array([2., 0., 2.]) + self.assertEqual((z_expected == z.numpy()).all(), True) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py index 2c0fdf51769782e046b1b18ebd31782c81fd49f0..25769a42aa261c0b5ae9fe2795a337c668580a99 100644 --- 
a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py @@ -15,6 +15,8 @@ from __future__ import print_function import unittest import numpy as np +import paddle +import paddle.fluid as fluid import paddle.fluid.core as core from op_test import OpTest @@ -82,5 +84,126 @@ class TestElementwiseModOpDouble(TestElementwiseModOpFloat): self.dtype = np.float64 +class TestRemainderAPI(unittest.TestCase): + def setUp(self): + paddle.set_default_dtype("float64") + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place): + # rule 1 + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = np.array([1, 2, 3]) + self.assertRaises(TypeError, paddle.remainder, x=x, y=y) + + # rule 3: + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = fluid.data(name="y", shape=[3], dtype="float32") + self.assertRaises(TypeError, paddle.remainder, x=x, y=y) + + # rule 4: x is Tensor, y is scalar + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = 2 + exe = fluid.Executor(place) + res = x % y + np_z = exe.run(fluid.default_main_program(), + feed={"x": np.array([2, 3, 4]).astype('float64')}, + fetch_list=[res]) + z_expected = np.array([0., 1., 0.]) + self.assertEqual((np_z[0] == z_expected).all(), True) + + # rule 5: y is Tensor, x is scalar + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = 3 + y = fluid.data(name="y", shape=[3], dtype="float32") + self.assertRaises(TypeError, paddle.remainder, x=x, y=y) + + # rule 6: y is Tensor, x is Tensor + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[3], dtype="float64") + y = fluid.data(name="y", shape=[1], dtype="float64") + exe = fluid.Executor(place) + res = x % y + np_z = exe.run(fluid.default_main_program(), + feed={ + "x": np.array([1., 2., 4]).astype('float64'), + "y": np.array([1.5]).astype('float64') + }, + fetch_list=[res]) + z_expected = np.array([1., 0.5, 1.0]) + self.assertEqual((np_z[0] == z_expected).all(), True) + + # rule 6: y is Tensor, x is Tensor + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data(name="x", shape=[6], dtype="float64") + y = fluid.data(name="y", shape=[1], dtype="float64") + exe = fluid.Executor(place) + res = x % y + np_z = exe.run( + fluid.default_main_program(), + feed={ + "x": np.array([-3., -2, -1, 1, 2, 3]).astype('float64'), + "y": np.array([2]).astype('float64') + }, + fetch_list=[res]) + z_expected = np.array([1., 0., 1., 1., 0., 1.]) + self.assertEqual((np_z[0] == z_expected).all(), True) + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + # rule 1 : avoid numpy.ndarray + np_x = np.array([2, 3, 4]) + np_y = np.array([1, 5, 2]) + x = paddle.to_tensor(np_x) + self.assertRaises(TypeError, paddle.remainder, x=x, y=np_y) + + # rule 3: both the inputs are Tensor + np_x = np.array([2, 3, 4]) + np_y = np.array([1, 5, 2]) + x = paddle.to_tensor(np_x, dtype="float32") + y = paddle.to_tensor(np_y, dtype="float64") + self.assertRaises(TypeError, paddle.remainder, x=x, y=y) + + # rule 4: x is Tensor, y is scalar + np_x = np.array([2, 3, 4]) + x = 
paddle.to_tensor(np_x, dtype="int32") + y = 2 + z = x % y + z_expected = np.array([0, 1, 0]) + self.assertEqual((z_expected == z.numpy()).all(), True) + + # rule 5: y is Tensor, x is scalar + np_x = np.array([2, 3, 4]) + x = paddle.to_tensor(np_x) + self.assertRaises(TypeError, paddle.remainder, x=3, y=x) + + # rule 6: y is Tensor, x is Tensor + np_x = np.array([1., 2., 4]) + np_y = np.array([1.5]) + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + z = x % y + z_expected = np.array([1., 0.5, 1.0]) + self.assertEqual((z_expected == z.numpy()).all(), True) + + # rule 6: y is Tensor, x is Tensor + np_x = np.array([-3., -2, -1, 1, 2, 3]) + np_y = np.array([2.]) + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + z = x % y + z_expected = np.array([1., 0., 1., 1., 0., 1.]) + self.assertEqual((z_expected == z.numpy()).all(), True) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py index e86f18a62167b7feab1549072fc296f847c00491..12b75c8bf703d2b31e6abb08bb233fb2874828ce 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py @@ -29,7 +29,7 @@ class TestElementwiseMulDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. - shape = [2, 3, 7, 9] + shape = [2, 3, 4, 5] eps = 0.005 dtype = np.float64 @@ -56,7 +56,7 @@ class TestElementwiseMulBroadcastDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. - shape = [2, 3, 7, 9] + shape = [2, 3, 4, 5] eps = 0.005 dtype = np.float64 @@ -83,7 +83,7 @@ class TestElementwiseAddDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. - shape = [2, 3, 7, 9] + shape = [2, 3, 4, 5] eps = 0.005 dtype = np.float64 @@ -110,7 +110,7 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. - shape = [2, 3, 7, 9] + shape = [2, 3, 4, 5] eps = 0.005 dtype = np.float64 @@ -137,7 +137,7 @@ class TestElementwiseSubDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. - shape = [2, 3, 7, 9] + shape = [2, 3, 4, 5] eps = 0.005 dtype = np.float64 @@ -164,7 +164,7 @@ class TestElementwiseSubBroadcastDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. - shape = [2, 3, 7, 9] + shape = [2, 3, 4, 5] eps = 0.005 dtype = np.float64 @@ -191,7 +191,7 @@ class TestElementwiseDivDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. - shape = [2, 3, 7, 9] + shape = [2, 3, 4, 5] eps = 0.0001 dtype = np.float64 @@ -219,7 +219,7 @@ class TestElementwiseDivBroadcastDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. 
- shape = [2, 3, 7, 9] + shape = [2, 3, 4, 5] eps = 0.0001 dtype = np.float64 diff --git a/python/paddle/fluid/tests/unittests/test_erf_op.py b/python/paddle/fluid/tests/unittests/test_erf_op.py index 93ab0212f136adfedacb52a2fde47e15edf279d3..964e704c6a2ccbdc96fc281f6e417caf8351cdf7 100644 --- a/python/paddle/fluid/tests/unittests/test_erf_op.py +++ b/python/paddle/fluid/tests/unittests/test_erf_op.py @@ -19,6 +19,7 @@ import numpy as np from scipy.special import erf from op_test import OpTest +import paddle import paddle.fluid as fluid import paddle.fluid.dygraph as dg @@ -58,6 +59,12 @@ class TestErfLayer(unittest.TestCase): if fluid.is_compiled_with_cuda(): self._test_case(fluid.CUDAPlace(0)) + def test_name(self): + with fluid.program_guard(fluid.Program()): + x = paddle.static.data('x', [3, 4]) + y = paddle.erf(x, name='erf') + self.assertTrue('erf' in y.name) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..6b1e3c5a28a5498d1a06654ea0a4ddcac6c7592b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py @@ -0,0 +1,84 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
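+ # Both tests below feed a plain Python int for the 'lr' placeholder declared
+ # with fluid.data; Executor.run is expected to reject such a feed with a
+ # ValueError, for a plain Program as well as for a CompiledProgram.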
+ +from __future__ import print_function + +import unittest + +import numpy +import paddle.fluid.core as core +import paddle.fluid as fluid + + +class TestExecutor(unittest.TestCase): + def net(self): + lr = fluid.data(name="lr", shape=[1], dtype='float32') + x = fluid.data(name="x", shape=[None, 1], dtype='float32') + y = fluid.data(name="y", shape=[None, 1], dtype='float32') + y_predict = fluid.layers.fc(input=x, size=1, act=None) + + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + + opt = fluid.optimizer.Adam(learning_rate=lr) + opt.minimize(avg_cost) + + return lr, avg_cost + + def test_program_check_feed(self): + main_program = fluid.Program() + startup_program = fluid.Program() + scope = fluid.Scope() + with fluid.program_guard(main_program, startup_program): + with fluid.scope_guard(scope): + cpu = fluid.CPUPlace() + exe = fluid.Executor(cpu) + lr, cost = self.net() + exe.run(startup_program) + train_data = [[1.0], [2.0], [3.0], [4.0]] + y_true = [[2.0], [4.0], [6.0], [8.0]] + a = 0 + with self.assertRaises(ValueError): + exe.run(feed={'x': train_data, + 'lr': a}, + fetch_list=[lr, cost], + return_numpy=False, + use_prune=True) + + def test_compiled_program_check_feed(self): + main_program = fluid.Program() + startup_program = fluid.Program() + scope = fluid.Scope() + with fluid.program_guard(main_program, startup_program): + with fluid.scope_guard(scope): + cpu = fluid.CPUPlace() + exe = fluid.Executor(cpu) + lr, cost = self.net() + exe.run(startup_program) + compiled_prog = fluid.CompiledProgram( + main_program).with_data_parallel(loss_name=cost.name) + train_data = [[1.0], [2.0], [3.0], [4.0]] + y_true = [[2.0], [4.0], [6.0], [8.0]] + a = 0 + with self.assertRaises(ValueError): + exe.run(compiled_prog, + feed={'x': train_data, + 'lr': a}, + fetch_list=[lr, cost], + return_numpy=False, + use_prune=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py new file mode 100755 index 0000000000000000000000000000000000000000..4bc6bf3744f26cf7618d255f306bdb8f5fefb7a0 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py @@ -0,0 +1,132 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
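+ # The numpy reference used throughout this file is np.tile: expanding an
+ # input against a larger target is modelled by tiling it along the broadcast
+ # dimensions. A minimal sketch (hypothetical shapes, for illustration only):
+ #   x = np.random.rand(100)
+ #   expected = np.tile(x, [2, 1])  # matches a target of shape (2, 100)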
+ +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.fluid as fluid + + +class TestExpandAsOpRank1(OpTest): + def setUp(self): + self.op_type = "expand_as_v2" + x = np.random.rand(100).astype("float64") + target_tensor = np.random.rand(2, 100).astype("float64") + self.inputs = {'X': x, 'target_tensor': target_tensor} + self.attrs = {} + bcast_dims = [2, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestExpandAsOpRank2(OpTest): + def setUp(self): + self.op_type = "expand_as_v2" + x = np.random.rand(10, 12).astype("float64") + target_tensor = np.random.rand(10, 12).astype("float64") + self.inputs = {'X': x, 'target_tensor': target_tensor} + self.attrs = {} + bcast_dims = [1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestExpandAsOpRank3(OpTest): + def setUp(self): + self.op_type = "expand_as_v2" + x = np.random.rand(2, 3, 20).astype("float64") + target_tensor = np.random.rand(2, 3, 20).astype("float64") + self.inputs = {'X': x, 'target_tensor': target_tensor} + self.attrs = {} + bcast_dims = [1, 1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestExpandAsOpRank4(OpTest): + def setUp(self): + self.op_type = "expand_as_v2" + x = np.random.rand(1, 1, 7, 16).astype("float64") + target_tensor = np.random.rand(4, 6, 7, 16).astype("float64") + self.inputs = {'X': x, 'target_tensor': target_tensor} + self.attrs = {} + bcast_dims = [4, 6, 1, 1] + output = np.tile(self.inputs['X'], bcast_dims) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestExpandAsV2Error(unittest.TestCase): + def test_errors(self): + with fluid.program_guard(fluid.Program(), fluid.Program()): + x1 = fluid.layers.data(name='x1', shape=[4], dtype="uint8") + x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") + self.assertRaises(TypeError, paddle.tensor.expand_as, x1, x2) + x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool") + x3.stop_gradient = False + self.assertRaises(ValueError, paddle.tensor.expand_as, x3, x2) + + +# Test python API +class TestExpandAsV2API(unittest.TestCase): + def test_api(self): + input1 = np.random.random([12, 14]).astype("float32") + input2 = np.random.random([2, 12, 14]).astype("float32") + x = fluid.layers.data( + name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + + y = fluid.layers.data( + name='target_tensor', + shape=[2, 12, 14], + append_batch_size=False, + dtype="float32") + + out_1 = paddle.expand_as(x, y=y) + + exe = fluid.Executor(place=fluid.CPUPlace()) + res_1 = exe.run(fluid.default_main_program(), + feed={"x": input1, + "target_tensor": input2}, + fetch_list=[out_1]) + assert np.array_equal(res_1[0], np.tile(input1, (2, 1, 1))) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py new file mode 100644 index 
0000000000000000000000000000000000000000..aee6ca249f535b9c06c00a6806ac491be16cd4b3 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py @@ -0,0 +1,234 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle.fluid as fluid +from paddle.fluid import compiler, Program, program_guard +import paddle + + +# Situation 1: shape is a list(without tensor) +class TestExpandV2OpRank1(OpTest): + def setUp(self): + self.op_type = "expand_v2" + self.init_data() + + self.inputs = {'X': np.random.random(self.ori_shape).astype("float64")} + self.attrs = {'shape': self.shape} + output = np.tile(self.inputs['X'], self.expand_times) + self.outputs = {'Out': output} + + def init_data(self): + self.ori_shape = [100] + self.shape = [100] + self.expand_times = [1] + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestExpandV2OpRank2_DimExpanding(TestExpandV2OpRank1): + def init_data(self): + self.ori_shape = [120] + self.shape = [2, 120] + self.expand_times = [2, 1] + + +class TestExpandV2OpRank2(TestExpandV2OpRank1): + def init_data(self): + self.ori_shape = [1, 140] + self.shape = [12, 140] + self.expand_times = [12, 1] + + +class TestExpandV2OpRank3_Corner(TestExpandV2OpRank1): + def init_data(self): + self.ori_shape = (2, 10, 5) + self.shape = (2, 10, 5) + self.expand_times = (1, 1, 1) + + +class TestExpandV2OpRank4(TestExpandV2OpRank1): + def init_data(self): + self.ori_shape = (2, 4, 5, 7) + self.shape = (-1, -1, -1, -1) + self.expand_times = (1, 1, 1, 1) + + +# Situation 2: shape is a list(with tensor) +class TestExpandV2OpRank1_tensor_attr(OpTest): + def setUp(self): + self.op_type = "expand_v2" + self.init_data() + expand_shapes_tensor = [] + for index, ele in enumerate(self.expand_shape): + expand_shapes_tensor.append(("x" + str(index), np.ones( + (1)).astype('int32') * ele)) + + self.inputs = { + 'X': np.random.random(self.ori_shape).astype("float64"), + 'expand_shapes_tensor': expand_shapes_tensor, + } + self.attrs = {"shape": self.infer_expand_shape} + output = np.tile(self.inputs['X'], self.expand_times) + self.outputs = {'Out': output} + + def init_data(self): + self.ori_shape = [100] + self.expand_times = [1] + self.expand_shape = [100] + self.infer_expand_shape = [-1] + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestExpandV2OpRank2_Corner_tensor_attr(TestExpandV2OpRank1_tensor_attr): + def init_data(self): + self.ori_shape = [12, 14] + self.expand_times = [1, 1] + self.expand_shape = [12, 14] + self.infer_expand_shape = [12, -1] + + +# Situation 3: shape is a tensor +class TestExpandV2OpRank1_tensor(OpTest): + def setUp(self): + self.op_type = "expand_v2" + self.init_data() + + self.inputs = { + 'X': 
np.random.random(self.ori_shape).astype("float64"), + 'Shape': np.array(self.expand_shape).astype("int32"), + } + self.attrs = {} + output = np.tile(self.inputs['X'], self.expand_times) + self.outputs = {'Out': output} + + def init_data(self): + self.ori_shape = [100] + self.expand_times = [2, 1] + self.expand_shape = [2, 100] + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +# Situation 4: input x is Integer +class TestExpandV2OpInteger(OpTest): + def setUp(self): + self.op_type = "expand_v2" + self.inputs = { + 'X': np.random.randint( + 10, size=(2, 4, 5)).astype("int32") + } + self.attrs = {'shape': [2, 4, 5]} + output = np.tile(self.inputs['X'], (1, 1, 1)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +# Situation 5: input x is Bool +class TestExpandV2OpBoolean(OpTest): + def setUp(self): + self.op_type = "expand_v2" + self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")} + self.attrs = {'shape': [2, 4, 5]} + output = np.tile(self.inputs['X'], (1, 1, 1)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +# Situation 56: input x is Integer +class TestExpandV2OpInt64_t(OpTest): + def setUp(self): + self.op_type = "expand_v2" + self.inputs = { + 'X': np.random.randint( + 10, size=(2, 4, 5)).astype("int64") + } + self.attrs = {'shape': [2, 4, 5]} + output = np.tile(self.inputs['X'], (1, 1, 1)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +class TestExpandV2Error(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + x1 = fluid.create_lod_tensor( + np.array([[-1]]), [[1]], fluid.CPUPlace()) + shape = [2, 2] + self.assertRaises(TypeError, paddle.tensor.expand, x1, shape) + x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") + self.assertRaises(TypeError, paddle.tensor.expand, x2, shape) + x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool") + x3.stop_gradient = False + self.assertRaises(ValueError, paddle.tensor.expand, x3, shape) + + +# Test python API +class TestExpandV2API(unittest.TestCase): + def test_api(self): + input = np.random.random([12, 14]).astype("float32") + x = fluid.layers.data( + name='x', shape=[12, 14], append_batch_size=False, dtype="float32") + + positive_2 = fluid.layers.fill_constant([1], "int32", 12) + expand_shape = fluid.layers.data( + name="expand_shape", + shape=[2], + append_batch_size=False, + dtype="int32") + + out_1 = paddle.expand(x, shape=[12, 14]) + out_2 = paddle.expand(x, shape=[positive_2, 14]) + out_3 = paddle.expand(x, shape=expand_shape) + + g0 = fluid.backward.calc_gradient(out_2, x) + + exe = fluid.Executor(place=fluid.CPUPlace()) + res_1, res_2, res_3 = exe.run(fluid.default_main_program(), + feed={ + "x": input, + "expand_shape": + np.array([12, 14]).astype("int32") + }, + fetch_list=[out_1, out_2, out_3]) + assert np.array_equal(res_1, np.tile(input, (1, 1))) + assert np.array_equal(res_2, np.tile(input, (1, 1))) + assert np.array_equal(res_3, np.tile(input, (1, 1))) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py index 0812b02b47db7fa2d43e1d3bbd0a3f7b59911326..b30e0a6775ea9901d8c2a3a56b2e80141fffd23c 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py @@ -31,45 +31,45 @@ def dequantize_max_abs(x, scale, max_range): return y -def channel_wise_quantize_max_abs(x, quant_bit=8, use_second_dim=False): +def channel_wise_quantize_max_abs(x, quant_bit=8, quant_axis=0): + assert quant_axis in [0, 1], "The quant_axis should be 0 or 1." scales = [] - if not use_second_dim: + y = x.copy() + max_range = math.pow(2, quant_bit - 1) - 1 + if quant_axis == 0: for i in range(x.shape[0]): - scales.append(np.max(np.abs(x[i])).astype("float32")) - y = x.copy() - max_range = math.pow(2, quant_bit - 1) - 1 - for i, scale in enumerate(scales): - y[i] = np.round(x[i] / scale * max_range) - else: - for i in range(x.shape[0]): - s = [] - for j in range(x.shape[1]): - s.append(np.max(np.abs(x[i][j])).astype("float32")) - scales.append(s) - scales = np.amax(np.array(scales), axis=0) - y = x.copy() - max_range = math.pow(2, quant_bit - 1) - 1 - for i in range(x.shape[0]): - for j, scale in enumerate(scales): - y[i][j] = np.round(x[i][j] / scale * max_range) + scale = np.max(np.abs(x[i])).astype("float32") + scales.append(scale) + y[i] = np.round(x[i] * max_range / scale) + elif quant_axis == 1: + for i in range(x.shape[1]): + scale = np.max(np.abs(x[:, i])).astype("float32") + scales.append(scale) + y[:, i] = np.round(x[:, i] * max_range / scale) return y, scales def channel_wise_dequantize_max_abs(x, scales, quant_bits, + quant_axis, activation_scale=None): - if activation_scale is None: - y = x.copy() - for i in range(x.shape[0]): - y[i] = (scales[i] / (math.pow(2, quant_bits[0] - 1) - 1)) * x[i] + assert quant_axis in [0, 1], "The quant_axis should be 0 or 1." + + if isinstance(quant_bits, list): + max_range = math.pow(2, quant_bits[0] - 1) - 1 else: - y = x.copy() + max_range = math.pow(2, quant_bits - 1) - 1 + y = x.copy() + if quant_axis == 0: for i in range(x.shape[0]): - for j in range(x.shape[1]): - y[i][j] = (scales[j] / - (math.pow(2, quant_bits[0] - 1) - 1)) * x[i][j] - y *= activation_scale / (math.pow(2, quant_bits[1] - 1) - 1) + y[i] = x[i] * scales[i] / max_range + elif quant_axis == 1: + for i in range(x.shape[1]): + y[:, i] = x[:, i] * scales[i] / max_range + + if activation_scale is not None: + y = y * activation_scale / (math.pow(2, quant_bits[1] - 1) - 1) return y @@ -83,9 +83,8 @@ class TestFakeChannelWiseDequantizeMaxAbsOpTwoScales(OpTest): self.set_args() self.op_type = "fake_channel_wise_dequantize_max_abs" x = np.random.randn(4, 3, 64, 64).astype(self.data_type) - yq, scales = channel_wise_quantize_max_abs( - x, self.quant_bits[0], use_second_dim=True) - ydq = channel_wise_dequantize_max_abs(yq, scales, self.quant_bits, + yq, scales = channel_wise_quantize_max_abs(x, self.quant_bits[0], 1) + ydq = channel_wise_dequantize_max_abs(yq, scales, self.quant_bits, 1, self.activation_scale) self.inputs = { @@ -105,25 +104,39 @@ class TestFakeChannelWiseDequantizeMaxAbsOpOneScale(OpTest): def set_args(self): self.quant_bits = [8] self.data_type = "float32" + self.quant_axis = 0 def setUp(self): self.set_args() self.op_type = "fake_channel_wise_dequantize_max_abs" x = np.random.randn(4, 3, 64, 64).astype(self.data_type) - yq, scales = channel_wise_quantize_max_abs(x, self.quant_bits[0]) - ydq = channel_wise_dequantize_max_abs(yq, scales, self.quant_bits) + yq, scales = channel_wise_quantize_max_abs(x, self.quant_bits[0], + self.quant_axis) + ydq = channel_wise_dequantize_max_abs(yq, scales, self.quant_bits, + self.quant_axis) self.inputs = { 'X': yq, 'Scales': [("scales0", 
np.array(scales).astype(self.data_type))] } - self.attrs = {'quant_bits': self.quant_bits} + self.attrs = { + 'quant_bits': self.quant_bits, + 'quant_axis': self.quant_axis + } self.outputs = {'Out': ydq} def test_check_output(self): self.check_output() +class TestFakeChannelWiseDequantizeMaxAbsOpOneScale1( + TestFakeChannelWiseDequantizeMaxAbsOpOneScale): + def set_args(self): + self.quant_bits = [8] + self.data_type = "float32" + self.quant_axis = 1 + + class TestFakeDequantizeMaxAbsOp(OpTest): def set_args(self): self.num_bits = 8 diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py index 1c8335e3bceab24cba9364a96f6907d2cf585fe0..7835fd3f53ddb7f9a95313c6cc5fc7b72ae6d664 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py @@ -72,28 +72,62 @@ class TestFakeQuantizeOp2(OpTest): class TestFakeChannelWiseQuantizeOp(OpTest): def setUp(self): + self.set_arg() + assert self.quant_axis in [0, 1], "quant_axis should be 0 or 1." + self.op_type = "fake_channel_wise_quantize_abs_max" - self.attrs = {'bit_length': 8} - self.inputs = { - 'X': np.random.random((4, 3, 64, 64)).astype("float32"), - } + self.attrs = {'bit_length': 8, 'quant_axis': self.quant_axis} + scales = [] - for i in range(self.inputs['X'].shape[0]): - scales.append(np.max(np.abs(self.inputs['X'][i])).astype("float32")) outputs = self.inputs['X'].copy() - for i, scale in enumerate(scales): - outputs[i] = np.round(outputs[i] / scale * ( - (1 << (self.attrs['bit_length'] - 1)) - 1)) + bnt = (1 << (self.attrs['bit_length'] - 1)) - 1 + if self.quant_axis == 0: + for i in range(self.inputs['X'].shape[0]): + scale_v = np.max(np.abs(self.inputs['X'][i])).astype("float32") + scales.append(scale_v) + outputs[i] = np.round(outputs[i] / scale_v * bnt) + elif self.quant_axis == 1: + for i in range(self.inputs['X'].shape[1]): + scale_v = np.max(np.abs(self.inputs['X'][:, i])).astype( + "float32") + scales.append(scale_v) + outputs[:, i] = np.round(outputs[:, i] / scale_v * bnt) self.outputs = { 'Out': outputs, 'OutScale': np.array(scales).astype("float32"), } + def set_arg(self): + self.quant_axis = 0 + self.inputs = { + 'X': np.random.random((20, 15, 6, 6)).astype("float32"), + } + def test_check_output(self): self.check_output() +class TestFakeChannelWiseQuantizeOp1(TestFakeChannelWiseQuantizeOp): + def set_quant_axis(self): + self.quant_axis = 1 + self.inputs = { + 'X': np.random.random((15, 20, 5, 5)).astype("float32"), + } + + +class TestFakeChannelWiseQuantizeOp2(TestFakeChannelWiseQuantizeOp): + def set_quant_axis(self): + self.quant_axis = 0 + self.inputs = {'X': np.random.random((30, 15)).astype("float32"), } + + +class TestFakeChannelWiseQuantizeOp3(TestFakeChannelWiseQuantizeOp): + def set_quant_axis(self): + self.quant_axis = 1 + self.inputs = {'X': np.random.random((30, 15)).astype("float32"), } + + class TestFakeQuantizeRangeAbsMaxOp(OpTest): def setUp(self): self.op_type = "fake_quantize_range_abs_max" diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index 4bd56802efd462ff63498f54699012776ce6f47c..642044bb4b1152b0c6d2b5a8a64e22410f9bd151 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -145,19 +145,22 @@ class 
TestFlatten2OpError(unittest.TestCase): x = x.astype('float32') def test_ValueError1(): - x_var = paddle.nn.data(name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data( + name="x", shape=image_shape, dtype='float32') out = paddle.flatten(x_var, start_axis=2, stop_axis=1) self.assertRaises(ValueError, test_ValueError1) def test_ValueError2(): - x_var = paddle.nn.data(name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data( + name="x", shape=image_shape, dtype='float32') paddle.flatten(x_var, start_axis=10, stop_axis=1) self.assertRaises(ValueError, test_ValueError2) def test_ValueError3(): - x_var = paddle.nn.data(name="x", shape=image_shape, dtype='float32') + x_var = paddle.static.data( + name="x", shape=image_shape, dtype='float32') paddle.flatten(x_var, start_axis=2, stop_axis=10) self.assertRaises(ValueError, test_ValueError3) diff --git a/python/paddle/fluid/tests/unittests/test_fleet.py b/python/paddle/fluid/tests/unittests/test_fleet.py index 449f31faf4035971f996e76612f10c882ce9179c..a705d5ee661fd5d0d28b791d6db4624b78281743 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet.py +++ b/python/paddle/fluid/tests/unittests/test_fleet.py @@ -34,7 +34,8 @@ class TestFleet1(unittest.TestCase): def test_pslib_1(self): """Test cases for pslib.""" import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker try: import netifaces @@ -48,10 +49,10 @@ class TestFleet1(unittest.TestCase): os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002" os.environ["PADDLE_TRAINER_ID"] = "0" role_maker = GeneralRoleMaker() - role_maker.generate_role() + #role_maker.generate_role() place = fluid.CPUPlace() exe = fluid.Executor(place) - fleet.init(role_maker) + #fleet.init(role_maker) train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py index 0e19069d5c04e7e6bf05be8f3f48a7ce395a0a57..38c3903306e6e76188cdb50476d6797814c434e9 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker import unittest import paddle import os @@ -23,8 +25,6 @@ class TestFleetAMPOptimizer(unittest.TestCase): os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" def test_amp_optimizer(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) input_x = paddle.fluid.layers.data( @@ -51,7 +51,7 @@ class TestFleetAMPOptimizer(unittest.TestCase): "custom_black_list": ['tanh'], } - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py index 3a79b694cad5b0cb3fe0a08b6a18506510eead5b..9e651dea24ba7f35f3785093da8ac73dde07be5a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py @@ -14,7 +14,10 @@ import unittest import paddle +import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker import os +import paddle.fluid as fluid class TestFleetBase(unittest.TestCase): @@ -26,67 +29,49 @@ class TestFleetBase(unittest.TestCase): "127.0.0.1:36001,127.0.0.2:36001" def test_init(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) def test_is_first_worker(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_first_worker(): print("test fleet first worker done.") def test_worker_index(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) print(fleet.worker_index()) def test_worker_num(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) print(fleet.worker_num()) def test_is_worker(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_worker(): print("test fleet is worker") def test_worker_endpoints(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) print(fleet.worker_endpoints(to_string=True)) def test_server_num(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_server(): print("fleet server num: {}".format(fleet.server_num())) def test_server_index(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_server(): print("fleet 
server index: {}".format(fleet.server_index())) def test_server_endpoints(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_server(): @@ -94,83 +79,50 @@ class TestFleetBase(unittest.TestCase): fleet.server_endpoints(to_string=True))) def test_is_server(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_server(): print("test fleet is server") def test_util(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) self.assertEqual(fleet.util, None) def test_barrier_worker(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_worker(): fleet.barrier_worker() def test_init_worker(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_worker(): fleet.init_worker() def test_run_server(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_worker(): fleet.run_worker() def test_stop_worker(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) if fleet.is_worker(): fleet.stop_worker() def test_distributed_optimizer(self): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) optimizer = paddle.optimizer.SGD(learning_rate=0.001) optimizer = fleet.distributed_optimizer(optimizer) - def test_minimize(self): - import paddle + def test_exception(self): import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker - - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') - - fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') - fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') - prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) - avg_cost = paddle.fluid.layers.mean(x=cost) - - role = role_maker.PaddleCloudRoleMaker(is_collective=True) - fleet.init(role) - strategy = fleet.DistributedStrategy() - optimizer = paddle.optimizer.SGD(learning_rate=0.001) - optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) - optimizer.minimize(avg_cost) + self.assertRaises(Exception, fleet.init_worker) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_2.py b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py new file mode 100644 index 0000000000000000000000000000000000000000..d666ea6740be149723e3bdbc00857a8931ce318e --- /dev/null +++ 
b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py @@ -0,0 +1,101 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle +import os +import paddle.fluid as fluid + + +class TestFleetBase(unittest.TestCase): + def setUp(self): + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" + os.environ["PADDLE_TRAINERS_NUM"] = "2" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \ + "127.0.0.1:36001,127.0.0.2:36001" + + def test_ps_minimize(self): + import paddle + import paddle.distributed.fleet as fleet + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype='float32') + input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + role = fleet.PaddleCloudRoleMaker(is_collective=False) + fleet.init(role) + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.a_sync = False + optimizer = paddle.optimizer.SGD(learning_rate=0.001) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + pe = fluid.ParallelExecutor(use_cuda=False, loss_name=avg_cost.name) + compiled_prog = fluid.compiler.CompiledProgram( + fluid.default_main_program()) + self.assertRaises( + Exception, + fleet.save_inference_model, + dirname='/tmp/', + feeded_var_names=['x', 'y'], + target_vars=[avg_cost], + executor=pe) + + self.assertRaises( + Exception, + fleet.save_inference_model, + dirname='/tmp/', + feeded_var_names=['x', 'y'], + target_vars=[avg_cost], + executor="exe") + + self.assertRaises( + Exception, + fleet.save_inference_model, + dirname='/tmp/', + feeded_var_names=['x', 'y'], + target_vars=[avg_cost], + executor=exe, + main_program=compiled_prog) + + self.assertRaises( + Exception, fleet.save_persistables, executor=pe, dirname='/tmp/') + + self.assertRaises( + Exception, fleet.save_persistables, executor="exe", dirname='/tmp/') + + self.assertRaises( + Exception, + fleet.save_persistables, + executor=exe, + dirname='/tmp/', + main_program=compiled_prog) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_3.py b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py new file mode 100644 index 0000000000000000000000000000000000000000..f5e888ab0eb3ca597bf62245ff9f3024fe81ee95 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py @@ -0,0 +1,52 @@ +# Copyright (c) 2020 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import os +import paddle +import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid + + +class TestFleetBase(unittest.TestCase): + def setUp(self): + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" + os.environ["PADDLE_TRAINERS_NUM"] = "2" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \ + "127.0.0.1:36001,127.0.0.2:36001" + + def test_collective_minimize(self): + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype='float32') + input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + strategy = fleet.DistributedStrategy() + optimizer = paddle.optimizer.SGD(learning_rate=0.001) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_4.py b/python/paddle/fluid/tests/unittests/test_fleet_base_4.py new file mode 100644 index 0000000000000000000000000000000000000000..1b3fbb86a4af55d6838df3a628bf2cf194c5235d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_4.py @@ -0,0 +1,45 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
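# Editorial sketch, not part of the patch: how the new fleet tests pick a role. The
# PaddleCloudRoleMaker reads the PADDLE_* variables set in setUp, and is_collective
# switches between collective (all-reduce) training and parameter-server training.
# Values below are assumed single-machine placeholders mirroring the tests.
import os
import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_maker

os.environ["POD_IP"] = "127.0.0.1"
os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"
os.environ["PADDLE_TRAINERS_NUM"] = "2"
os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001,127.0.0.2:36001"

# collective mode: every process is a trainer
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)

# parameter-server mode instead needs a role for this process, as in test_fleet_base_2:
# os.environ["TRAINING_ROLE"] = "PSERVER"
# os.environ["PADDLE_PORT"] = "36001"
# fleet.init(fleet.PaddleCloudRoleMaker(is_collective=False))

# fleet.init() can also be called with no role maker or with is_collective directly;
# passing a non-bool is_collective or a non-RoleMaker object raises, which is what
# test_fleet_base_4 asserts.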
+ +import unittest +import paddle +import os +import paddle.fluid as fluid + + +class TestFleetBase(unittest.TestCase): + def setUp(self): + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" + os.environ["PADDLE_TRAINERS_NUM"] = "2" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \ + "127.0.0.1:36001,127.0.0.2:36001" + + def test_fleet_init(self): + import paddle.distributed.fleet as fleet + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + role = fleet.PaddleCloudRoleMaker(is_collective=False) + fleet.init(role) + fleet.init() + fleet.init(is_collective=False) + self.assertRaises(Exception, fleet.init, is_collective="F") + self.assertRaises(Exception, fleet.init, role_maker="F") + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py index 1d211a77008b47fadd5908e9d8382bdaaaf77eb9..55d4ff7726aace09e486156d26efdecf22b310a5 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py @@ -17,7 +17,7 @@ import paddle from paddle import fluid import os import paddle.distributed.fleet as fleet -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker class TestFleetDGCOptimizer(unittest.TestCase): @@ -60,7 +60,8 @@ class TestFleetDGCOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9) + optimizer = paddle.fluid.optimizer.Momentum( + learning_rate=0.01, momentum=0.9) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -72,7 +73,7 @@ class TestFleetDGCOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.optimizer.Adam(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.Adam(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -87,7 +88,8 @@ class TestFleetDGCOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9) + optimizer = paddle.fluid.optimizer.Momentum( + learning_rate=0.01, momentum=0.9) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py index 45dd461237ba5ba47d2b39e6b53279372d0723cb..40e0168e1ac93dfd93a99c19eced05756a49471f 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py @@ -289,6 +289,11 @@ class TestStrategyConfig(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.execution_strategy = exe_strategy + def test_unknown_strategy(self): + strategy = paddle.distributed.fleet.DistributedStrategy() + with self.assertRaises(TypeError): + strategy.unknown_key = 'UNK' + 
if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py index 49ce09877f0a0fb91d398eb6eba57ada323f96a0..af72df5186876a8bcbaa5bfa6d71a27fdf46b119 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_gradient_merge_meta_optimizer.py @@ -16,7 +16,7 @@ import unittest import paddle import os import paddle.distributed.fleet as fleet -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): @@ -44,7 +44,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.gradient_merge = True strategy.gradient_merge_configs = {"k_steps": 2, "avg": True} - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py index 3e97ab3bfc66ca5becfba2ed94ce94504d100936..cac2698d33615f24da735f881faae60c51a34027 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py @@ -15,7 +15,7 @@ import unittest import paddle import os -from launch_function_helper import launch_func +from launch_function_helper import launch_func, _find_free_port class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): @@ -38,6 +38,132 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): "https_proxy": "" } + def node_func(): + import paddle.distributed.fleet as fleet + import paddle.fluid.incubate.fleet.base.role_maker as role_maker + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype='float32') + input_y = paddle.fluid.layers.data( + name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], + size=2, + act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + strategy = paddle.distributed.fleet.DistributedStrategy() + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) + optimizer = fleet.distributed_optimizer( + optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + proc_a = launch_func(node_func, node_a) + proc_a.start() + proc_b = launch_func(node_func, node_b) + proc_b.start() + proc_a.join() + proc_b.join() + + def test_graph_execution_optimizer(self): + + port_set = set() + port_a = _find_free_port(port_set) + port_b = _find_free_port(port_set) + + node_a = { + "PADDLE_TRAINER_ID": "0", + "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_a), + "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ENDPOINTS": + "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), + "http_proxy": "", + "https_proxy": "" + } + + node_b = { + "PADDLE_TRAINER_ID": "1", + 
"PADDLE_CURRENT_ENDPOINT": "127.0.0.1:{}".format(port_b), + "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ENDPOINTS": + "127.0.0.1:{},127.0.0.1:{}".format(port_a, port_b), + "http_proxy": "", + "https_proxy": "" + } + + def node_func(): + import paddle.distributed.fleet as fleet + import paddle.fluid.incubate.fleet.base.role_maker as role_maker + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype='float32') + input_y = paddle.fluid.layers.data( + name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], + size=2, + act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.nccl_comm_num = 2 + strategy.sync_nccl_allreduce = True + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) + optimizer = fleet.distributed_optimizer( + optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + exe = paddle.fluid.Executor(place=paddle.fluid.CPUPlace()) + exe.run(paddle.fluid.default_startup_program()) + + import numpy as np + + def gen_data(): + return { + "x": np.random.random(size=(128, 32)).astype('float32'), + "y": np.random.randint( + 2, size=(128, 1)).astype('int64') + } + + for i in range(10): + cost_val = exe.run(feed=gen_data(), fetch_list=[avg_cost.name]) + print("cost of step[{}] = {}".format(i, cost_val)) + + proc_a = launch_func(node_func, node_a) + proc_a.start() + proc_b = launch_func(node_func, node_b) + proc_b.start() + proc_a.join() + proc_b.join() + + def test_graph_execution_optimizer_not_apply_v2(self): + node_a = { + "PADDLE_TRAINER_ID": "0", + "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:36003", + "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:36003,127.0.0.1:36004", + "http_proxy": "", + "https_proxy": "" + } + + node_b = { + "PADDLE_TRAINER_ID": "1", + "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:36004", + "PADDLE_TRAINERS_NUM": "2", + "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:36003,127.0.0.1:36004", + "http_proxy": "", + "https_proxy": "" + } + def node_func(): import paddle.distributed.fleet as fleet import paddle.fluid.incubate.fleet.base.role_maker as role_maker @@ -111,7 +237,7 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.nccl_comm_num = 2 strategy.sync_nccl_allreduce = True - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer( optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py b/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py index d2e0112ba298cf7d8267c2194b44213c150f41e4..69f5b134888b0f3268cea112eeefd9fb7fd0127f 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_executor.py @@ -14,6 +14,8 @@ import unittest import paddle +import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker import os from launch_function_helper import launch_func @@ -39,8 +41,6 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): } def 
node_func(): - import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) input_x = paddle.fluid.layers.data( @@ -60,7 +60,7 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.nccl_comm_num = 2 strategy.sync_nccl_allreduce = True - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer( optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py index 8ad051924f2740dceaa0b8d5b9f66ba0dd743f36..3f140f53b043b1949572f3728ca8a0c556317783 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_lamb_meta_optimizer.py @@ -17,7 +17,7 @@ import paddle from paddle import fluid import os import paddle.distributed.fleet as fleet -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker class TestFleetLambMetaOptimizer(unittest.TestCase): @@ -62,7 +62,7 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.optimizer.Adam(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.Adam(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -75,7 +75,8 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.optimizer.Momentum(learning_rate=0.1, momentum=0.9) + optimizer = paddle.fluid.optimizer.Momentum( + learning_rate=0.1, momentum=0.9) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -88,7 +89,7 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.optimizer.Adam(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.Adam(learning_rate=0.01) strategy.lamb_configs = { 'lamb_weight_decay': 0.01, 'exclude_from_weight_decay': ['.b_0'], diff --git a/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py index 87c4823693e2e3f9cc759ac22029c8c12841d35d..3caa1a4eac0bf191b13e6708b1a9adffdb111ca7 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_lars_meta_optimizer.py @@ -17,7 +17,7 @@ import paddle from paddle import fluid import os import paddle.distributed.fleet as fleet -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker class TestFleetLarsMetaOptimizer(unittest.TestCase): @@ -62,7 +62,8 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9) + optimizer 
= paddle.fluid.optimizer.Momentum( + learning_rate=0.01, momentum=0.9) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -75,7 +76,7 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): startup_prog = fluid.Program() train_prog = fluid.Program() avg_cost, strategy = self.net(train_prog, startup_prog) - optimizer = paddle.optimizer.Adam(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.Adam(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py index f4bb87048494974fd4cf855573d47c5f9dabb4d9..07b988bf8752057e68925bc42f564a72d466361d 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_localsgd_meta_optimizer.py @@ -17,7 +17,7 @@ import paddle import os import paddle.distributed.fleet as fleet -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.distributed.fleet.base.role_maker as role_maker class TestFleetLocalSGDMetaOptimizer(unittest.TestCase): @@ -46,7 +46,7 @@ class TestFleetLocalSGDMetaOptimizer(unittest.TestCase): config['k_steps'] = 1 strategy.localsgd_configs = config - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/test_fleet_meta_optimizer_base.py new file mode 100755 index 0000000000000000000000000000000000000000..dfea848aadfc44c57c91c11d196eff49d57cab08 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fleet_meta_optimizer_base.py @@ -0,0 +1,58 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
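# Editorial sketch, not part of the patch: the meta-optimizer tests in this patch never
# build the DGC/LAMB/recompute passes directly; they flip flags on a DistributedStrategy
# and let fleet.distributed_optimizer choose the meta optimizer. Config values below are
# the ones the tests use; fleet.init(role) and avg_cost are assumed to exist as in the
# surrounding tests.
import paddle
import paddle.distributed.fleet as fleet

strategy = paddle.distributed.fleet.DistributedStrategy()
strategy.gradient_merge = True
strategy.gradient_merge_configs = {"k_steps": 2, "avg": True}
# or recompute, whose checkpoints must name real activations such as "fc_1.tmp_0":
# strategy.recompute = True
# strategy.recompute_configs = {"checkpoints": ["fc_1.tmp_0"]}
# assigning an attribute the strategy does not define raises TypeError

optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01)
# after fleet.init(role), wrap and minimize as the tests do:
# optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
# optimizer.minimize(avg_cost)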
+ +import unittest +import paddle +from paddle import fluid +import os +import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +from paddle.distributed.fleet.meta_optimizers.meta_optimizer_base import MetaOptimizerBase + + +class TestFleetMetaOptimizerBase(unittest.TestCase): + def net(main_prog, startup_prog): + with fluid.program_guard(main_prog, startup_prog): + with fluid.unique_name.guard(): + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype='float32') + input_y = paddle.fluid.layers.data( + name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=input_x, + size=64, + act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=256, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], + size=2, + act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) + opt = MetaOptimizerBase(optimizer) + opt_ops, params_grads = opt.minimize(avg_cost) + opt.apply_optimize(avg_cost, + paddle.static.default_startup_program(), + params_grads) + return None + + net(fluid.default_startup_program(), fluid.default_main_program()) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py index 7b7e3c7c4173fe34368d6f4207491b3800907f57..b2b6136797ba460f9f829d5df4c7041664b424cb 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py @@ -33,7 +33,8 @@ class TestFleet1(unittest.TestCase): def test_pslib_1(self): """Test cases for pslib.""" import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker try: import netifaces @@ -47,10 +48,10 @@ class TestFleet1(unittest.TestCase): os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002" os.environ["PADDLE_TRAINER_ID"] = "0" role_maker = GeneralRoleMaker() - role_maker.generate_role() + #role_maker.generate_role() place = fluid.CPUPlace() exe = fluid.Executor(place) - fleet.init(role_maker) + #fleet.init(role_maker) train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py index d35f2fe5e62884304fa1dfbbc7fbede234ef84b4..adbb1268c6f4d7b21876dacdbbb3cf453a14d0f4 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py @@ -25,7 +25,7 @@ class TestFleetMetaOptimizer(unittest.TestCase): def test_pipeline_optimizer(self): import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker + import paddle.distributed.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) with paddle.fluid.device_guard("cpu"): @@ -53,7 +53,7 @@ class TestFleetMetaOptimizer(unittest.TestCase): strategy.pipeline = 
True strategy.pipeline_configs = {'micro_batch': 2} - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py index f07c6421192a0f873d477048589b3b712f2e59e7..a42010a4eaa5066821adb817e7a5df2b81bedf7c 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py @@ -27,7 +27,7 @@ class TestFleetRecomputeMetaOptimizer(unittest.TestCase): def test_recompute_optimizer(self): import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker + import paddle.distributed.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) input_x = paddle.fluid.layers.data( @@ -43,9 +43,9 @@ class TestFleetRecomputeMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.recompute = True - strategy.recompute_configs = {"checkpoints": ["fc2"]} + strategy.recompute_configs = {"checkpoints": ["fc_1.tmp_0"]} - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py index 3abad755ac1755ddd62859fae45a14e6aaf528ee..7f1ad5d52d8f0b5a6b5bd83ea3a158f123e870ea 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py @@ -61,7 +61,8 @@ class TestCloudRoleMaker(unittest.TestCase): def test_pslib_1(self): """Test cases for pslib.""" import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker try: import netifaces @@ -75,10 +76,11 @@ class TestCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002" os.environ["PADDLE_TRAINER_ID"] = "0" role_maker = GeneralRoleMaker() - role_maker.generate_role() + #print("init rolemaker") + #role_maker.generate_role() place = fluid.CPUPlace() exe = fluid.Executor(place) - fleet.init(role_maker) + #fleet.init(role_maker) train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py index 39d3d2a2a042c74f2af0e92dd740a28ef60a5d5d..0fa852eeeebe9c8fbb056fca388a0af2c8f92842 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py @@ -33,7 +33,8 @@ class TestCloudRoleMaker(unittest.TestCase): def test_pslib_1(self): """Test cases for pslib.""" import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import 
fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker try: import netifaces @@ -50,10 +51,10 @@ class TestCloudRoleMaker(unittest.TestCase): init_timeout_seconds=100, run_timeout_seconds=100, http_ip_port="127.0.0.1:36003") - role_maker.generate_role() + #role_maker.generate_role() place = fluid.CPUPlace() exe = fluid.Executor(place) - fleet.init(role_maker) + #fleet.init(role_maker) train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py index f80d45ed5e09d026f7a971ad328f656baeb37a66..cf9b3e1e9a1605a714b47d99183511b24c903722 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py @@ -34,6 +34,7 @@ class TestRoleMakerBase(unittest.TestCase): self.assertRaises(Exception, role.worker_index) self.assertRaises(Exception, role.server_index) self.assertRaises(Exception, role.role_id) + self.assertRaises(Exception, role.node_num) trainer_endpoints = role.get_trainer_endpoints() self.assertTrue(len(trainer_endpoints) == 0) @@ -80,10 +81,12 @@ class TestCloudRoleMaker(unittest.TestCase): worker_endpoints = ro.get_trainer_endpoints() self.assertEqual(worker_endpoints[0], '127.0.0.1:36001') self.assertEqual(ro.role_id(), 0) + self.assertEqual(ro.node_num(), 2) def test_tr_rolemaker_collective(self): ro = role_maker.PaddleCloudRoleMaker(is_collective=True) self.assertEqual(ro.worker_num(), 2) + self.assertEqual(ro.node_num(), 2) def test_ps_rolemaker(self): """Test ps rolemaker.""" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_runtime.py b/python/paddle/fluid/tests/unittests/test_fleet_runtime.py index 3fd646f4340dc013c10411272bb5c0c7cd52b011..80109716a54e52dc6050b724046561f37020a645 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_runtime.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_runtime.py @@ -25,6 +25,8 @@ class TestFleetRuntime(unittest.TestCase): base._init_server() base._run_server() base._stop_worker() + base._save_inference_model() + base._save_persistables() def test_fleet_collective_runtime(self): import paddle.distributed.fleet.runtime @@ -35,6 +37,27 @@ class TestFleetRuntime(unittest.TestCase): collective_runtime._init_worker() collective_runtime._run_server() collective_runtime._stop_worker() + collective_runtime._save_inference_model() + collective_runtime._save_persistables() + + def test_fleet_ps_runtime(self): + ps_runtime = paddle.distributed.fleet.runtime.ParameterServerRuntime() + self.assertRaises(Exception, ps_runtime._get_optimizer_status, + "test_op", None) + reshaped_names, origin_names = ps_runtime._get_optimizer_status("adam", + "param") + self.assertTrue( + len(reshaped_names) == 2 and + reshaped_names[0] == 'param_moment1_0' and + reshaped_names[1] == 'param_moment2_0') + self.assertTrue( + len(origin_names) == 2 and + origin_names[0] == 'param_beta1_pow_acc_0' and + origin_names[1] == 'param_beta2_pow_acc_0') + + reshaped_names, origin_names = ps_runtime._get_optimizer_status("sgd", + "param") + self.assertTrue(len(reshaped_names) == 0 and len(origin_names) == 0) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py index 
3b0e8be63d95f29ccd1da145403a7a441698fead..7a255e5da14dacc1a5552642640e3ffe1e4eaad4 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py @@ -33,7 +33,8 @@ class TestFleet1(unittest.TestCase): def test_pslib_1(self): """Test cases for pslib.""" import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet + from paddle.fluid.incubate.fleet.parameter_server.pslib import PSLib from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker try: import netifaces @@ -47,10 +48,10 @@ class TestFleet1(unittest.TestCase): os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36002" os.environ["PADDLE_TRAINER_ID"] = "0" role_maker = GeneralRoleMaker() - role_maker.generate_role() + #role_maker.generate_role() place = fluid.CPUPlace() exe = fluid.Executor(place) - fleet.init(role_maker) + #fleet.init(role_maker) train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_util.py b/python/paddle/fluid/tests/unittests/test_fleet_util.py index 8dbf97b11239b25a7bff01f87344d80988830217..dde36e073fb20eed3b17c79a886739f59ecb185d 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_util.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_util.py @@ -56,7 +56,7 @@ class TestFleetUtil(unittest.TestCase): def test_get_util(self): import paddle.distributed.fleet as fleet - import paddle.fluid.incubate.fleet.base.role_maker as role_maker + import paddle.distributed.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) default_util = fleet.util @@ -72,7 +72,7 @@ class TestFleetUtil(unittest.TestCase): def get_user_id(self): return 10 - import paddle.fluid.incubate.fleet.base.role_maker as role_maker + import paddle.distributed.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) my_util = UserDefinedUtil() diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py index c43454eaaee9e3b2f9aa371453e58b009c99a52c..68be0bf5d561ef0d8fe92005dd9ddb47c21aca51 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py @@ -37,7 +37,6 @@ class TestFunctionalConv2D(TestCase): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" def prepare(self): @@ -88,7 +87,6 @@ class TestFunctionalConv2D(TestCase): param_attr=I.NumpyArrayInitializer(self.weight), bias_attr=False if self.no_bias else I.NumpyArrayInitializer(self.bias), - use_cudnn=self.use_cudnn, act=self.act, data_format=self.data_format) exe = fluid.Executor(self.place) @@ -121,9 +119,11 @@ class TestFunctionalConv2D(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + + if self.act == 'sigmoid': + y = F.sigmoid(y) + exe = fluid.Executor(self.place) exe.run(start) feed_dict = {"input": self.input, "weight": self.weight} @@ -144,10 +144,12 @@ class TestFunctionalConv2D(TestCase): padding=self.padding, stride=self.stride, dilation=self.dilation, - act=self.act, groups=self.groups, - 
data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + + if self.act == 'sigmoid': + y = F.sigmoid(y) + out = y.numpy() return out @@ -185,7 +187,6 @@ class TestFunctionalConv2DError(TestCase): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" def test_exception(self): @@ -228,9 +229,7 @@ class TestFunctionalConv2DError(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) class TestFunctionalConv2DCase2(TestFunctionalConv2D): @@ -383,21 +382,6 @@ class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError): self.data_format = "NCHW" -class TestFunctionalConv2DErrorCase6(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = "not_valid" - self.data_format = "NCHW" - - class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError): def setUp(self): self.in_channels = 3 diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py index 21986f1b98d869289ddb34a65316aca57c83f9d9..1fb07bf4345909deb5485a89232270336658ae8b 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py @@ -37,8 +37,6 @@ class TestFunctionalConv2D(TestCase): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" def prepare(self): @@ -90,8 +88,6 @@ class TestFunctionalConv2D(TestCase): param_attr=I.NumpyArrayInitializer(self.weight), bias_attr=False if self.no_bias else I.NumpyArrayInitializer(self.bias), - use_cudnn=self.use_cudnn, - act=self.act, data_format=self.data_format) exe = fluid.Executor(self.place) exe.run(start) @@ -115,7 +111,7 @@ class TestFunctionalConv2D(TestCase): "weight", self.weight.shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias.shape, dtype=self.dtype) - y = F.conv2d_transpose( + y = F.conv_transpose2d( x, weight, None if self.no_bias else bias, @@ -124,9 +120,7 @@ class TestFunctionalConv2D(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) exe = fluid.Executor(self.place) exe.run(start) feed_dict = {"input": self.input, "weight": self.weight} @@ -140,7 +134,7 @@ class TestFunctionalConv2D(TestCase): x = dg.to_variable(self.input) weight = dg.to_variable(self.weight) bias = None if self.no_bias else dg.to_variable(self.bias) - y = F.conv2d_transpose( + y = F.conv_transpose2d( x, weight, bias, @@ -148,10 +142,8 @@ class TestFunctionalConv2D(TestCase): padding=self.padding, stride=self.stride, dilation=self.dilation, - act=self.act, groups=self.groups, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) out = y.numpy() return out @@ -189,8 +181,6 @@ class TestFunctionalConv2DError(TestCase): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" def test_exception(self): @@ -225,7 +215,7 @@ class 
TestFunctionalConv2DError(TestCase): "weight", self.weight_shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias_shape, dtype=self.dtype) - y = F.conv2d_transpose( + y = F.conv_transpose2d( x, weight, None if self.no_bias else bias, @@ -234,9 +224,7 @@ class TestFunctionalConv2DError(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) class TestFunctionalConv2DCase2(TestFunctionalConv2D): @@ -249,8 +237,6 @@ class TestFunctionalConv2DCase2(TestFunctionalConv2D): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -264,8 +250,6 @@ class TestFunctionalConv2DCase3(TestFunctionalConv2D): self.dilation = 1 self.groups = 1 self.no_bias = True - self.act = None - self.use_cudnn = True self.data_format = "NCHW" @@ -279,8 +263,6 @@ class TestFunctionalConv2DCase4(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -294,8 +276,6 @@ class TestFunctionalConv2DCase5(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -309,8 +289,6 @@ class TestFunctionalConv2DCase6(TestFunctionalConv2D): self.dilation = (2, 1) self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -324,8 +302,6 @@ class TestFunctionalConv2DCase7(TestFunctionalConv2D): self.dilation = 1 self.groups = 4 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = False self.data_format = "NHWC" @@ -340,8 +316,6 @@ class TestFunctionalConv2DCase8(TestFunctionalConv2D): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -355,8 +329,6 @@ class TestFunctionalConv2DCase9(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -370,8 +342,6 @@ class TestFunctionalConv2DCase10(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -385,8 +355,6 @@ class TestFunctionalConv2DCase11(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -400,8 +368,6 @@ class TestFunctionalConv2DCase12(TestFunctionalConv2D): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -415,8 +381,6 @@ class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -430,8 +394,6 @@ class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NHWC" @@ -445,8 +407,6 @@ class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -460,23 +420,6 @@ class TestFunctionalConv2DErrorCase5(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 
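# Editorial aside, not part of the patch: throughout these functional-conv tests the
# act and use_cudnn arguments are dropped and conv2d_transpose is renamed
# conv_transpose2d, so the activation is applied explicitly afterwards. A minimal
# dygraph sketch with assumed NHWC shapes (in_channels=4, out_channels=5):
import numpy as np
import paddle.fluid.dygraph as dg
import paddle.nn.functional as F

with dg.guard():
    x = dg.to_variable(np.random.randn(2, 8, 8, 4).astype('float32'))
    weight = dg.to_variable(np.random.randn(4, 5, 3, 3).astype('float32'))
    y = F.conv_transpose2d(x, weight, None, padding=0, stride=1,
                           dilation=1, groups=1, data_format="NHWC")
    y = F.sigmoid(y)  # replaces the removed act="sigmoid" argument
    out = y.numpy()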
self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True - self.data_format = "NCHW" - - -class TestFunctionalConv2DErrorCase6(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 4 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = 0 - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = "not_valid" self.data_format = "NCHW" @@ -491,8 +434,6 @@ class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" @@ -506,8 +447,6 @@ class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError): self.dilation = 1 self.groups = 1 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "not_valid" @@ -521,8 +460,6 @@ class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError): self.dilation = 1 self.groups = 2 self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCHW" diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py index 195e3812f94843f6ccdd05cbc317238765e4c06b..b413a56c07a9ce3afbe15baffbffaf92a3d42129 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py @@ -37,7 +37,6 @@ class TestFunctionalConv3D(TestCase): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" def prepare(self): @@ -88,7 +87,6 @@ class TestFunctionalConv3D(TestCase): param_attr=I.NumpyArrayInitializer(self.weight), bias_attr=False if self.no_bias else I.NumpyArrayInitializer(self.bias), - use_cudnn=self.use_cudnn, act=self.act, data_format=self.data_format) exe = fluid.Executor(self.place) @@ -121,9 +119,11 @@ class TestFunctionalConv3D(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + + if self.act == 'sigmoid': + y = F.sigmoid(y) + exe = fluid.Executor(self.place) exe.run(start) feed_dict = {"input": self.input, "weight": self.weight} @@ -144,10 +144,12 @@ class TestFunctionalConv3D(TestCase): padding=self.padding, stride=self.stride, dilation=self.dilation, - act=self.act, groups=self.groups, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + + if self.act == 'sigmoid': + y = F.sigmoid(y) + out = y.numpy() return out @@ -185,7 +187,6 @@ class TestFunctionalConv3DError(TestCase): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" def test_exception(self): @@ -228,9 +229,10 @@ class TestFunctionalConv3DError(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + + if self.act == 'sigmoid': + y = F.sigmoid(y) class TestFunctionalConv3DCase2(TestFunctionalConv3D): @@ -244,7 +246,6 @@ class TestFunctionalConv3DCase2(TestFunctionalConv3D): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -259,7 +260,6 @@ class TestFunctionalConv3DCase3(TestFunctionalConv3D): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -274,7 
+274,6 @@ class TestFunctionalConv3DCase4(TestFunctionalConv3D): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -289,7 +288,6 @@ class TestFunctionalConv3DCase5(TestFunctionalConv3D): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -304,7 +302,6 @@ class TestFunctionalConv3DCase6(TestFunctionalConv3D): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -319,7 +316,6 @@ class TestFunctionalConv3DCase7(TestFunctionalConv3D): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -349,7 +345,6 @@ class TestFunctionalConv3DErrorCase2(TestFunctionalConv3DError): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = False self.data_format = "NCDHW" @@ -364,7 +359,6 @@ class TestFunctionalConv3DErrorCase3(TestFunctionalConv3DError): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = False self.data_format = "not_valid" @@ -379,22 +373,6 @@ class TestFunctionalConv3DErrorCase4(TestFunctionalConv3DError): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = False - self.data_format = "NCDHW" - - -class TestFunctionalConv3DErrorCase6(TestFunctionalConv3DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = "not_valid" self.data_format = "NCDHW" @@ -409,7 +387,6 @@ class TestFunctionalConv3DErrorCase7(TestFunctionalConv3DError): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "not_valid" @@ -424,7 +401,6 @@ class TestFunctionalConv3DErrorCase8(TestFunctionalConv3DError): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -439,7 +415,6 @@ class TestFunctionalConv3DErrorCase9(TestFunctionalConv3DError): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = False self.data_format = "NCDHW" @@ -454,7 +429,6 @@ class TestFunctionalConv3DErrorCase10(TestFunctionalConv3DError): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = False self.data_format = "NDHWC" diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py index f8e7818315fa077df4d8ad0d6d3f76b47501b5e9..7441f7cb915e8b1fdd2155fff79e145fb6a00c0f 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py @@ -38,7 +38,6 @@ class TestFunctionalConv3DTranspose(TestCase): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" def prepare(self): @@ -90,7 +89,6 @@ class TestFunctionalConv3DTranspose(TestCase): param_attr=I.NumpyArrayInitializer(self.weight), bias_attr=False if self.no_bias else I.NumpyArrayInitializer(self.bias), - use_cudnn=self.use_cudnn, act=self.act, data_format=self.data_format) exe = fluid.Executor(self.place) @@ -115,7 +113,7 @@ class TestFunctionalConv3DTranspose(TestCase): "weight", self.weight.shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias.shape, dtype=self.dtype) - y = 
F.conv3d_transpose( + y = F.conv_transpose3d( x, weight, None if self.no_bias else bias, @@ -124,9 +122,9 @@ class TestFunctionalConv3DTranspose(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + if self.act == 'sigmoid': + y = F.sigmoid(y) exe = fluid.Executor(self.place) exe.run(start) feed_dict = {"input": self.input, "weight": self.weight} @@ -140,7 +138,7 @@ class TestFunctionalConv3DTranspose(TestCase): x = dg.to_variable(self.input) weight = dg.to_variable(self.weight) bias = None if self.no_bias else dg.to_variable(self.bias) - y = F.conv3d_transpose( + y = F.conv_transpose3d( x, weight, bias, @@ -148,10 +146,10 @@ class TestFunctionalConv3DTranspose(TestCase): padding=self.padding, stride=self.stride, dilation=self.dilation, - act=self.act, groups=self.groups, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + if self.act == 'sigmoid': + y = F.sigmoid(y) out = y.numpy() return out @@ -190,7 +188,6 @@ class TestFunctionalConv3DTransposeError(TestCase): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" def test_exception(self): @@ -225,7 +222,7 @@ class TestFunctionalConv3DTransposeError(TestCase): "weight", self.weight_shape, dtype=self.dtype) if not self.no_bias: bias = fluid.data("bias", self.bias_shape, dtype=self.dtype) - y = F.conv3d_transpose( + y = F.conv_transpose3d( x, weight, None if self.no_bias else bias, @@ -234,9 +231,9 @@ class TestFunctionalConv3DTransposeError(TestCase): stride=self.stride, dilation=self.dilation, groups=self.groups, - act=self.act, - data_format=self.data_format, - use_cudnn=self.use_cudnn) + data_format=self.data_format) + if self.act == 'sigmoid': + y = F.sigmoid(y) class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose): @@ -250,7 +247,6 @@ class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -265,7 +261,6 @@ class TestFunctionalConv3DTransposeCase3(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -280,7 +275,6 @@ class TestFunctionalConv3DTransposeCase4(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = True self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -295,7 +289,6 @@ class TestFunctionalConv3DTransposeCase5(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -310,7 +303,6 @@ class TestFunctionalConv3DTransposeCase6(TestFunctionalConv3DTranspose): self.groups = 4 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = False self.data_format = "NDHWC" @@ -326,7 +318,6 @@ class TestFunctionalConv3DTransposeCase7(TestFunctionalConv3DTranspose): self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -341,7 +332,6 @@ class TestFunctionalConv3DTransposeCase8(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -356,7 +346,6 @@ class TestFunctionalConv3DTransposeCase9(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = 
"NCDHW" @@ -371,7 +360,6 @@ class TestFunctionalConv3DTransposeCase10(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -386,7 +374,6 @@ class TestFunctionalConv3DTransposeCase11(TestFunctionalConv3DTranspose): self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -402,7 +389,6 @@ class TestFunctionalConv3DTransposeErrorCase2( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -418,7 +404,6 @@ class TestFunctionalConv3DTransposeErrorCase3( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NDHWC" @@ -434,7 +419,6 @@ class TestFunctionalConv3DTransposeErrorCase4( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -450,23 +434,6 @@ class TestFunctionalConv3DTransposeErrorCase5( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True - self.data_format = "NCDHW" - - -class TestFunctionalConv3DTransposeErrorCase6( - TestFunctionalConv3DTransposeError): - def setUp(self): - self.in_channels = 4 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = 0 - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = "not_valid" self.data_format = "NCDHW" @@ -483,7 +450,6 @@ class TestFunctionalConv3DTransposeErrorCase7( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" @@ -499,7 +465,6 @@ class TestFunctionalConv3DTransposeErrorCase8( self.groups = 1 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "not_valid" @@ -515,7 +480,6 @@ class TestFunctionalConv3DTransposeErrorCase9( self.groups = 2 self.no_bias = False self.act = "sigmoid" - self.use_cudnn = True self.data_format = "NCDHW" diff --git a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py index 892f63bf15b742c51ddbc15262f888e43cdd03f3..bd934c76ebfa2ed7c9b11223b34c812e605ebe18 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py @@ -18,12 +18,11 @@ import unittest import numpy as np from op_test import OpTest import paddle.fluid as fluid +import paddle class TestGatherNdOpWithEmptyIndex(OpTest): - """ - Index has empty element, which means copy entire tensor - """ + #Index has empty element, which means copy entire tensor def setUp(self): self.op_type = "gather_nd" @@ -40,10 +39,22 @@ class TestGatherNdOpWithEmptyIndex(OpTest): self.check_grad(['X'], 'Out') +class TestGatherNdOpWithIndex1(OpTest): + def setUp(self): + self.op_type = "gather_nd" + xnp = np.random.random((5, 20)).astype("float64") + self.inputs = {'X': xnp, 'Index': np.array([1]).astype("int32")} + self.outputs = {'Out': self.inputs["X"][self.inputs["Index"]]} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + class TestGatherNdOpWithLowIndex(OpTest): - """ - Index has low rank, X has high rank - """ + #Index has low rank, X has high rank def setUp(self): self.op_type = "gather_nd" @@ -61,10 +72,27 @@ class TestGatherNdOpWithLowIndex(OpTest): self.check_grad(['X'], 'Out') +class TestGatherNdOpIndex1(OpTest): + #Index has low rank, X has high rank + + def 
setUp(self): + self.op_type = "gather_nd" + xnp = np.random.uniform(0, 100, (10, 10)).astype("float64") + index = np.array([1, 2]).astype("int64") + + self.inputs = {'X': xnp, 'Index': index} + + self.outputs = {'Out': xnp[tuple(index.T)]} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + class TestGatherNdOpWithSameIndexAsX(OpTest): - """ - Index has same rank as X's rank - """ + #Index has same rank as X's rank def setUp(self): self.op_type = "gather_nd" @@ -82,9 +110,7 @@ class TestGatherNdOpWithSameIndexAsX(OpTest): class TestGatherNdOpWithHighRankSame(OpTest): - """ - Both Index and X have high rank, and Rank(Index) = Rank(X) - """ + #Both Index and X have high rank, and Rank(Index) = Rank(X) def setUp(self): self.op_type = "gather_nd" @@ -103,9 +129,7 @@ class TestGatherNdOpWithHighRankSame(OpTest): class TestGatherNdOpWithHighRankDiff(OpTest): - """ - Both Index and X have high rank, and Rank(Index) < Rank(X) - """ + #Both Index and X have high rank, and Rank(Index) < Rank(X) def setUp(self): self.op_type = "gather_nd" @@ -162,5 +186,63 @@ class TestGatherNdOpRaise(unittest.TestCase): self.assertRaises(IndexError, check_raise_is_test) +class TestGatherNdError(unittest.TestCase): + def test_error(self): + with paddle.static.program_guard(paddle.static.Program(), + paddle.static.Program()): + + shape = [8, 9, 6] + x = paddle.data(shape=shape, dtype='float32', name='x') + index = paddle.data(shape=shape, dtype='bool', name='index') + index_float = paddle.data( + shape=shape, dtype='float32', name='index_float') + np_x = np.random.random(shape).astype('float32') + np_index = np.array(np.random.randint(2, size=shape, dtype=bool)) + + def test_x_type(): + paddle.gather_nd(np_x, index) + + self.assertRaises(TypeError, test_x_type) + + def test_index_type(): + paddle.gather_nd(x, np_index) + + self.assertRaises(TypeError, test_index_type) + + def test_index_dtype(): + paddle.gather_nd(x, index_float) + + self.assertRaises(TypeError, test_index_dtype) + + +class TestGatherNdAPI2(unittest.TestCase): + def test_static(self): + with fluid.program_guard(fluid.Program(), fluid.Program()): + data1 = fluid.layers.data('data1', shape=[-1, 2], dtype='float64') + index = fluid.layers.data('index', shape=[-1, 1], dtype='int32') + out = paddle.gather_nd(data1, index) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + input = np.array([[1, 2], [3, 4], [5, 6]]) + index_1 = np.array([[1]]) + result, = exe.run(feed={"data1": input, + "index": index_1}, + fetch_list=[out]) + expected_output = np.array([[3, 4]]) + self.assertTrue(np.allclose(result, expected_output)) + + def test_imperative(self): + paddle.disable_static() + input_1 = np.array([[1, 2], [3, 4], [5, 6]]) + index_1 = np.array([[1]]) + input = fluid.dygraph.to_variable(input_1) + index = fluid.dygraph.to_variable(index_1) + output = paddle.fluid.layers.gather(input, index) + output_np = output.numpy() + expected_output = np.array([3, 4]) + self.assertTrue(np.allclose(output_np, expected_output)) + paddle.enable_static() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_gather_op.py b/python/paddle/fluid/tests/unittests/test_gather_op.py index f8763e731eeed3b36a6271167a57b9277479b5ba..1f6e522d2668b5dcd2075ff7af6b4b1ee674632d 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_op.py @@ -21,6 +21,13 @@ import paddle import paddle.fluid as fluid +def 
gather_numpy(x, index, axis): + x_transpose = np.swapaxes(x, 0, axis) + tmp_gather = x_transpose[index, ...] + gather = np.swapaxes(tmp_gather, 0, axis) + return gather + + class TestGatherOp(OpTest): def setUp(self): self.op_type = "gather" @@ -108,12 +115,80 @@ class TestCase6(TestGatherOp): self.index_type = "int32" +class TestGatherOp1(OpTest): + def setUp(self): + self.op_type = "gather" + self.config() + xnp = np.random.random(self.x_shape).astype(self.x_type) + axis_np = np.array(self.axis).astype(self.index_type) + index_np = np.array(self.index).astype(self.index_type) + out = gather_numpy(xnp, index_np, axis_np[0]) + self.inputs = {'X': xnp, 'Index': index_np, 'Axis': axis_np} + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + def config(self): + """ + For multi-dimension input + """ + self.x_shape = (3, 88, 3) + self.x_type = "float64" + self.index = [1, 3, 5] + self.index_type = "int32" + self.axis = [1] + self.axis_type = "int32" + + +class TestGatherOp2(TestGatherOp1): + def config(self): + """ + For multi-dimension input + """ + self.x_shape = (10, 88, 10) + self.x_type = "float64" + self.index = [1, 3, 5] + self.index_type = "int64" + self.axis = [0] + self.axis_type = "int32" + + +class TestGatherOp3(TestGatherOp1): + def config(self): + """ + For multi-dimension input + """ + self.x_shape = (10, 88, 10) + self.x_type = "float64" + self.index = [1, 3, 5] + self.index_type = "int64" + self.axis = [2] + self.axis_type = "int32" + + +class TestGatherOp4(TestGatherOp1): + def config(self): + """ + For multi-dimension input + """ + self.x_shape = (3, 100, 10) + self.x_type = "float64" + self.index = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + self.index_type = "int64" + self.axis = [0] + self.axis_type = "int32" + + class API_TestGather(unittest.TestCase): - def test_out(self): + def test_out1(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data1 = fluid.layers.data('data1', shape=[-1, 2], dtype='float64') - index = fluid.layers.data('index', shape=[-1, 1], dtype='float64') - out = paddle.gather(data1, index) + index = fluid.layers.data('index', shape=[-1, 1], dtype='int32') + out = paddle.fluid.layers.gather(data1, index) place = fluid.CPUPlace() exe = fluid.Executor(place) input = np.array([[1, 2], [3, 4], [5, 6]]) @@ -124,18 +199,103 @@ class API_TestGather(unittest.TestCase): expected_output = np.array([[3, 4], [5, 6]]) self.assertTrue(np.allclose(result, expected_output)) + def test_out2(self): + with paddle.static.program_guard(paddle.static.Program(), + paddle.static.Program()): + x = paddle.data('x', shape=[-1, 2], dtype='float64') + index = paddle.data('index', shape=[-1, 1], dtype='int32') + axis = paddle.data('axis', shape=[1], dtype='int32') + out = paddle.gather(x, index, axis) + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) + x_np = np.array([[1, 2], [3, 4], [5, 6]]).astype('float64') + index_np = np.array([1, 1]).astype('int32') + axis_np = np.array([1]).astype('int32') + result, = exe.run( + feed={"x": x_np, + "index": index_np, + 'axis': axis_np}, + fetch_list=[out]) + expected_output = gather_numpy(x_np, index_np, axis_np) + self.assertTrue(np.allclose(result, expected_output)) + class API_TestDygraphGather(unittest.TestCase): - def test_out(self): - with fluid.dygraph.guard(): - input_1 = np.array([[1, 2], [3, 4], [5, 6]]) - index_1 = np.array([1, 2]) - input = fluid.dygraph.to_variable(input_1) - index = 
fluid.dygraph.to_variable(index_1) - output = paddle.fluid.layers.gather(input, index) - output_np = output.numpy() - expected_output = np.array([[3, 4], [5, 6]]) + def test_out1(self): + paddle.disable_static() + input_1 = np.array([[1, 2], [3, 4], [5, 6]]) + index_1 = np.array([1, 2]) + input = paddle.to_tensor(input_1) + index = paddle.to_tensor(index_1) + output = paddle.fluid.layers.gather(input, index) + output_np = output.numpy() + expected_output = np.array([[3, 4], [5, 6]]) + self.assertTrue(np.allclose(output_np, expected_output)) + paddle.enable_static() + + def test_out12(self): + paddle.disable_static() + input_1 = np.array([[1, 2], [3, 4], [5, 6]]) + index_1 = np.array([1, 2]) + x = paddle.to_tensor(input_1) + index = paddle.to_tensor(index_1) + output = paddle.gather(x, index, axis=0) + output_np = output.numpy() + expected_output = gather_numpy(input_1, index_1, axis=0) self.assertTrue(np.allclose(output_np, expected_output)) + paddle.enable_static() + + +class TestGathertError(unittest.TestCase): + def test_error1(self): + with paddle.static.program_guard(paddle.static.Program(), + paddle.static.Program()): + + shape = [8, 9, 6] + x = paddle.data(shape=shape, dtype='int8', name='x') + axis = paddle.data(shape=[1], dtype='float32', name='axis') + index = paddle.data(shape=shape, dtype='int32', name='index') + index_float = paddle.data( + shape=shape, dtype='float32', name='index_float') + + def test_x_type(): + paddle.gather(x, index) + + self.assertRaises(TypeError, test_x_type) + + def test_index_type(): + paddle.gather(x, index_float) + + self.assertRaises(TypeError, test_index_type) + + def test_axis_dtype(): + paddle.gather(x, index, axis=1.11) + + self.assertRaises(TypeError, test_axis_dtype) + + def test_axis_dtype(): + paddle.gather(x, index, axis=axis) + + self.assertRaises(TypeError, test_axis_dtype) + + def test_error2(self): + with fluid.program_guard(fluid.Program(), fluid.Program()): + + shape = [8, 9, 6] + x = fluid.data(shape=shape, dtype='int8', name='x') + index = fluid.data(shape=shape, dtype='int32', name='mask') + index_float = fluid.data( + shape=shape, dtype='float32', name='index_float') + + def test_x_type(): + paddle.fluid.layers.gather(x, index) + + self.assertRaises(TypeError, test_x_type) + + def test_index_type(): + paddle.fluid.layers.gather(x, index_float) + + self.assertRaises(TypeError, test_index_type) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py index a5d36203b0ad567a4ba25d686652c9384ea424bf..5054256ca72477785076cdff69266160f6c7d640 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py @@ -224,7 +224,8 @@ def _expand_bbox_targets(bbox_targets_input, class_nums, is_cls_agnostic): class TestGenerateProposalLabelsOp(OpTest): def set_data(self): - self.use_random = False + #self.use_random = False + self.init_use_random() self.init_test_cascade() self.init_test_params() self.init_test_input() @@ -267,6 +268,9 @@ class TestGenerateProposalLabelsOp(OpTest): def init_test_cascade(self, ): self.is_cascade_rcnn = False + def init_use_random(self): + self.use_random = False + def init_test_params(self): self.batch_size_per_im = 512 self.fg_fraction = 0.25 @@ -329,6 +333,28 @@ class TestCascade(TestGenerateProposalLabelsOp): self.is_cascade_rcnn = True +class TestUseRandom(TestGenerateProposalLabelsOp): 
+ def init_use_random(self): + self.use_random = True + self.is_cascade_rcnn = False + + def test_check_output(self): + self.check_output_customized(self.verify_out) + + def verify_out(self, outs): + print("skip") + + def init_test_params(self): + self.batch_size_per_im = 512 + self.fg_fraction = 0.025 + self.fg_thresh = 0.5 + self.bg_thresh_hi = 0.5 + self.bg_thresh_lo = 0.0 + self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2] + self.is_cls_agnostic = False + self.class_nums = 2 if self.is_cls_agnostic else 81 + + class TestClsAgnostic(TestCascade): def init_test_params(self): self.batch_size_per_im = 512 diff --git a/python/paddle/fluid/tests/unittests/test_generator.py b/python/paddle/fluid/tests/unittests/test_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..6cc43d3d5498284e8a24dd272eaed08cdf830733 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_generator.py @@ -0,0 +1,44 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test cloud role maker.""" + +from __future__ import print_function +import os +import unittest +import paddle.fluid.generator as generator +import time # temp for debug + + +class TestGenerator(unittest.TestCase): + """ + Test cases for cpu generator. 
+ """ + + def test_basic_generator(self): + """Test basic generator.""" + gen = generator.Generator() + gen.manual_seed(123123143) + s = gen.initial_seed() + s = gen.seed() + st = gen.get_state() + gen.set_state(st) + gen.random() + gen.set_cpu_engine(gen.get_cpu_engine()) + + def test_basic_generator_error(self): + self.assertRaises(ValueError, generator.Generator, device="CUDA") + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py index 6660bfb0c747300741b305a101734e1ef808eeb5..4f0beb8c0dcd5384e7b9f6e30e8082595ac4dc06 100644 --- a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py @@ -124,14 +124,8 @@ class TestBase(unittest.TestCase): label = item['label'] assert image.shape() == [BATCH_SIZE, 784] assert label.shape() == [BATCH_SIZE, 1] - if ps[i]._equals(fluid.CPUPlace()): - assert image._place()._equals(fluid.CPUPlace()) - assert label._place()._equals(fluid.CPUPlace()) - else: - assert image._place()._equals( - fluid.CUDAPinnedPlace()) - assert label._place()._equals( - fluid.CUDAPinnedPlace()) + assert image._place()._equals(ps[i]) + assert label._place()._equals(ps[i]) L, = exe.run(program=prog, feed=d, fetch_list=[loss], diff --git a/python/paddle/fluid/tests/unittests/test_grid_sample_function.py b/python/paddle/fluid/tests/unittests/test_grid_sample_function.py new file mode 100644 index 0000000000000000000000000000000000000000..4a33f32a0b6977716d8065419f8e0f88d6c4f44a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_grid_sample_function.py @@ -0,0 +1,131 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import paddle +from paddle import fluid, nn +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F +import unittest + + +class GridSampleTestCase(unittest.TestCase): + def __init__(self, + methodName='runTest', + x_shape=[2, 2, 3, 3], + grid_shape=[2, 3, 3, 2], + mode="bilinear", + padding_mode="zeros", + align_corners=False): + super(GridSampleTestCase, self).__init__(methodName) + self.padding_mode = padding_mode + self.x_shape = x_shape + self.grid_shape = grid_shape + self.mode = mode + self.padding_mode = padding_mode + self.align_corners = align_corners + self.dtype = "float64" + + def setUp(self): + self.x = np.random.randn(*(self.x_shape)).astype(self.dtype) + self.grid = np.random.uniform(-1, 1, self.grid_shape).astype(self.dtype) + + def static_functional(self, place): + main = fluid.Program() + start = fluid.Program() + with fluid.unique_name.guard(): + with fluid.program_guard(main, start): + x = fluid.data("x", self.x_shape, dtype=self.dtype) + grid = fluid.data("grid", self.grid_shape, dtype=self.dtype) + y_var = F.grid_sample( + x, + grid, + mode=self.mode, + padding_mode=self.padding_mode, + align_corners=self.align_corners) + feed_dict = {"x": self.x, "grid": self.grid} + exe = fluid.Executor(place) + exe.run(start) + y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var]) + return y_np + + def dynamic_functional(self): + x_t = paddle.to_tensor(self.x) + grid_t = paddle.to_tensor(self.grid) + y_t = F.grid_sample( + x_t, + grid_t, + mode=self.mode, + padding_mode=self.padding_mode, + align_corners=self.align_corners) + y_np = y_t.numpy() + return y_np + + def _test_equivalence(self, place): + result1 = self.static_functional(place) + with dg.guard(place): + result2 = self.dynamic_functional() + np.testing.assert_array_almost_equal(result1, result2) + + def runTest(self): + place = fluid.CPUPlace() + self._test_equivalence(place) + + if fluid.core.is_compiled_with_cuda(): + place = fluid.CUDAPlace(0) + self._test_equivalence(place) + + +class GridSampleErrorTestCase(GridSampleTestCase): + def runTest(self): + place = fluid.CPUPlace() + with self.assertRaises(ValueError): + self.static_functional(place) + + +def add_cases(suite): + suite.addTest(GridSampleTestCase(methodName='runTest')) + suite.addTest( + GridSampleTestCase( + methodName='runTest', + mode='bilinear', + padding_mode='reflect', + align_corners=True)) + suite.addTest( + GridSampleTestCase( + methodName='runTest', + mode='bilinear', + padding_mode='zeros', + align_corners=True)) + + +def add_error_cases(suite): + suite.addTest( + GridSampleErrorTestCase( + methodName='runTest', padding_mode="VALID")) + suite.addTest( + GridSampleErrorTestCase( + methodName='runTest', align_corners="VALID")) + suite.addTest(GridSampleErrorTestCase(methodName='runTest', mode="VALID")) + + +def load_tests(loader, standard_tests, pattern): + suite = unittest.TestSuite() + add_cases(suite) + add_error_cases(suite) + return suite + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py index bd5a07769e30de5110566f630de2d480e3426c77..4d1ed5aeb96ebbe064e35c1bee9d5775812440f7 100644 --- a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py @@ -17,17 +17,17 @@ import numpy as np from op_test import OpTest -def AffineGrid(theta, size): - n = size[0] - h = size[2] - w = size[3] +def AffineGrid(theta, 
grid_shape): + n = grid_shape[0] + h = grid_shape[1] + w = grid_shape[2] h_idx = np.repeat( np.linspace(-1, 1, h)[np.newaxis, :], w, axis=0).T[:, :, np.newaxis] w_idx = np.repeat( np.linspace(-1, 1, w)[np.newaxis, :], h, axis=0)[:, :, np.newaxis] grid = np.concatenate( [w_idx, h_idx, np.ones([h, w, 1])], axis=2) # h * w * 3 - grid = np.repeat(grid[np.newaxis, :], size[0], axis=0) # n * h * w *3 + grid = np.repeat(grid[np.newaxis, :], n, axis=0) # n * h * w *3 ret = np.zeros([n, h * w, 2]) theta = theta.transpose([0, 2, 1]) @@ -40,15 +40,19 @@ def AffineGrid(theta, size): def getGridPointValue(data, x, y): data_shape = data.shape N = data_shape[0] - H = data_shape[2] - W = data_shape[3] - - out = np.zeros(data_shape, dtype='float64') + C = data_shape[1] + in_H = data_shape[2] + in_W = data_shape[3] + out_H = x.shape[1] + out_W = x.shape[2] + + #out = np.zeros(data_shape, dtype='float64') + out = np.zeros([N, C, out_H, out_W], dtype='float64') for i in range(N): - for j in range(H): - for k in range(W): - if y[i, j, k] < 0 or y[i, j, k] > H - 1 or x[i, j, k] < 0 or x[ - i, j, k] > W - 1: + for j in range(out_H): + for k in range(out_W): + if y[i, j, k] < 0 or y[i, j, k] > in_H - 1 or x[ + i, j, k] < 0 or x[i, j, k] > in_W - 1: out[i, :, j, k] = 0 else: out[i, :, j, k] = data[i, :, y[i, j, k], x[i, j, k]] @@ -56,44 +60,89 @@ def getGridPointValue(data, x, y): return out -def GridSampler(data, grid): - dims = data.shape - N = dims[0] - C = dims[1] - H = dims[2] - W = dims[3] +def clip(x, min_n, max_n): + return np.maximum(np.minimum(x, max_n), min_n) - x = grid[:, :, :, 0] - y = grid[:, :, :, 1] - y_max = H - 1 - x_max = W - 1 - x = 0.5 * ((x.astype('float64') + 1.0) * x_max) - y = 0.5 * ((y.astype('float64') + 1.0) * y_max) +def unnormalizeAndClip(grid_slice, max_val, align_corners, padding_mode): + if align_corners: + grid_slice = 0.5 * ((grid_slice.astype('float64') + 1.0) * max_val) + else: + grid_slice = 0.5 * ( + (grid_slice.astype('float64') + 1.0) * (max_val + 1)) - 0.5 + + if padding_mode == "border": + grid_slice = clip(grid_slice, 0, max_val) + elif padding_mode == "reflect": + double_range = 2 * max_val if align_corners else (max_val + 1) * 2 + grid_abs = np.abs(grid_slice) if align_corners else np.abs(grid_slice + + 0.5) + extra = grid_abs - np.floor(grid_abs / double_range) * double_range + grid_slice = np.minimum(extra, double_range - extra) + grid_slice = grid_slice if align_corners else clip(grid_slice - 0.5, 0, + max_val) + return grid_slice - x0 = np.floor(x).astype('int32') - x1 = x0 + 1 - y0 = np.floor(y).astype('int32') - y1 = y0 + 1 - wa = np.tile(((x1 - x) * (y1 - y)).reshape((N, 1, H, W)), (1, C, 1, 1)) - wb = np.tile(((x1 - x) * (y - y0)).reshape((N, 1, H, W)), (1, C, 1, 1)) - wc = np.tile(((x - x0) * (y1 - y)).reshape((N, 1, H, W)), (1, C, 1, 1)) - wd = np.tile(((x - x0) * (y - y0)).reshape((N, 1, H, W)), (1, C, 1, 1)) +def GridSampler(data, + grid, + align_corners=True, + mode="bilinear", + padding_mode="zeros"): + dims = data.shape + N = dims[0] + in_C = dims[1] + in_H = dims[2] + in_W = dims[3] - va = getGridPointValue(data, x0, y0) - vb = getGridPointValue(data, x0, y1) - vc = getGridPointValue(data, x1, y0) - vd = getGridPointValue(data, x1, y1) + out_H = grid.shape[1] + out_W = grid.shape[2] - out = (wa * va + wb * vb + wc * vc + wd * vd).astype('float64') + x = grid[:, :, :, 0] + y = grid[:, :, :, 1] + y_max = in_H - 1 + x_max = in_W - 1 + + x = unnormalizeAndClip(x, x_max, align_corners, padding_mode) + y = unnormalizeAndClip(y, y_max, align_corners, 
padding_mode) + + if mode == "bilinear": + x0 = np.floor(x).astype('int32') + x1 = x0 + 1 + y0 = np.floor(y).astype('int32') + y1 = y0 + 1 + + wa = np.tile(((x1 - x) * (y1 - y)).reshape((N, 1, out_H, out_W)), + (1, in_C, 1, 1)) + wb = np.tile(((x1 - x) * (y - y0)).reshape((N, 1, out_H, out_W)), + (1, in_C, 1, 1)) + wc = np.tile(((x - x0) * (y1 - y)).reshape((N, 1, out_H, out_W)), + (1, in_C, 1, 1)) + wd = np.tile(((x - x0) * (y - y0)).reshape((N, 1, out_H, out_W)), + (1, in_C, 1, 1)) + + va = getGridPointValue(data, x0, y0) + vb = getGridPointValue(data, x0, y1) + vc = getGridPointValue(data, x1, y0) + vd = getGridPointValue(data, x1, y1) + + out = (wa * va + wb * vb + wc * vc + wd * vd).astype('float64') + elif mode == "nearest": + x = np.round(x).astype('int32') + y = np.round(y).astype('int32') + out = getGridPointValue(data, x, y) return out class TestGridSamplerOp(OpTest): def setUp(self): - self.initTestCase() + self.use_cudnn = False + self.numeric_grad_delta = 0.0001 self.op_type = 'grid_sampler' + self.align_corners = True + self.padding_mode = "zeros" + self.mode = "bilinear" + self.initTestCase() x = np.random.randint(0, 255, self.x_shape).astype('float64') theta = np.zeros(self.theta_shape).astype('float64') @@ -101,22 +150,90 @@ class TestGridSamplerOp(OpTest): for j in range(2): for k in range(3): theta[i, j, k] = np.random.rand(1)[0] - grid = AffineGrid(theta, self.x_shape) + grid = AffineGrid(theta, self.grid_shape) self.inputs = {'X': x, 'Grid': grid} - self.attrs = {'use_cudnn': True} - self.outputs = {'Output': GridSampler(x, grid)} + self.attrs = { + 'use_cudnn': self.use_cudnn, + "align_corners": self.align_corners, + "padding_mode": self.padding_mode, + "mode": self.mode + } + # print("X: {}".format(x)) + self.outputs = { + 'Output': GridSampler(x, grid, self.align_corners, self.mode, + self.padding_mode) + } def test_check_output(self): self.check_output() def test_check_grad_normal(self): - self.check_grad(['X', 'Grid'], 'Output', max_relative_error=0.61) + self.check_grad( + ['X', 'Grid'], + 'Output', + max_relative_error=0.01, + numeric_grad_delta=self.numeric_grad_delta) + + def initTestCase(self): + self.x_shape = (2, 3, 8, 8) + self.grid_shape = (2, 7, 9, 2) + self.theta_shape = (2, 2, 3) + self.align_corners = True + self.padding_mode = "zeros" + self.mode = "bilinear" + self.use_cudnn = True + + +class Case1(TestGridSamplerOp): + def initTestCase(self): + self.x_shape = (2, 3, 5, 6) + self.grid_shape = (2, 8, 9, 2) + self.theta_shape = (2, 2, 3) + self.align_corners = False + self.padding_mode = "zeros" + self.mode = "bilinear" + + +class Case1(TestGridSamplerOp): + def initTestCase(self): + self.x_shape = (2, 3, 5, 6) + self.grid_shape = (2, 8, 9, 2) + self.theta_shape = (2, 2, 3) + self.align_corners = False + self.padding_mode = "border" + self.mode = "bilinear" + + +class Case2(TestGridSamplerOp): + def initTestCase(self): + self.x_shape = (2, 3, 5, 6) + self.grid_shape = (2, 8, 9, 2) + self.theta_shape = (2, 2, 3) + self.align_corners = False + self.padding_mode = "reflect" + self.mode = "bilinear" + + +class Case3(TestGridSamplerOp): + def initTestCase(self): + self.x_shape = (2, 3, 5, 6) + self.grid_shape = (2, 8, 9, 2) + self.theta_shape = (2, 2, 3) + self.align_corners = True + self.padding_mode = "reflect" + self.mode = "bilinear" + +class Case4(TestGridSamplerOp): def initTestCase(self): - self.x_shape = (2, 5, 7, 3) - self.grid_shape = (2, 7, 3, 2) + self.x_shape = (2, 3, 5, 6) + self.grid_shape = (2, 8, 9, 2) self.theta_shape = (2, 2, 3) + 
self.align_corners = False + self.padding_mode = "reflect" + self.mode = "nearest" + self.numeric_grad_delta = 0.0001 if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_hdfs1.py b/python/paddle/fluid/tests/unittests/test_hdfs1.py new file mode 100644 index 0000000000000000000000000000000000000000..430ed1abe860869d791f0eac17accc8416db1eca --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_hdfs1.py @@ -0,0 +1,104 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet +import os +import sys + +from paddle.distributed.fleet.utils import LocalFS, HDFSClient, FSTimeOut, FSFileExistsError, FSFileNotExistsError + +java_home = os.environ["JAVA_HOME"] + +from paddle.fluid.tests.unittests.hdfs_test_utils import FSTestBase + + +class FSTest1(FSTestBase): + def test_timeout(self): + fs = HDFSClient( + "/usr/local/hadoop-2.7.7/", + None, + time_out=6 * 1000, + sleep_inter=100) + src = "hdfs_test_timeout" + dst = "new_hdfs_test_timeout" + fs.delete(dst) + fs.mkdirs(src) + fs.mkdirs(dst) + fs.mkdirs(dst + "/" + src) + output = "" + try: + fs.mv(src, dst, test_exists=False) + self.assertFalse(1, "can't execute cmd:{} output:{}".format(cmd, + output)) + except FSTimeOut as e: + print("execute mv {} to {} timeout".format(src, dst)) + + cmd = "{} -mv {} {}".format(fs._base_cmd, src, dst) + ret, output = fluid.core.shell_execute_cmd(cmd, 6 * 1000, 2 * 1000) + self.assertNotEqual(ret, 0) + print("second mv ret:{} output:{}".format(ret, output)) + + def test_is_dir(self): + fs = HDFSClient( + "/usr/local/hadoop-2.7.7/", + None, + time_out=6 * 1000, + sleep_inter=100) + self.assertFalse(fs.is_dir("./test_hdfs.py")) + s = """ +java.io.IOException: Input/output error + responseErrorMsg : failed to getFileStatus, errorCode: 3, path: /user/PUBLIC_KM_Data/wangxi16/data/serving_model, lparam: d868f6bb6822c621, errorMessage: inner error + at org.apache.hadoop.util.FileSystemUtil.throwException(FileSystemUtil.java:164) + at org.apache.hadoop.util.FileSystemUtil.dealWithResponse(FileSystemUtil.java:118) + at org.apache.hadoop.lite.client.LiteClientImpl.getFileStatus(LiteClientImpl.java:696) + at org.apache.hadoop.fs.LibDFileSystemImpl.getFileStatus(LibDFileSystemImpl.java:297) + at org.apache.hadoop.fs.LiteFileSystem.getFileStatus(LiteFileSystem.java:514) + at org.apache.hadoop.fs.FsShell.test(FsShell.java:1092) + at org.apache.hadoop.fs.FsShell.run(FsShell.java:2285) + at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65) + at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79) + at org.apache.hadoop.fs.FsShell.main(FsShell.java:2353) + """ + + print("split lines:", s.splitlines()) + self.assertTrue(fs._test_match(s.splitlines()) != None) + + def test_config(self): + config = {"fs.default.name": "hdfs://xxx", 
"hadoop.job.ugi": "ugi"} + fs = HDFSClient( + "/usr/local/hadoop-2.7.7/", + config, + time_out=6 * 1000, + sleep_inter=100) + + def test_exists(self): + fs = HDFSClient( + "/usr/local/hadoop-2.7.7/", + None, + time_out=6 * 1000, + sleep_inter=100) + self.assertFalse(fs.is_exist(os.path.abspath("./xxxx"))) + self.assertFalse(fs.is_dir(os.path.abspath("./xxxx"))) + self.assertTrue(fs.is_dir(os.path.abspath("./xxx/.."))) + dirs, files = fs.ls_dir(os.path.abspath("./test_hdfs1.py")) + self.assertTrue(dirs == []) + self.assertTrue(len(files) == 1) + dirs, files = fs.ls_dir(os.path.abspath("./xxx/..")) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_hdfs2.py b/python/paddle/fluid/tests/unittests/test_hdfs2.py new file mode 100644 index 0000000000000000000000000000000000000000..7754f89e3c901ac14cb102881e8d338442038559 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_hdfs2.py @@ -0,0 +1,50 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet +import os +import sys + +from paddle.distributed.fleet.utils import LocalFS, HDFSClient, FSTimeOut, FSFileExistsError, FSFileNotExistsError + +java_home = os.environ["JAVA_HOME"] + +from paddle.fluid.tests.unittests.hdfs_test_utils import FSTestBase + + +class FSTest2(FSTestBase): + def test_hdfs(self): + fs = HDFSClient( + "/usr/local/hadoop-2.7.7/", + None, + time_out=5 * 1000, + sleep_inter=100) + self._test_rm(fs) + self._test_touch(fs) + self._test_dirs(fs) + + def test_local(self): + fs = LocalFS() + self._test_rm(fs) + self._test_touch(fs) + self._test_dirs(fs) + + self._test_touch_file(fs) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_hdfs3.py b/python/paddle/fluid/tests/unittests/test_hdfs3.py new file mode 100644 index 0000000000000000000000000000000000000000..1a045f4b17fc9b8b68ccf81a23cb953db58a9db7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_hdfs3.py @@ -0,0 +1,53 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet +import os +import sys + +from paddle.distributed.fleet.utils import LocalFS, HDFSClient, FSTimeOut, FSFileExistsError, FSFileNotExistsError + +java_home = os.environ["JAVA_HOME"] + +from paddle.fluid.tests.unittests.hdfs_test_utils import FSTestBase + + +class FSTest3(FSTestBase): + def test_hdfs(self): + fs = HDFSClient( + "/usr/local/hadoop-2.7.7/", + None, + time_out=5 * 1000, + sleep_inter=100) + self._test_mkdirs(fs) + self._test_list_dir(fs) + self._test_try_upload(fs) + self._test_try_download(fs) + + self._test_upload(fs) + self._test_download(fs) + + def test_local(self): + fs = LocalFS() + self._test_mkdirs(fs) + self._test_list_dir(fs) + self._test_try_upload(fs) + self._test_try_download(fs) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 8a88c2d673c4d1450064c84a8036e1cbe7179b66..f83f8ef35215e5a0199c4d63744882126212b928 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -21,6 +21,7 @@ from paddle.fluid import core from paddle.fluid import Linear from test_imperative_base import new_program_scope import paddle.fluid.dygraph_utils as dygraph_utils +from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper import paddle @@ -629,6 +630,16 @@ class TestDygraphUtils(unittest.TestCase): res2 = fluid.layers.sigmoid(a) self.assertTrue(np.allclose(res1.numpy(), res2.numpy())) + def test_append_activation_in_dygraph3(self): + a_np = np.random.random(size=(10, 20, 30)).astype(np.float32) + helper = LayerObjectHelper(fluid.unique_name.generate("test")) + func = helper.append_activation + with fluid.dygraph.guard(): + a = fluid.dygraph.to_variable(a_np) + res1 = func(a, act="sigmoid", use_cudnn=True) + res2 = fluid.layers.sigmoid(a) + self.assertTrue(np.array_equal(res1.numpy(), res2.numpy())) + def test_append_bias_in_dygraph_exception(self): with new_program_scope(): np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py index 82e81d72f9a9823817355087d332c3d7fb1ffe5a..820206a3ce630eb92a36a154ca7cdec62de2ce34 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
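# --- Reviewer sketch (not part of the patch) -------------------------------------------
# test_append_activation_in_dygraph3 (added above) asserts that LayerObjectHelper's
# append_activation with use_cudnn=True matches a plain fluid.layers.sigmoid. The core
# of that check, condensed:
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper

helper = LayerObjectHelper(fluid.unique_name.generate("test"))
with fluid.dygraph.guard():
    a = fluid.dygraph.to_variable(np.random.random((10, 20, 30)).astype(np.float32))
    res1 = helper.append_activation(a, act="sigmoid", use_cudnn=True)
    res2 = fluid.layers.sigmoid(a)
    assert np.array_equal(res1.numpy(), res2.numpy())
# ----------------------------------------------------------------------------------------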
+import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework import unittest @@ -27,7 +28,7 @@ class TestTracerMode(unittest.TestCase): def get_tracer_mode(self): assert fluid.in_dygraph_mode(), "Dygraph mode must be enabled" - @fluid.dygraph.no_grad + @paddle.no_grad() def no_grad_func(self, a): self.assertEqual(self.tracer._train_mode, False) return a @@ -55,13 +56,32 @@ class TestTracerMode(unittest.TestCase): def need_no_grad_func(a, b=1): return a + b - decorated_func = fluid.dygraph.no_grad(need_no_grad_func) + decorated_func = paddle.no_grad()(need_no_grad_func) self.assertTrue( str(inspect.getargspec(decorated_func)) == str(inspect.getargspec(need_no_grad_func))) self.assertEqual(self.tracer._train_mode, self.init_mode) + def test_gen(): + for i in range(3): + yield i + + a = 0 + for i in test_gen(): + a += i + + @paddle.no_grad() + def test_wrapped_gen(): + for i in range(3): + yield i + + b = 0 + for i in test_wrapped_gen(): + b += i + + self.assertEqual(a, b) + with fluid.dygraph.guard(): self.check_not_support_rlt(False) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py index a391c088a3640c097ff0f4ff714bf50470c575c6..b15ad911ee79d47011be6eaa4bde62ba71c55c0e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py @@ -28,11 +28,11 @@ class LeNetDygraph(fluid.dygraph.Layer): super(LeNetDygraph, self).__init__() self.num_classes = num_classes self.features = nn.Sequential( - nn.Conv2D( + nn.Conv2d( 1, 6, 3, stride=1, padding=1), nn.ReLU(), nn.Pool2D(2, 'max', 2), - nn.Conv2D( + nn.Conv2d( 6, 16, 5, stride=1, padding=0), nn.ReLU(), nn.Pool2D(2, 'max', 2)) @@ -61,7 +61,7 @@ def init_weights(layer): new_bias = paddle.fill_constant( layer.bias.shape, layer.bias.dtype, value=-0.1) layer.bias.set_value(new_bias) - elif type(layer) == nn.Conv2D: + elif type(layer) == nn.Conv2d: new_weight = paddle.fill_constant( layer.weight.shape, layer.weight.dtype, value=0.7) layer.weight.set_value(new_weight) @@ -81,7 +81,7 @@ class TestLayerApply(unittest.TestCase): if type(layer) == nn.Linear: np.testing.assert_allclose(layer.weight.numpy(), 0.9) np.testing.assert_allclose(layer.bias.numpy(), -0.1) - elif type(layer) == nn.Conv2D: + elif type(layer) == nn.Conv2d: np.testing.assert_allclose(layer.weight.numpy(), 0.7) np.testing.assert_allclose(layer.bias.numpy(), -0.2) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py new file mode 100644 index 0000000000000000000000000000000000000000..c7e0902341a59649219cf94ef9741fdf7ae09233 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py @@ -0,0 +1,63 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
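# --- Reviewer sketch (not part of the patch) -------------------------------------------
# The decorator tests above move from fluid.dygraph.no_grad to paddle.no_grad() and add a
# check that decorating a generator does not change what it yields. Minimal form:
import paddle

@paddle.no_grad()
def wrapped_gen():
    for i in range(3):
        yield i

assert sum(wrapped_gen()) == sum(range(3))  # decoration must preserve generator semantics
# ----------------------------------------------------------------------------------------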
+ +from __future__ import print_function + +import unittest + +import paddle +import paddle.nn as nn +import paddle.fluid as fluid + +import numpy as np + + +class LeNetDygraph(fluid.dygraph.Layer): + def __init__(self): + super(LeNetDygraph, self).__init__() + self.features = nn.Sequential( + nn.Conv2d( + 1, 6, 3, stride=1, padding=1), + nn.ReLU(), + nn.Pool2D(2, 'max', 2), + nn.Conv2d( + 6, 16, 5, stride=1, padding=0), + nn.ReLU(), + nn.Pool2D(2, 'max', 2)) + + def forward(self, inputs): + x = self.features(inputs) + + return x + + +class TestLayerChildren(unittest.TestCase): + def test_apply_init_weight(self): + with fluid.dygraph.guard(): + net = LeNetDygraph() + net.eval() + + net_layers = nn.Sequential(*list(net.children())) + net_layers.eval() + + x = paddle.rand([2, 1, 28, 28]) + + y1 = net(x) + y2 = net_layers(x) + + np.testing.assert_allclose(y1.numpy(), y2.numpy()) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..9f75c92b185ed338eca15cab1b624da97b1fda33 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py @@ -0,0 +1,728 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import contextlib +import unittest +import numpy as np +import six +import itertools + +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from paddle.fluid.optimizer import SGDOptimizer, Adam, MomentumOptimizer, LarsMomentumOptimizer, AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer, FtrlOptimizer, LambOptimizer +from paddle.fluid.optimizer import ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer +from paddle.fluid.dygraph import Linear +from paddle.fluid.dygraph.base import to_variable +from test_imperative_base import new_program_scope + +# Note(wangzhongpu) +# In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer. 
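# --- Reviewer sketch (not part of the patch) -------------------------------------------
# TestLayerChildren (added above) asserts that rebuilding a model from Layer.children()
# reproduces the original forward pass. A condensed variant that applies the same check
# directly to a small Sequential, assuming children() yields its sublayers in order:
import numpy as np
import paddle
import paddle.nn as nn
import paddle.fluid as fluid

with fluid.dygraph.guard():
    net = nn.Sequential(
        nn.Conv2d(1, 6, 3, stride=1, padding=1), nn.ReLU(), nn.Pool2D(2, 'max', 2))
    rebuilt = nn.Sequential(*list(net.children()))  # same sublayers, new container
    net.eval()
    rebuilt.eval()
    x = paddle.rand([2, 1, 28, 28])
    np.testing.assert_allclose(net(x).numpy(), rebuilt(x).numpy())
# ----------------------------------------------------------------------------------------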
+ + +class MLP(fluid.Layer): + def __init__(self, param_attr=None, bias_attr=None): + super(MLP, self).__init__() + + self._fc1 = Linear(784, 10) + self._fc2 = Linear(10, 10) + + def forward(self, inputs): + y = self._fc1(inputs) + y = self._fc2(y) + return y + + +class TestImperativeOptimizerBase(unittest.TestCase): + def setUp(self): + self.batch_num = 20 + + def get_optimizer_dygraph(self, parameter_list): + raise NotImplementedError() + + def get_optimizer(self): + raise NotImplementedError() + + def reader_decorator(self, reader): + def _reader_imple(): + for item in reader(): + image = np.array(item[0]).reshape(1, 784) + label = np.array(item[1]).astype('int64').reshape(1) + yield image, label + + return _reader_imple + + def _check_exception(self, exception_message, place=None): + seed = 90 + batch_size = 128 + if place == None: + place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + + with fluid.dygraph.guard(place): + try: + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + mlp = MLP() + optimizer = self.get_optimizer_dygraph( + parameter_list=mlp.parameters()) + except Exception as e: + assert str(e) == exception_message + + def _check_mlp(self, place=None): + seed = 90 + batch_size = 128 + + if place == None: + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + + with fluid.dygraph.guard(place): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + + mlp = MLP() + optimizer = self.get_optimizer_dygraph( + parameter_list=mlp.parameters()) + + batch_py_reader = fluid.io.PyReader(capacity=1) + batch_py_reader.decorate_sample_list_generator( + paddle.batch( + self.reader_decorator(paddle.dataset.mnist.train()), + batch_size=batch_size, + drop_last=True), + places=fluid.CPUPlace()) + + dy_param_init_value = {} + for batch_id, data in enumerate(batch_py_reader()): + if batch_id >= self.batch_num: + break + + img = data[0] + label = data[1] + label.stop_gradient = True + + img = fluid.layers.reshape(img, shape=[batch_size, -1]) + cost = mlp(img) + avg_loss = fluid.layers.reduce_mean(cost) + dy_out = avg_loss.numpy() + + if batch_id == 0: + for param in mlp.parameters(): + dy_param_init_value[param.name] = param.numpy() + + avg_loss.backward() + optimizer.minimize(avg_loss) + mlp.clear_gradients() + dy_param_value = {} + for param in mlp.parameters(): + dy_param_value[param.name] = param.numpy() + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + + if place == None: + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + + exe = fluid.Executor(place) + + mlp = MLP() + optimizer = self.get_optimizer() + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=128, drop_last=True) + + img = fluid.layers.data( + name='pixel', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = fluid.layers.reshape(img, shape=[batch_size, 784]) + cost = mlp(img) + avg_loss = fluid.layers.reduce_mean(cost) + optimizer.minimize(avg_loss) + + # initialize params and fetch them + static_param_init_value = {} + static_param_name_list = [] + for param in mlp.parameters(): + static_param_name_list.append(param.name) + + out = exe.run(fluid.default_startup_program(), + fetch_list=static_param_name_list) + + for i in range(len(static_param_name_list)): + 
static_param_init_value[static_param_name_list[i]] = out[i] + + for batch_id, data in enumerate(train_reader()): + if batch_id >= self.batch_num: + break + + static_x_data = np.array( + [x[0].reshape(1, 28, 28) for x in data]).astype('float32') + y_data = np.array([x[1] for x in data]).astype('int64').reshape( + [128, 1]) + + fetch_list = [avg_loss.name] + fetch_list.extend(static_param_name_list) + out = exe.run(fluid.default_main_program(), + feed={"pixel": static_x_data, + "label": y_data}, + fetch_list=fetch_list) + + static_param_value = {} + static_out = out[0] + for i in range(1, len(out)): + static_param_value[static_param_name_list[i - 1]] = out[i] + + for key, value in six.iteritems(static_param_init_value): + self.assertTrue(np.allclose(value, dy_param_init_value[key])) + + self.assertTrue(np.allclose(static_out, dy_out)) + + for key, value in six.iteritems(static_param_value): + self.assertTrue(np.allclose(value, dy_param_value[key])) + + +class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + bd = [3, 6, 9] + optimizer = SGDOptimizer( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, + values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]), + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + bd = [3, 6, 9] + optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)])) + return optimizer + + def test_sgd(self): + self._check_mlp() + + +class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = SGDOptimizer( + learning_rate=fluid.layers.natural_exp_decay( + learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True), + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay( + learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + return optimizer + + def test_sgd(self): + self._check_mlp() + + +class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = SGDOptimizer( + learning_rate=fluid.layers.exponential_decay( + learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True), + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay( + learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + return optimizer + + def test_sgd(self): + self._check_mlp() + + +class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = Adam( + learning_rate=fluid.layers.inverse_time_decay( + learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True), + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay( + learning_rate=0.1, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + return optimizer + + def test_adam(self): + self._check_mlp() + + +class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = SGDOptimizer( + learning_rate=fluid.layers.polynomial_decay( + learning_rate=0.1, decay_steps=5, cycle=self.cycle), 
+ parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay( + learning_rate=0.1, decay_steps=5, cycle=self.cycle)) + return optimizer + + def test_sgd_cycle(self): + self.cycle = True + self._check_mlp() + + def test_sgd(self): + self.cycle = False + self._check_mlp() + + +class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = SGDOptimizer( + learning_rate=fluid.layers.cosine_decay( + learning_rate=0.1, step_each_epoch=10000, epochs=120), + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay( + learning_rate=0.1, step_each_epoch=10000, epochs=120)) + return optimizer + + def test_sgd(self): + self._check_mlp() + + +class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = SGDOptimizer( + learning_rate=fluid.layers.noam_decay( + d_model=512, warmup_steps=8000), + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay( + d_model=512, warmup_steps=8000)) + return optimizer + + def test_sgd(self): + self._check_mlp() + + +class TestOptimizerLearningRate(unittest.TestCase): + def test_constant_lr(self): + with fluid.dygraph.guard(): + a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + + linear = fluid.dygraph.nn.Linear(10, 10) + + a = fluid.dygraph.to_variable(a) + + b = linear(a) + + loss = fluid.layers.reduce_mean(b) + + adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters()) + + self.assertTrue( + np.allclose( + adam.get_lr(), 0.001, rtol=1e-06, atol=0.0)) + + for i in range(10): + adam.minimize(loss) + lr = adam.get_lr() + + self.assertTrue(np.allclose(lr, 0.001, rtol=1e-06, atol=0.0)) + + def test_lr_decay(self): + with fluid.dygraph.guard(): + a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + + linear = fluid.dygraph.nn.Linear(10, 10) + + a = fluid.dygraph.to_variable(a) + + b = linear(a) + + loss = fluid.layers.reduce_mean(b) + + bd = [2, 4, 6, 8] + value = [0.2, 0.4, 0.6, 0.8, 1.0] + + adam = paddle.optimizer.Adam( + fluid.dygraph.PiecewiseDecay(bd, value, 0), + parameters=linear.parameters()) + + self.assertTrue( + np.allclose( + adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)) + + ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] + for i in range(12): + adam.minimize(loss) + lr = adam.get_lr() + + self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0)) + + def test_lr_decay_natural_exp(self): + with fluid.dygraph.guard(): + a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + + linear = fluid.dygraph.nn.Linear(10, 10) + + a = fluid.dygraph.to_variable(a) + + b = linear(a) + + loss = fluid.layers.reduce_mean(b) + base_lr = 1.0 + + adam = paddle.optimizer.Adam( + fluid.dygraph.NaturalExpDecay( + learning_rate=base_lr, + decay_steps=3, + decay_rate=0.5, + staircase=True), + parameters=linear.parameters()) + + self.assertTrue( + np.allclose( + adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)) + + ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)] + for i in range(5): + adam.minimize(loss) + lr = adam.get_lr() + + self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0)) + + def test_set_lr(self): + with fluid.dygraph.guard(): + a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + + linear = 
fluid.dygraph.nn.Linear(10, 10) + + a = fluid.dygraph.to_variable(a) + + b = linear(a) + + loss = fluid.layers.reduce_mean(b) + + adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) + + lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] + for i in range(5): + adam.set_lr(lr_list[i]) + adam.minimize(loss) + lr = adam.get_lr() + self.assertTrue( + np.allclose( + lr, lr_list[i], rtol=1e-06, atol=0.0)) + + lr_var = fluid.layers.create_global_var( + shape=[1], value=0.7, dtype='float32') + adam.set_lr(lr_var) + adam.minimize(loss) + lr = adam.get_lr() + self.assertTrue(np.allclose(lr, 0.7, rtol=1e-06, atol=0.0)) + + with self.assertRaises(RuntimeError): + adam = paddle.optimizer.Adam( + fluid.dygraph.NaturalExpDecay( + learning_rate=0.1, + decay_steps=3, + decay_rate=0.5, + staircase=True), + parameters=linear.parameters()) + adam.set_lr(0.01) + + +class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = MomentumOptimizer( + learning_rate=0.001, momentum=0.9, parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) + return optimizer + + def test_momentum(self): + self._check_mlp() + + +class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = LarsMomentumOptimizer( + learning_rate=0.001, momentum=0.9, parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9) + return optimizer + + def test_larsmomentum(self): + self._check_mlp() + + +class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = AdagradOptimizer( + learning_rate=0.2, parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = AdagradOptimizer(learning_rate=0.2) + return optimizer + + def test_adagrad(self): + self._check_mlp() + + +class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = AdamaxOptimizer( + learning_rate=0.2, parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = AdamaxOptimizer(learning_rate=0.2) + return optimizer + + def test_adamax(self): + self._check_mlp() + + +class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = DpsgdOptimizer( + learning_rate=0.01, + clip=10.0, + batch_size=16.0, + sigma=1.0, + parameter_list=parameter_list) + optimizer._seed = 100 + return optimizer + + def get_optimizer(self): + optimizer = DpsgdOptimizer( + learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0) + optimizer._seed = 100 + return optimizer + + def test_dpsgd(self): + self._check_mlp(place=fluid.CPUPlace()) + + +class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = DecayedAdagradOptimizer( + learning_rate=0.2, parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = DecayedAdagradOptimizer(learning_rate=0.2) + return optimizer + + def test_decayadagrad(self): + self._check_mlp() + + +class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = AdadeltaOptimizer( + learning_rate=0.0003, + epsilon=1.0e-6, + 
rho=0.95, + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = AdadeltaOptimizer( + learning_rate=0.0003, epsilon=1.0e-6, rho=0.95) + return optimizer + + def test_adadelta(self): + self._check_mlp() + + +class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = RMSPropOptimizer( + learning_rate=0.1, parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = RMSPropOptimizer(learning_rate=0.1) + return optimizer + + def test_rmsprop(self): + self._check_mlp() + + +class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = FtrlOptimizer( + learning_rate=0.1, parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = FtrlOptimizer(learning_rate=0.1) + return optimizer + + def test_ftrl(self): + self._check_mlp() + + +def exclude_fn(param): + return param.name.endswith('.b_0') + + +class TestImperativeLambOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = LambOptimizer( + learning_rate=0.002, + exclude_from_weight_decay_fn=exclude_fn, + parameter_list=parameter_list) + return optimizer + + def get_optimizer(self): + optimizer = LambOptimizer( + learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn) + return optimizer + + def test_lamb(self): + self._check_mlp() + + +class TestImperativeModelAverage(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = ModelAverage( + 0.15, min_average_window=10000, max_average_window=12500) + return optimizer + + def test_modelaverage(self): + exception_message = "In dygraph, don't support ModelAverage." + self._check_exception(exception_message) + + +class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = DGCMomentumOptimizer( + learning_rate=0.0001, + momentum=0.9, + rampup_step=1000, + rampup_begin_step=1252, + sparsity=[0.999, 0.999]) + return optimizer + + def test_dgcmomentum(self): + exception_message = "In dygraph, don't support DGCMomentumOptimizer." + self._check_exception(exception_message) + + +class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = ExponentialMovingAverage(0.999) + return optimizer + + def test_exponentialmoving(self): + exception_message = "In dygraph, don't support ExponentialMovingAverage." + self._check_exception(exception_message) + + +class TestImperativePipelineOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = paddle.optimizer.SGD(learning_rate=0.5, + parameter_list=parameter_list) + optimizer = PipelineOptimizer(optimizer) + return optimizer + + def test_pipline(self): + exception_message = "In dygraph, don't support PipelineOptimizer." + self._check_exception(exception_message) + + +class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = paddle.optimizer.SGD(learning_rate=0.5, + parameter_list=parameter_list) + optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5) + return optimizer + + def test_lookahead(self): + exception_message = "In dygraph, don't support LookaheadOptimizer." 
+ self._check_exception(exception_message) + + +class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase): + def get_optimizer_dygraph(self, parameter_list): + optimizer = paddle.optimizer.SGD(learning_rate=0.5, + parameter_list=parameter_list) + optimizer = RecomputeOptimizer(optimizer) + return optimizer + + def test_recompute(self): + exception_message = "In dygraph, don't support RecomputeOptimizer." + self._check_exception(exception_message) + + +class TestImperativeOptimizerList(unittest.TestCase): + def test_parameter_list(self): + with fluid.dygraph.guard(): + linear_1 = Linear(10, 10) + linear_2 = Linear(10, 10) + + sgd = SGDOptimizer( + 1.0, + parameter_list=itertools.chain(linear_1.parameters(), + linear_2.parameters())) + + in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + in_data = fluid.dygraph.to_variable(in_np) + + y = linear_1(in_data) + y = linear_2(y) + loss = fluid.layers.reduce_mean(y) + loss.backward() + sgd.minimize(loss) + + self.assertTrue( + len(sgd._parameter_list) == + len(linear_1.parameters() + linear_2.parameters())) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..4ab35a21aff43af822821c14007fbdd69a081803 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -0,0 +1,917 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
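+
+# The tests below build a small PTB LSTM model in dygraph mode, save its
+# parameters and Adam optimizer state with paddle.save / fluid.save_dygraph,
+# and check that paddle.load / fluid.load_dygraph (and set_state_dict with
+# in-memory or numpy state dicts) restore exactly the values recorded in setUp.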
+ +from __future__ import print_function + +import os +import unittest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.dygraph.nn import Embedding, Linear +import paddle.fluid.framework as framework +from paddle.optimizer import Adam +from paddle.fluid.dygraph.base import to_variable +from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay +from test_imperative_base import new_program_scope +import numpy as np +import six +import paddle + + +class SimpleLSTMRNN(fluid.Layer): + def __init__(self, + hidden_size, + num_steps, + num_layers=2, + init_scale=0.1, + dropout=None): + super(SimpleLSTMRNN, self).__init__() + self._hidden_size = hidden_size + self._num_layers = num_layers + self._init_scale = init_scale + self._dropout = dropout + self._input = None + self._num_steps = num_steps + self.cell_array = [] + self.hidden_array = [] + self.weight_1_arr = [] + self.weight_2_arr = [] + self.bias_arr = [] + self.mask_array = [] + + for i in range(self._num_layers): + weight_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 2, self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)) + self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) + bias_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.Constant(0.0)) + self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) + + def forward(self, input_embedding, init_hidden=None, init_cell=None): + self.cell_array = [] + self.hidden_array = [] + + for i in range(self._num_layers): + pre_hidden = fluid.layers.slice( + init_hidden, axes=[0], starts=[i], ends=[i + 1]) + pre_cell = fluid.layers.slice( + init_cell, axes=[0], starts=[i], ends=[i + 1]) + pre_hidden = fluid.layers.reshape( + pre_hidden, shape=[-1, self._hidden_size]) + pre_cell = fluid.layers.reshape( + pre_cell, shape=[-1, self._hidden_size]) + self.hidden_array.append(pre_hidden) + self.cell_array.append(pre_cell) + + res = [] + for index in range(self._num_steps): + self._input = fluid.layers.slice( + input_embedding, axes=[1], starts=[index], ends=[index + 1]) + self._input = fluid.layers.reshape( + self._input, shape=[-1, self._hidden_size]) + for k in range(self._num_layers): + pre_hidden = self.hidden_array[k] + pre_cell = self.cell_array[k] + weight_1 = self.weight_1_arr[k] + bias = self.bias_arr[k] + + nn = fluid.layers.concat([self._input, pre_hidden], 1) + gate_input = fluid.layers.matmul(x=nn, y=weight_1) + + gate_input = fluid.layers.elementwise_add(gate_input, bias) + i, j, f, o = fluid.layers.split( + gate_input, num_or_sections=4, dim=-1) + c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( + i) * fluid.layers.tanh(j) + m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) + self.hidden_array[k] = m + self.cell_array[k] = c + self._input = m + + if self._dropout is not None and self._dropout > 0.0: + self._input = fluid.layers.dropout( + self._input, + dropout_prob=self._dropout, + dropout_implementation='upscale_in_train') + res.append( + fluid.layers.reshape( + self._input, shape=[1, -1, self._hidden_size])) + real_res = fluid.layers.concat(res, 0) + real_res = 
fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + last_hidden = fluid.layers.concat(self.hidden_array, 1) + last_hidden = fluid.layers.reshape( + last_hidden, shape=[-1, self._num_layers, self._hidden_size]) + last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_cell = fluid.layers.concat(self.cell_array, 1) + last_cell = fluid.layers.reshape( + last_cell, shape=[-1, self._num_layers, self._hidden_size]) + last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + return real_res, last_hidden, last_cell + + +class PtbModel(fluid.Layer): + def __init__(self, + hidden_size, + vocab_size, + num_layers=2, + num_steps=20, + init_scale=0.1, + dropout=None): + super(PtbModel, self).__init__() + self.hidden_size = hidden_size + self.vocab_size = vocab_size + self.init_scale = init_scale + self.num_layers = num_layers + self.num_steps = num_steps + self.dropout = dropout + self.simple_lstm_rnn = SimpleLSTMRNN( + hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) + self.embedding = Embedding( + size=[vocab_size, hidden_size], + dtype='float32', + is_sparse=False, + param_attr=fluid.ParamAttr( + name='embedding_para', + initializer=fluid.initializer.UniformInitializer( + low=-init_scale, high=init_scale))) + + self.softmax_weight = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.hidden_size, self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) + self.softmax_bias = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) + + def forward(self, input, label, init_hidden, init_cell): + init_h = fluid.layers.reshape( + init_hidden, shape=[self.num_layers, -1, self.hidden_size]) + + init_c = fluid.layers.reshape( + init_cell, shape=[self.num_layers, -1, self.hidden_size]) + + x_emb = self.embedding(input) + x_emb = fluid.layers.reshape( + x_emb, shape=[-1, self.num_steps, self.hidden_size]) + if self.dropout is not None and self.dropout > 0.0: + x_emb = fluid.layers.dropout( + x_emb, + dropout_prob=self.drop_out, + dropout_implementation='upscale_in_train') + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, + init_c) + rnn_out = fluid.layers.reshape( + rnn_out, shape=[-1, self.num_steps, self.hidden_size]) + + projection = fluid.layers.matmul(rnn_out, self.softmax_weight) + projection = fluid.layers.elementwise_add(projection, self.softmax_bias) + projection = fluid.layers.reshape( + projection, shape=[-1, self.vocab_size]) + loss = fluid.layers.softmax_with_cross_entropy( + logits=projection, label=label, soft_label=False) + loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) + loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = fluid.layers.reduce_sum(loss) + + return loss, last_hidden, last_cell + + +class TestDygraphPtbRnn(unittest.TestCase): + def setUp(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # 
this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + new_lr = 1.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + parameters=ptb_model.parameters()) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + if i == 0: + for param in ptb_model.parameters(): + dy_param_init[param.name] = param.numpy() + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + if i == batch_num - 1: + for param in ptb_model.parameters(): + dy_param_updated[param.name] = param.numpy() + + # check optimizer + self.opti_dict = adam.state_dict() + self.base_opti = {} + for k, v in self.opti_dict.items(): + if isinstance(v, core.VarBase): + self.base_opti[v.name] = v.numpy() + self.assertTrue(np.sum(np.abs(v.numpy())) != 0) + else: + self.base_opti[k] = v + + fluid.save_dygraph(self.opti_dict, "./test_dy") + + self.state_dict = ptb_model.state_dict() + + self.model_base = {} + for k, v in self.state_dict.items(): + np_t = v.numpy() + self.model_base[k] = np_t + + paddle.save(self.state_dict, "./test_dy") + + def testLoadAndSetVarBase(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + new_lr = 1.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + parameters=ptb_model.parameters()) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + if i == 0: + for param in ptb_model.parameters(): + dy_param_init[param.name] = param.numpy() + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + 
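# after the final batch, take a snapshot of the trained parameter values +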
if i == batch_num - 1: + for param in ptb_model.parameters(): + dy_param_updated[param.name] = param.numpy() + + # check optimizer + opti_dict = adam.state_dict() + # set to zero + for k, v in opti_dict.items(): + if isinstance(v, core.VarBase): + np_t = v.numpy() + var = v.value().get_tensor() + var.set(np.zeros_like(np_t), place) + + self.assertTrue(np.sum(np.abs(v.numpy())) == 0) + + if isinstance(adam._learning_rate, LearningRateDecay): + adam._learning_rate.step_num = 0 + + para_state_dict, opti_state_dict = paddle.load("./test_dy") + adam.set_state_dict(opti_state_dict) + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + if isinstance(v, core.VarBase): + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name])) + else: + self.assertEqual(v, self.base_opti[k]) + + # check parameter + state_dict = ptb_model.state_dict() + for k, v in state_dict.items(): + np_t = v.numpy() + var = v.value().get_tensor() + + var.set(np.zeros_like(np_t), place) + + ptb_model.set_dict(para_state_dict) + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[k] + + self.assertTrue(np.array_equal(new_t, base_t)) + + def testSetVariable(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + new_lr = 1.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + parameters=ptb_model.parameters()) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + if i == 0: + for param in ptb_model.parameters(): + dy_param_init[param.name] = param.numpy() + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + if i == batch_num - 1: + for param in ptb_model.parameters(): + dy_param_updated[param.name] = param.numpy() + + # check optimizer + opti_dict = adam.state_dict() + # set to zero + for k, v in opti_dict.items(): + if isinstance(v, core.VarBase): + np_t = v.numpy() + var = v.value().get_tensor() + var.set(np.zeros_like(np_t), place) + + self.assertTrue(np.sum(np.abs(v.numpy())) == 0) + + if isinstance(adam._learning_rate, LearningRateDecay): + adam._learning_rate.step_num = 0 + + adam.set_state_dict(self.opti_dict) + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + if isinstance(v, core.VarBase): + 
self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name])) + else: + self.assertEqual(v, self.base_opti[k]) + + # check parameter + state_dict = ptb_model.state_dict() + for k, v in state_dict.items(): + np_t = v.numpy() + var = v.value().get_tensor() + + var.set(np.zeros_like(np_t), place) + + ptb_model.set_dict(self.state_dict) + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[k] + + self.assertTrue(np.array_equal(new_t, base_t)) + + def testSetNumpy(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [1.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + new_lr = 1.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + parameters=ptb_model.parameters()) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + if i == 0: + for param in ptb_model.parameters(): + dy_param_init[param.name] = param.numpy() + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + if i == batch_num - 1: + for param in ptb_model.parameters(): + dy_param_updated[param.name] = param.numpy() + + # check optimizer + opti_dict = adam.state_dict() + np_opti_dict = {} + # set to zero + for k, v in opti_dict.items(): + if isinstance(v, core.VarBase): + np_t = v.numpy() + np_opti_dict[v.name] = np_t + var = v.value().get_tensor() + var.set(np.zeros_like(np_t), place) + self.assertTrue(np.sum(np.abs(v.numpy())) == 0) + else: + np_opti_dict[k] = v + + if isinstance(adam._learning_rate, LearningRateDecay): + adam._learning_rate.step_num = 0 + + adam.set_state_dict(np_opti_dict) + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + if isinstance(v, core.VarBase): + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name])) + else: + self.assertEqual(v, self.base_opti[k]) + + # check parameter + state_dict = ptb_model.state_dict() + np_state_dict = {} + for k, v in state_dict.items(): + np_t = v.numpy() + np_state_dict[k] = np_t + var = v.value().get_tensor() + + var.set(np.zeros_like(np_t), place) + + ptb_model.set_dict(np_state_dict) + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[k] + + self.assertTrue(np.array_equal(new_t, base_t)) + + def testSetVariableBeforeTrain(self): + seed = 
90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=0.0, + beta1=0.8, + beta2=0.6, + parameters=ptb_model.parameters()) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + adam.set_state_dict(self.opti_dict) + ptb_model.set_dict(self.state_dict) + + for i in range(1): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + if k == "global_step": + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] + 1)) + + if k.find("beta1_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta1)) + if k.find("beta2_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta2)) + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[k] + self.assertTrue(np.array_equal(new_t, base_t)) + + def testLoadAndSetVarBaseBeforeTrain(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [0.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + # set lr to zero not update parameter + new_lr = 0.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=0.0, + beta1=0.8, + beta2=0.6, + parameters=ptb_model.parameters()) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + state_dict, opti_dict = fluid.load_dygraph("./test_dy") + adam.set_state_dict(opti_dict) + ptb_model.set_dict(state_dict) + + for i in range(1): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = 
to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + if k == "global_step": + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] + 1)) + + if k.find("beta1_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta1)) + if k.find("beta2_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta2)) + + # check parameter + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[k] + self.assertTrue(np.array_equal(new_t, base_t)) + + def testSetNumpyBeforeTrain(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with fluid.dygraph.guard(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + # TODO: marsyang1993 Change seed to + ptb_model = PtbModel( + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + bd = [] + lr_arr = [0.0] + # this a fake lr decay strategy + for i in range(1, 10): + bd.append(100 * i) + # set lr to 0.0, not update parameter + new_lr = 0.0 + lr_arr.append(new_lr) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + adam = Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr_arr), + beta1=0.8, + beta2=0.6, + parameters=ptb_model.parameters()) + dy_param_updated = dict() + dy_param_init = dict() + dy_loss = None + last_hidden = None + last_cell = None + + np_opti_dict = {} + np_state_dict = {} + + for k, v in self.opti_dict.items(): + if isinstance(v, core.VarBase): + np_opti_dict[v.name] = v.numpy() + else: + np_opti_dict[k] = v + + for k, v in self.state_dict.items(): + np_state_dict[k] = v.numpy() + + adam.set_state_dict(np_opti_dict) + ptb_model.set_dict(np_state_dict) + for i in range(1): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + x = to_variable(x_data) + y = to_variable(y_data) + init_hidden = to_variable(init_hidden_data) + init_cell = to_variable(init_cell_data) + dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, + init_cell) + + dy_loss.backward() + adam.minimize(dy_loss) + ptb_model.clear_gradients() + + opti_dict = adam.state_dict() + for k, v in opti_dict.items(): + if k == "global_step": + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] + 1)) + + if k.find("beta1_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta1)) + if k.find("beta2_pow_acc_0") > 0: + self.assertTrue( + np.array_equal(v.numpy(), self.base_opti[v.name] * + adam._beta2)) + + # check parameter + + state_dict = ptb_model.state_dict() + + for k, v in state_dict.items(): + new_t = v.numpy() + + base_t = self.model_base[k] + self.assertTrue(np.array_equal(new_t, base_t)) + + def 
testOnlyLoadParams(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, opti_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy')) + + self.assertTrue(opti_state_dict == None) + + para_state_dict, opti_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy.pdparams')) + + para_state_dict, opti_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy.pdopt')) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py new file mode 100644 index 0000000000000000000000000000000000000000..8a868e751f0567e6387b0e9471f0382c9456bcb6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py @@ -0,0 +1,161 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.fluid as fluid +import unittest +import numpy as np + + +def run_static(x_np, dtype, op_str, use_gpu=False): + paddle.enable_static() + startup_program = fluid.Program() + main_program = fluid.Program() + place = paddle.CPUPlace() + if use_gpu and fluid.core.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + exe = fluid.Executor(place) + with fluid.program_guard(main_program, startup_program): + x = paddle.data(name='x', shape=x_np.shape, dtype=dtype) + res = getattr(paddle.tensor, op_str)(x) + exe.run(startup_program) + static_result = exe.run(main_program, + feed={'x': x_np}, + fetch_list=[res]) + return static_result + + +def run_dygraph(x_np, op_str, use_gpu=True): + place = paddle.CPUPlace() + if use_gpu and fluid.core.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + paddle.disable_static(place) + x = paddle.to_variable(x_np) + dygraph_result = getattr(paddle.tensor, op_str)(x) + return dygraph_result + + +def np_data_generator(low, high, np_shape, type, sv_list, op_str, *args, + **kwargs): + x_np = np.random.uniform(low, high, np_shape).astype(getattr(np, type)) + # x_np.shape[0] >= len(sv_list) + if type in ['float16', 'float32', 'float64']: + for i, v in enumerate(sv_list): + x_np[i] = v + ori_shape = x_np.shape + x_np = x_np.reshape((np.product(ori_shape), )) + np.random.shuffle(x_np) + x_np = x_np.reshape(ori_shape) + result_np = getattr(np, op_str)(x_np) + return x_np, result_np + + +TEST_META_DATA = [ + { + 'low': 0.1, + 'high': 1, + 'np_shape': [8, 17, 5, 6, 7], + 'type': 'float16', + 'sv_list': [np.inf, np.nan] + }, + { + 'low': 0.1, + 'high': 1, + 'np_shape': [11, 17], + 'type': 'float32', + 'sv_list': [np.inf, np.nan] + }, + { + 'low': 0.1, + 'high': 1, + 'np_shape': [2, 3, 4, 5], + 'type': 'float64', + 'sv_list': [np.inf, np.nan] + }, + { + 'low': 0, + 'high': 100, + 'np_shape': [11, 17, 10], + 'type': 'int32', + 'sv_list': [np.inf, np.nan] + }, + { + 'low': 0, + 'high': 999, + 'np_shape': [132], + 'type': 'int64', + 
'sv_list': [np.inf, np.nan] + }, +] + + +def test(test_case, op_str, use_gpu=False): + for meta_data in TEST_META_DATA: + meta_data = dict(meta_data) + meta_data['op_str'] = op_str + x_np, result_np = np_data_generator(**meta_data) + static_result = run_static(x_np, meta_data['type'], op_str, use_gpu) + dygraph_result = run_dygraph(x_np, op_str, use_gpu) + test_case.assertTrue((static_result == result_np).all()) + test_case.assertTrue((dygraph_result.numpy() == result_np).all()) + + +class TestCPUNormal(unittest.TestCase): + def test_inf(self): + test(self, 'isinf') + + def test_nan(self): + test(self, 'isnan') + + def test_finite(self): + test(self, 'isfinite') + + +class TestCUDANormal(unittest.TestCase): + def test_inf(self): + test(self, 'isinf', True) + + def test_nan(self): + test(self, 'isnan', True) + + def test_finite(self): + test(self, 'isfinite', True) + + +class TestError(unittest.TestCase): + def test_bad_input(self): + paddle.enable_static() + with fluid.program_guard(fluid.Program()): + + def test_isinf_bad_x(): + x = [1, 2, 3] + result = paddle.tensor.isinf(x) + + self.assertRaises(TypeError, test_isinf_bad_x) + + def test_isnan_bad_x(): + x = [1, 2, 3] + result = paddle.tensor.isnan(x) + + self.assertRaises(TypeError, test_isnan_bad_x) + + def test_isfinite_bad_x(): + x = [1, 2, 3] + result = paddle.tensor.isfinite(x) + + self.assertRaises(TypeError, test_isfinite_bad_x) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index 89b12da9cf999618407c9b094dff852fc7428704..4d7711a5df9fc3b70bcb3137dee0bcc949135266 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -15,6 +15,7 @@ from __future__ import print_function import os +import pickle import unittest import numpy as np @@ -25,7 +26,7 @@ from paddle.fluid.dygraph import declarative, ProgramTranslator from paddle.fluid.dygraph.io import VARIABLE_FILENAME, EXTRA_VAR_INFO_FILENAME BATCH_SIZE = 32 -BATCH_NUM = 20 +BATCH_NUM = 10 SEED = 10 @@ -318,5 +319,76 @@ class TestJitMultipleLoading(unittest.TestCase): name_set.add(var.name) +class LinearNetReturnHidden(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(LinearNetReturnHidden, self).__init__() + self._linear_1 = Linear(in_size, out_size) + self._linear_2 = Linear(in_size, out_size) + + @declarative + def forward(self, x): + y = self._linear_1(x) + z = self._linear_2(y) + loss = fluid.layers.mean(z) + return y, loss + + +class TestJitPruneModelAndLoad(unittest.TestCase): + def setUp(self): + self.linear_size = 4 + self.model_path = "model.jit_prune_model_and_load" + # enable dygraph mode + fluid.enable_dygraph() + # config seed + fluid.default_main_program().random_seed = SEED + + def train_and_save(self): + train_layer = LinearNetReturnHidden(8, 8) + adam = fluid.optimizer.AdamOptimizer( + learning_rate=0.1, parameter_list=train_layer.parameters()) + x = fluid.dygraph.to_variable( + np.random.random((4, 8)).astype('float32')) + for i in range(10): + hidden, loss = train_layer(x) + loss.backward() + adam.minimize(loss) + train_layer.clear_gradients() + + configs = fluid.dygraph.jit.SaveLoadConfig() + configs.output_spec = [hidden] + fluid.dygraph.jit.save( + layer=train_layer, + model_path=self.model_path, + input_spec=[x], + configs=configs) + + return train_layer + + def test_load_pruned_model(self): + train_layer = self.train_and_save() + 
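# jit.save pruned the program to the `hidden` output via output_spec, so the +
# loaded infer_layer returns a single tensor matching train_layer(x)[0] +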
train_layer.eval() + + infer_layer = fluid.dygraph.jit.load(self.model_path) + + x = fluid.dygraph.to_variable( + np.random.random((4, 8)).astype('float32')) + self.assertTrue( + np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy())) + + def test_load_var_not_in_extra_var_info(self): + self.train_and_save() + + # chage extra var info + var_info_path = os.path.join(self.model_path, EXTRA_VAR_INFO_FILENAME) + with open(var_info_path, 'rb') as f: + extra_var_info = pickle.load(f) + extra_var_info.clear() + with open(var_info_path, 'wb') as f: + pickle.dump(extra_var_info, f, protocol=2) + + with self.assertRaises(RuntimeError): + fluid.dygraph.jit.load(self.model_path) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py index a19b4d9c13a9e646da405babfbac98f7ed15f217..8780727e4cb276a989a8d04d05c6419a4874e7f5 100644 --- a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py @@ -13,6 +13,7 @@ from __future__ import division +import paddle import unittest import numpy as np from op_test import OpTest @@ -77,5 +78,36 @@ class TestKLDivLossOp4(TestKLDivLossOp): self.reduction = 'sum' +class TestKLDivLossDygraph(unittest.TestCase): + def run_kl_loss(self, reduction, shape=(5, 20)): + x = np.random.uniform(-10, 10, shape).astype('float64') + target = np.random.uniform(-10, 10, shape).astype('float64') + gt_loss = kldiv_loss(x, target, reduction) + + with paddle.fluid.dygraph.guard(): + kldiv_criterion = paddle.nn.KLDivLoss(reduction) + pred_loss = kldiv_criterion( + paddle.to_variable(x), paddle.to_variable(target)) + self.assertTrue(np.allclose(pred_loss.numpy(), gt_loss)) + + def test_kl_loss_batchmean(self): + self.run_kl_loss('batchmean') + + def test_kl_loss_mean(self): + self.run_kl_loss('mean') + + def test_kl_loss_sum(self): + self.run_kl_loss('sum') + + def test_kl_loss_none(self): + self.run_kl_loss('none') + + def test_kl_loss_static_api(self): + input = paddle.fluid.data(name='input', shape=[5, 20]) + label = paddle.fluid.data(name='label', shape=[5, 20]) + + pred_loss = paddle.nn.functional.kl_div(input, label) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 9da70e85f01c0a13a87766a1befbda206c510cbe..1992a3bb39807a62966e245d24888cc074746e8d 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -283,6 +283,24 @@ class TestLayer(LayerTest): with self.assertRaises(ValueError): lm(base.to_variable(inp)) + def test_SyncBatchNorm(self): + if core.is_compiled_with_cuda(): + with self.static_graph(): + t = layers.data(name='t', shape=[-1, 3, 5, 5], dtype='float32') + my_sync_bn = paddle.nn.SyncBatchNorm(3) + ret = my_sync_bn(t) + static_ret = self.get_static_graph_result( + feed={'t': np.ones( + [3, 3, 5, 5], dtype='float32')}, + fetch_list=[ret])[0] + + with self.dynamic_graph(): + t = np.ones([3, 3, 5, 5], dtype='float32') + my_syncbn = paddle.nn.SyncBatchNorm(3) + dy_ret = my_syncbn(base.to_variable(t)) + dy_ret_value = dy_ret.numpy() + self.assertTrue(np.array_equal(static_ret, static_ret)) + def test_relu(self): with self.static_graph(): t = layers.data(name='t', shape=[3, 3], dtype='float32') @@ -298,21 +316,6 @@ class TestLayer(LayerTest): self.assertTrue(np.allclose(static_ret, dy_ret_value)) - 
def test_leakyrelu(self): - inputs = np.random.uniform(-1, 1, (10, 10)).astype('float32') - with self.static_graph(): - t = layers.data(name='t', shape=[10, 10], dtype='float32') - ret = layers.leaky_relu(t, alpha=0.01) - static_ret = self.get_static_graph_result( - feed={'t': inputs}, fetch_list=[ret])[0] - - with self.dynamic_graph(): - lrelu = paddle.nn.LeakyReLU(alpha=0.01) - dy_ret = lrelu(base.to_variable(inputs)) - dy_ret_value = dy_ret.numpy() - - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - def test_pad2d(self): with self.static_graph(): t = layers.data(name='t', shape=[-1, 3, 5, 5], dtype='float32') @@ -2660,13 +2663,6 @@ class TestBook(LayerTest): out = layers.brelu(input, t_min=1.0, t_max=20.0, name='brelu') return (out) - def make_leaky_relu(self): - with program_guard(fluid.default_main_program(), - fluid.default_startup_program()): - input = self._get_data(name="input", shape=[16], dtype="float32") - out = layers.leaky_relu(input, alpha=0.1, name='leaky_relu') - return (out) - def make_soft_relu(self): with program_guard(fluid.default_main_program(), fluid.default_startup_program()): @@ -3686,5 +3682,32 @@ class TestBook(LayerTest): batch_first=batch_first) +class TestMetricsDetectionMap(unittest.TestCase): + def test_detection_map(self): + program = fluid.Program() + with program_guard(program): + detect_res = fluid.layers.data( + name='detect_res', + shape=[10, 6], + append_batch_size=False, + dtype='float32') + label = fluid.layers.data( + name='label', + shape=[10, 1], + append_batch_size=False, + dtype='float32') + box = fluid.layers.data( + name='bbox', + shape=[10, 4], + append_batch_size=False, + dtype='float32') + map_eval = fluid.metrics.DetectionMAP( + detect_res, label, box, class_num=21) + cur_map, accm_map = map_eval.get_map_var() + self.assertIsNotNone(cur_map) + self.assertIsNotNone(accm_map) + print(str(program)) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py index 71b452d4a2dd192c756599eb24949084bfa0860e..9a2e7b85e5202288b62a640e41e06f131b0cba84 100644 --- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @@ -19,6 +19,7 @@ import math import numpy as np import unittest +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle.fluid.framework as framework @@ -553,79 +554,459 @@ def reduce_lr_on_plateau(decay_rate, threshold, cooldown, patience, m, n, loss, class TestReduceLROnPlateauDecay(unittest.TestCase): - def test_dygraph_mode(self): - with fluid.dygraph.guard(): - # the decay rate must be less than 1.0 - with self.assertRaises(ValueError): - fluid.dygraph.ReduceLROnPlateau( - learning_rate=1.0, decay_rate=2.0) - # the mode must be "min" or "max" - with self.assertRaises(ValueError): - fluid.dygraph.ReduceLROnPlateau(learning_rate=1.0, mode="test") - # the threshold_mode must be "rel" or "abs" - with self.assertRaises(ValueError): - fluid.dygraph.ReduceLROnPlateau( - learning_rate=1.0, threshold_mode="test") - - base_lr = 1.0 - patience = 3 - cooldown = 1 - decay_rate = 0.5 - threshold = 1e-4 - linear = fluid.dygraph.Linear(10, 10) + def test_ReduceLR(self): + # the decay rate must be less than 1.0 + with self.assertRaises(ValueError): + paddle.optimizer.ReduceLROnPlateau(learning_rate=1.0, factor=2.0) + # the mode must be "min" or "max" + with 
self.assertRaises(ValueError): + paddle.optimizer.ReduceLROnPlateau(learning_rate=1.0, mode="test") + # the threshold_mode must be "rel" or "abs" + with self.assertRaises(ValueError): + paddle.optimizer.ReduceLROnPlateau( + learning_rate=1.0, threshold_mode="test") + with self.assertRaises(TypeError): + paddle.optimizer.ReduceLROnPlateau(learning_rate="test") + with self.assertRaises(TypeError): + paddle.optimizer.ReduceLROnPlateau(learning_rate=0.5).step("test") + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + + for place in places: for m, n in zip(['min', 'max', 'min', 'max'], ['rel', 'rel', 'abs', 'abs']): kwargs = { - 'learning_rate': base_lr, - 'decay_rate': decay_rate, - 'threshold': threshold, - 'verbose': True, - 'patience': patience, - 'cooldown': cooldown, + 'learning_rate': 1.0, 'mode': m, + 'factor': 0.5, + 'patience': 3, + 'threshold': 1e-4, 'threshold_mode': n, - 'eps': 1e-6 + 'cooldown': 1, + 'min_lr': 0, + 'epsilon': 1e-8, + 'verbose': False, } - print("class=" + fluid.dygraph.ReduceLROnPlateau.__name__ + - " kwargs=" + str(kwargs)) - lr = fluid.dygraph.ReduceLROnPlateau(**kwargs) - sgd = fluid.optimizer.SGD(learning_rate=lr, - parameter_list=linear.parameters()) - - best = float("-10000") if m == "max" else float("10000") - expected_lr = 1.0 - cooldown_counter = 0 - num_bad_epochs = 0 - var_list = [best, expected_lr, cooldown_counter, num_bad_epochs] - step_num = 0 - epoch_num = 0 - for epoch in range(30): - total_loss = 0 - - for batch_id in range(2): - step_num += 1 - x = fluid.dygraph.to_variable( - np.array([step_num]).astype('float32')) - loss = layers.sin(x) - sgd.minimize(loss) - total_loss += loss - - epoch_num += 1 - # get expected lr from fluid - avg_loss = total_loss / 1 - lr.step(avg_loss) - actual_lr = lr().numpy()[0] - - # get expected lr form python - expected_lr = reduce_lr_on_plateau(decay_rate, threshold, - cooldown, patience, m, n, - avg_loss, var_list) - self.assertEqual( - expected_lr, - actual_lr, - msg='Failed reduce lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}'. 
- format(epoch_num, expected_lr, actual_lr)) + paddle.enable_static() + self._test_static(place, kwargs) + paddle.disable_static(place) + self._test_dygraph(place, kwargs) + paddle.enable_static() + + def _test_static(self, place, kwargs): + paddle.enable_static() + + best = float("-10000") if kwargs['mode'] == "max" else float("10000") + current_lr = 1.0 + cooldown_counter = 0 + num_bad_epochs = 0 + var_list = [best, current_lr, cooldown_counter, num_bad_epochs] + + main_prog = fluid.Program() + start_prog = fluid.Program() + with fluid.program_guard(main_prog, start_prog): + x = fluid.layers.create_global_var( + [1], 1, 'float32', persistable=True) + paddle.increment(x) + loss = paddle.sin(x) + scheduler = paddle.optimizer.ReduceLROnPlateau(**kwargs) + adam = fluid.optimizer.Adam(learning_rate=scheduler) + adam.minimize(loss) + lr_var = adam._global_learning_rate() + test_prog = main_prog.clone() + + exe = fluid.Executor(place) + exe.run(start_prog) + + for epoch in range(20): + for batch_id in range(1): + out, actual_lr = exe.run(main_prog, + fetch_list=[loss.name, lr_var.name]) + expected_lr = reduce_lr_on_plateau( + kwargs['factor'], kwargs['threshold'], kwargs['cooldown'], + kwargs['patience'], kwargs['mode'], + kwargs['threshold_mode'], out[0], var_list) + + scheduler.step(out[0]) + actual_lr = scheduler() + self.assertEqual(actual_lr, np.array(expected_lr)) + + for epoch in range(10): + for batch_id in range(1): + out, actual_lr = exe.run(test_prog, + fetch_list=[loss.name, lr_var.name]) + expected_lr = reduce_lr_on_plateau( + kwargs['factor'], kwargs['threshold'], kwargs['cooldown'], + kwargs['patience'], kwargs['mode'], + kwargs['threshold_mode'], out[0], var_list) + scheduler.step(out[0]) + actual_lr = scheduler() + self.assertEqual(actual_lr, np.array(expected_lr)) + + def _test_dygraph(self, place, kwargs): + paddle.disable_static(place) + + best = float("-10000") if kwargs['mode'] == "max" else float("10000") + current_lr = 1.0 + cooldown_counter = 0 + num_bad_epochs = 0 + var_list = [best, current_lr, cooldown_counter, num_bad_epochs] + + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.ReduceLROnPlateau(**kwargs) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, + parameter_list=linear.parameters()) + + for epoch in range(20): + for batch_id in range(1): + x = paddle.to_tensor(epoch).astype('float32') + loss = paddle.sin(x) + loss.backward() + sgd.minimize(loss) + + scheduler.step(loss) + # get lr from paddle + current_lr = scheduler() + # get lr form python + expected_lr = reduce_lr_on_plateau( + kwargs['factor'], kwargs['threshold'], kwargs['cooldown'], + kwargs['patience'], kwargs['mode'], kwargs['threshold_mode'], + loss, var_list) + self.assertEqual(current_lr, expected_lr) + state_dict = sgd.state_dict() + scheduler1 = paddle.optimizer.ReduceLROnPlateau(**kwargs) + sgd1 = paddle.optimizer.SGD(learning_rate=scheduler1, + parameter_list=linear.parameters()) + sgd1.set_dict(state_dict) + self.assertEqual(scheduler.cooldown_counter, + scheduler1.cooldown_counter) + self.assertEqual(scheduler.best.numpy()[0], scheduler1.best) + self.assertEqual(scheduler.num_bad_epochs, scheduler1.num_bad_epochs) + self.assertEqual(scheduler.last_epoch, scheduler1.last_epoch) + self.assertEqual(scheduler.last_lr, scheduler1.last_lr) + + +def noam_lr(epoch_num, d_model, warmup_steps, learning_rate=1.0, verbose=False): + if epoch_num == 0: + a = 1 + else: + a = math.pow(epoch_num, -0.5) + b = math.pow(warmup_steps, -1.5) * epoch_num + return learning_rate * 
math.pow(d_model, -0.5) * min(a, b) + + +def lambda_lr(epoch_num, learning_rate, lr_lambda, verbose=False): + return learning_rate * lr_lambda(epoch_num) + + +def piecewise_lr(epoch_num, boundaries, values, verbose=False): + assert len(boundaries) + 1 == len(values) + for i in range(len(boundaries)): + if epoch_num < boundaries[i]: + return values[i] + return values[len(values) - 1] + + +def exponential_lr(epoch_num, learning_rate, gamma, verbose=False): + return learning_rate * gamma**epoch_num + + +def natural_exp_lr(epoch_num, learning_rate, gamma, verbose=False): + return learning_rate * math.exp(-1 * gamma * epoch_num) + + +def inverse_time_lr(epoch_num, learning_rate, gamma, verbose=False): + return learning_rate / (1 + gamma * epoch_num) + + +def polynomial_lr(epoch_num, + learning_rate, + decay_steps, + end_lr=0.0001, + power=1.0, + cycle=False, + verbose=False): + + if cycle: + div = math.ceil(epoch_num / float(decay_steps)) + if epoch_num == 0: + div = 1 + decay_steps = decay_steps * div + else: + epoch_num = min(epoch_num, decay_steps) + return (learning_rate - end_lr) * ( + (1 - float(epoch_num) / float(decay_steps))**power) + end_lr + + def get_lr(self): + if self.last_epoch == 0: + return self.base_lr + elif (self.last_epoch - 1 - self.T_max) % (2 * self.T_max) == 0: + return self.last_lr + (self.base_lr - self.eta_min) * (1 - math.cos( + math.pi / self.T_max)) / 2 + + return (1 + math.cos(math.pi * self.last_epoch / self.T_max)) / ( + 1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max)) * ( + self.last_lr - self.eta_min) + self.eta_min + + +cosine_annealing_lr_current = None + + +def cosine_annealing_lr(epoch_num, + learning_rate, + T_max, + eta_min=0, + verbose=False): + global cosine_annealing_lr_current + if epoch_num == 0: + cosine_annealing_lr_current = learning_rate + elif (epoch_num - 1 - T_max) % (2 * T_max) == 0: + cosine_annealing_lr_current = cosine_annealing_lr_current + ( + learning_rate - eta_min) * (1 - math.cos(math.pi / float(T_max)) + ) / 2 + else: + cosine_annealing_lr_current = (1 + math.cos( + math.pi * epoch_num / float(T_max))) / (1 + math.cos(math.pi * ( + epoch_num - 1) / float(T_max))) * (cosine_annealing_lr_current - + eta_min) + eta_min + return cosine_annealing_lr_current + + +def linear_warmup_lr(epoch_num, + learning_rate, + warmup_steps, + start_lr, + end_lr, + verbose=False): + if epoch_num < warmup_steps: + return start_lr + (end_lr - start_lr) * (float(epoch_num) / + float(warmup_steps)) + else: + return learning_rate + + +def multi_step_lr(epoch_num, + learning_rate, + milestones, + gamma=0.1, + verbose=False): + for i in range(len(milestones)): + if epoch_num < milestones[i]: + return learning_rate * (gamma**i) + return learning_rate * (gamma**len(milestones)) + + +def step_lr(epoch_num, learning_rate, step_size, gamma=0.1, verbose=False): + return learning_rate * math.pow(gamma, epoch_num // step_size) + + +class TestLRScheduler(unittest.TestCase): + def _test_static(self, python_func, paddle_api, kwarg, place): + main_prog = fluid.Program() + start_prog = fluid.Program() + with fluid.program_guard(main_prog, start_prog): + x = fluid.data(name='x', shape=[3, 4, 5]) + y = fluid.data(name='y', shape=[3, 4, 5]) + z = fluid.layers.fc(x, 100) + loss = fluid.layers.mean(z) + scheduler = paddle_api(**kwarg) + adam = fluid.optimizer.Adam(learning_rate=scheduler) + adam.minimize(loss) + lr_var = adam._global_learning_rate() + test_prog = main_prog.clone() + + num = 0 + exe = fluid.Executor(place) + exe.run(start_prog) + for epoch in 
range(5): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + self.assertEqual(out, np.array(python_func(num, **kwarg))) + scheduler.step() + num += 1 + + for epoch in range(5): + for batch_id in range(2): + out = exe.run( + test_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + self.assertEqual(out, np.array(python_func(num, **kwarg))) + scheduler.step() + num += 1 + + if isinstance(place, fluid.CPUPlace): + compiled_train_prog = fluid.CompiledProgram( + main_prog).with_data_parallel( + loss_name=loss.name, places=fluid.cpu_places(4)) + for epoch in range(5): + python_result = python_func(num, **kwarg) + for batch_id in range(2): + _ = exe.run( + compiled_train_prog, + feed={ + 'x': np.random.randn(12, 4, 5).astype('float32'), + 'y': np.random.randn(12, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scopes = compiled_train_prog._executor.local_scopes() + out = np.array(scopes[0].var(lr_var.name).get_tensor()) + self.assertEqual(out, np.array(python_result)) + out = np.array(scopes[1].var(lr_var.name).get_tensor()) + self.assertEqual(out, np.array(python_result)) + out = np.array(scopes[2].var(lr_var.name).get_tensor()) + self.assertEqual(out, np.array(python_result)) + out = np.array(scopes[3].var(lr_var.name).get_tensor()) + self.assertEqual(out, np.array(python_result)) + scheduler.step() + num += 1 + + compiled_test_prog = fluid.CompiledProgram( + test_prog).with_data_parallel( + loss_name=loss.name, + share_vars_from=compiled_train_prog, + places=fluid.cpu_places(4)) + for epoch in range(5): + python_result = python_func(num, **kwarg) + for batch_id in range(2): + _ = exe.run( + compiled_test_prog, + feed={ + 'x': np.random.randn(12, 4, 5).astype('float32'), + 'y': np.random.randn(12, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scopes = compiled_test_prog._executor.local_scopes() + out = np.array(scopes[0].var(lr_var.name).get_tensor()) + self.assertEqual(out, np.array(python_result)) + out = np.array(scopes[1].var(lr_var.name).get_tensor()) + self.assertEqual(out, np.array(python_result)) + out = np.array(scopes[2].var(lr_var.name).get_tensor()) + self.assertEqual(out, np.array(python_result)) + out = np.array(scopes[3].var(lr_var.name).get_tensor()) + self.assertEqual(out, np.array(python_result)) + scheduler.step() + num += 1 + + def _test_dygraph(self, python_func, paddle_api, kwarg, place): + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle_api(**kwarg) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, + parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + + self.assertAlmostEqual(sgd.current_step_lr(), + python_func(epoch, **kwarg)) + if paddle_api.__name__ != "CosineAnnealingLR": + scheduler.step() + else: + scheduler.step(epoch + 1) + + def test_scheduler(self): + with self.assertRaises(NotImplementedError): + paddle.optimizer.lr_scheduler._LRScheduler().step() + with self.assertRaises(TypeError): + paddle.optimizer.MultiStepLR( + learning_rate="test", milestones=[1, 2, 3]) + with self.assertRaises(TypeError): + paddle.optimizer.MultiStepLR(learning_rate=0.5, 
milestones='test') + with self.assertRaises(ValueError): + paddle.optimizer.MultiStepLR( + learning_rate=0.5, milestones=[3, 2, 1]) + with self.assertRaises(ValueError): + paddle.optimizer.MultiStepLR( + learning_rate=0.5, milestones=[1, 2, 3], gamma=2) + + func_api_kwargs = [(noam_lr, paddle.optimizer.NoamLR, { + "d_model": 0.01, + "warmup_steps": 100, + "verbose": False + }), (piecewise_lr, paddle.optimizer.PiecewiseLR, { + "boundaries": [3, 6, 9, 15, 20], + "values": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], + "verbose": False + }), (natural_exp_lr, paddle.optimizer.NaturalExpLR, { + "learning_rate": 0.5, + "gamma": 0.1, + "verbose": False + }), (inverse_time_lr, paddle.optimizer.InverseTimeLR, { + "learning_rate": 0.5, + "gamma": 0.1, + "verbose": True + }), (polynomial_lr, paddle.optimizer.PolynomialLR, { + "learning_rate": 0.5, + "decay_steps": 20, + "end_lr": 0, + "power": 1.0, + "cycle": False, + "verbose": False + }), (polynomial_lr, paddle.optimizer.PolynomialLR, { + "learning_rate": 0.5, + "decay_steps": 20, + "end_lr": 0, + "power": 1.0, + "cycle": True, + "verbose": False + }), (linear_warmup_lr, paddle.optimizer.LinearLrWarmup, { + 'learning_rate': 0.5, + 'warmup_steps': 20, + 'start_lr': 0, + 'end_lr': 0.5, + "verbose": False + }), (exponential_lr, paddle.optimizer.ExponentialLR, { + "learning_rate": 0.5, + "gamma": 0.9, + "verbose": False + }), (multi_step_lr, paddle.optimizer.MultiStepLR, { + "learning_rate": 0.5, + "milestones": [3, 6, 9, 15, 20], + "gamma": 0.8, + "verbose": True + }), (step_lr, paddle.optimizer.StepLR, { + "learning_rate": 0.5, + "step_size": 2, + "gamma": 0.8, + "verbose": False + }), (lambda_lr, paddle.optimizer.LambdaLR, { + "learning_rate": 0.5, + "lr_lambda": lambda x: 0.95**x, + "verbose": False + }), (cosine_annealing_lr, paddle.optimizer.CosineAnnealingLR, { + "learning_rate": 0.5, + "T_max": 10, + "verbose": True + })] + + for python_func, paddle_api, kwarg in func_api_kwargs: + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + + for place in places: + paddle.enable_static() + self._test_static(python_func, paddle_api, kwarg, place) + paddle.disable_static(place) + self._test_dygraph(python_func, paddle_api, kwarg, place) + paddle.enable_static() if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_linspace.py b/python/paddle/fluid/tests/unittests/test_linspace.py index 068993c4c1c5e770dd6cf7dc7a35b9ccc3f49aae..6d1f42111eebff0f469317ddf2a9ec7698a7ae1e 100644 --- a/python/paddle/fluid/tests/unittests/test_linspace.py +++ b/python/paddle/fluid/tests/unittests/test_linspace.py @@ -32,6 +32,7 @@ class TestLinspaceOpCommonCase(OpTest): 'Stop': np.array([10]).astype(dtype), 'Num': np.array([11]).astype('int32') } + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} self.outputs = {'Out': np.arange(0, 11).astype(dtype)} @@ -48,6 +49,7 @@ class TestLinspaceOpReverseCase(OpTest): 'Stop': np.array([0]).astype(dtype), 'Num': np.array([11]).astype('int32') } + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} self.outputs = {'Out': np.arange(10, -1, -1).astype(dtype)} @@ -64,6 +66,7 @@ class TestLinspaceOpNumOneCase(OpTest): 'Stop': np.array([0]).astype(dtype), 'Num': np.array([1]).astype('int32') } + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} self.outputs = {'Out': np.array(10, dtype=dtype)} @@ -72,6 +75,26 @@ class TestLinspaceOpNumOneCase(OpTest): class TestLinspaceAPI(unittest.TestCase): + def test_variable_input1(self): + start = paddle.full(shape=[1], 
fill_value=0, dtype='float32') + stop = paddle.full(shape=[1], fill_value=10, dtype='float32') + num = paddle.full(shape=[1], fill_value=5, dtype='int32') + out = paddle.linspace(start, stop, num, dtype='float32') + exe = fluid.Executor(place=fluid.CPUPlace()) + res = exe.run(fluid.default_main_program(), fetch_list=[out]) + np_res = np.linspace(0, 10, 5, dtype='float32') + self.assertEqual((res == np_res).all(), True) + + def test_variable_input2(self): + paddle.disable_static() + start = paddle.full(shape=[1], fill_value=0, dtype='float32') + stop = paddle.full(shape=[1], fill_value=10, dtype='float32') + num = paddle.full(shape=[1], fill_value=5, dtype='int32') + out = paddle.linspace(start, stop, num, dtype='float32') + np_res = np.linspace(0, 10, 5, dtype='float32') + self.assertEqual((out.numpy() == np_res).all(), True) + paddle.enable_static() + def test_dtype(self): out_1 = paddle.linspace(0, 10, 5, dtype='float32') out_2 = paddle.linspace(0, 10, 5, dtype=np.float32) @@ -89,10 +112,16 @@ class TestLinspaceAPI(unittest.TestCase): def test_imperative(self): paddle.disable_static() - out = paddle.linspace(0, 10, 5, dtype='float32') - np_out = np.linspace(0, 10, 5, dtype='float32') + out1 = paddle.linspace(0, 10, 5, dtype='float32') + np_out1 = np.linspace(0, 10, 5, dtype='float32') + out2 = paddle.linspace(0, 10, 5, dtype='int32') + np_out2 = np.linspace(0, 10, 5, dtype='int32') + out3 = paddle.linspace(0, 10, 200, dtype='int32') + np_out3 = np.linspace(0, 10, 200, dtype='int32') paddle.enable_static() - self.assertEqual((out.numpy() == np_out).all(), True) + self.assertEqual((out1.numpy() == np_out1).all(), True) + self.assertEqual((out2.numpy() == np_out2).all(), True) + self.assertEqual((out3.numpy() == np_out3).all(), True) class TestLinspaceOpError(unittest.TestCase): @@ -100,7 +129,12 @@ class TestLinspaceOpError(unittest.TestCase): with program_guard(Program(), Program()): def test_dtype(): - fluid.layers.linspace(0, 10, 1, dtype="int32") + fluid.layers.linspace(0, 10, 1, dtype="int8") + + self.assertRaises(TypeError, test_dtype) + + def test_dtype(): + fluid.layers.linspace(0, 10, 1.33, dtype="int32") self.assertRaises(TypeError, test_dtype) @@ -120,20 +154,20 @@ class TestLinspaceOpError(unittest.TestCase): self.assertRaises(TypeError, test_step_dtype) def test_start_dtype(): - start = fluid.data(shape=[1], type="int32", name="start") + start = fluid.data(shape=[1], dtype="int32", name="start") fluid.layers.linspace(start, 10, 1, dtype="float32") self.assertRaises(TypeError, test_start_dtype) def test_end_dtype(): - end = fluid.data(shape=[1], type="int32", name="end") + end = fluid.data(shape=[1], dtype="int32", name="end") fluid.layers.linspace(0, end, 1, dtype="float32") self.assertRaises(TypeError, test_end_dtype) - def test_step_dtype(): - step = fluid.data(shape=[1], type="int32", name="step") - fluid.layers.linspace(0, 10, step, dtype="float32") + def test_num_dtype(): + num = fluid.data(shape=[1], dtype="int32", name="step") + fluid.layers.linspace(0, 10, num, dtype="float32") self.assertRaises(TypeError, test_step_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_log_softmax.py b/python/paddle/fluid/tests/unittests/test_log_softmax.py index 2b77624734d335bd999754b378971bcc5c945fa5..e3d7003ecedb60f9b4f9a542ed08ca88d894d24a 100644 --- a/python/paddle/fluid/tests/unittests/test_log_softmax.py +++ b/python/paddle/fluid/tests/unittests/test_log_softmax.py @@ -14,93 +14,136 @@ import unittest import numpy as np -import paddle.fluid as fluid -import 
paddle.fluid.core as core -import paddle.nn as nn -import paddle.nn.functional as functional +from op_test import OpTest +import paddle +import paddle.nn.functional as F +np.random.seed(10) -def stable_softmax(x): + +def ref_log_softmax(x): shiftx = (x - np.max(x)) - exps = np.exp(shiftx) - return exps / np.sum(exps) + out = shiftx - np.log(np.exp(shiftx).sum()) + return out -def ref_log_softmax(x, axis=None, dtype=None): - x_t = x.copy() - if dtype is not None: - x_t = x_t.astype(dtype) - if axis is None: - axis = -1 - out = np.apply_along_axis(stable_softmax, axis, x_t) - return np.log(out) +def ref_log_softmax_grad(x, axis): + if axis < 0: + axis += len(x.shape) + out = np.apply_along_axis(ref_log_softmax, axis, x) + axis_dim = x.shape[axis] + dout = np.full_like(x, fill_value=1. / x.size) + dx = dout - np.exp(out) * dout.copy().sum(axis=axis, keepdims=True).repeat( + axis_dim, axis=axis) + return dx -class TestNNLogSoftmaxAPI(unittest.TestCase): +class TestLogSoftmaxOp(OpTest): def setUp(self): - self.init_data() + self.op_type = 'log_softmax' + self.dtype = 'float64' + self.shape = [2, 3, 4, 5] + self.axis = -1 + self.set_attrs() - def init_data(self): - self.x_shape = [2, 3, 4, 5] - self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) + x = np.random.uniform(0.1, 1., self.shape).astype(self.dtype) + out = np.apply_along_axis(ref_log_softmax, self.axis, x) + self.x_grad = ref_log_softmax_grad(x, self.axis) + + self.inputs = {'X': x} + self.outputs = {'Out': out} + self.attrs = {'axis': self.axis} + + def set_attrs(self): + pass + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], ['Out'], user_defined_grads=[self.x_grad]) + + +class TestLogSoftmaxShape(TestLogSoftmaxOp): + def set_attrs(self): + self.shape = [12, 10] - def check_api(self, place=fluid.CPUPlace(), axis=None): - ref_out = ref_log_softmax(self.x, axis) - main_program = fluid.Program() - mylogsoftmax = nn.LogSoftmax(axis) - with fluid.program_guard(main_program): - x = fluid.data(name='x', shape=self.x_shape) - y = mylogsoftmax(x) - exe = fluid.Executor(place) - out = exe.run(main_program, feed={'x': self.x}, fetch_list=[y]) +class TestLogSoftmaxAxis(TestLogSoftmaxOp): + def set_attrs(self): + self.axis = 1 + + +class TestNNLogSoftmaxAPI(unittest.TestCase): + def setUp(self): + self.x_shape = [2, 3, 4, 5] + self.x = np.random.uniform(-1., 1., self.x_shape).astype(np.float32) + self.place = paddle.CUDAPlace(0) \ + if paddle.fluid.core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def check_api(self, axis=-1): + ref_out = np.apply_along_axis(ref_log_softmax, axis, self.x) + + logsoftmax = paddle.nn.LogSoftmax(axis) + # test static api + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data(name='x', shape=self.x_shape) + y = logsoftmax(x) + exe = paddle.static.Executor(self.place) + out = exe.run(feed={'x': self.x}, fetch_list=[y]) self.assertTrue(np.allclose(out[0], ref_out)) - with fluid.dygraph.guard(place): - x = fluid.dygraph.to_variable(self.x) - y = mylogsoftmax(x) + # test dygrapg api + paddle.disable_static() + x = paddle.to_variable(self.x) + y = logsoftmax(x) self.assertTrue(np.allclose(y.numpy(), ref_out)) + paddle.enable_static() def test_check_api(self): - places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - for axis in [None, 2]: - self.check_api(place, axis) + for axis in [-1, 1]: + self.check_api(axis) class 
TestNNFunctionalLogSoftmaxAPI(unittest.TestCase): def setUp(self): - self.init_data() - - def init_data(self): self.x_shape = [2, 3, 4, 5] self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) - - def check_api(self, place=fluid.CPUPlace(), axis=None, dtype=None): - ref_out = ref_log_softmax(self.x, axis, dtype) - main_program = fluid.Program() - mylogsoftmax = nn.LogSoftmax(axis) - with fluid.program_guard(main_program): - x = fluid.data(name='x', shape=self.x_shape) - y = functional.log_softmax(x, axis, dtype) - exe = fluid.Executor(place) - out = exe.run(main_program, feed={'x': self.x}, fetch_list=[y]) + self.place = paddle.CUDAPlace(0) \ + if paddle.fluid.core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def check_api(self, axis=-1, dtype=None): + x = self.x.copy() + if dtype is not None: + x = x.astype(dtype) + ref_out = np.apply_along_axis(ref_log_softmax, axis, x) + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data(name='x', shape=self.x_shape) + y = F.log_softmax(x, axis, dtype) + exe = paddle.static.Executor(self.place) + out = exe.run(feed={'x': self.x}, fetch_list=[y]) self.assertTrue(np.allclose(out[0], ref_out)) - with fluid.dygraph.guard(place): - x = fluid.dygraph.to_variable(self.x) - y = functional.log_softmax(x, axis, dtype) - self.assertTrue(np.allclose(y.numpy(), ref_out)) + paddle.disable_static() + x = paddle.to_variable(self.x) + y = F.log_softmax(x, axis, dtype) + self.assertTrue(np.allclose(y.numpy(), ref_out), True) + paddle.enable_static() def test_check_api(self): - places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - self.check_api(place, None, None) - self.check_api(place, None, np.float64) + for axis in [-1, 1]: + self.check_api(axis) + self.check_api(-1, 'float64') + + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data(name='X1', shape=[100], dtype='int32') + self.assertRaises(TypeError, F.log_softmax, x) + + x = paddle.data(name='X2', shape=[100], dtype='float32') + self.assertRaises(TypeError, F.log_softmax, x, dtype='int32') if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_logical_op.py b/python/paddle/fluid/tests/unittests/test_logical_op.py old mode 100644 new mode 100755 index 8f0049a8d30d0e1fed1d27cf6e13c036e33678d0..b26b6ab6c3ce7cc68ad877b183eb8733293b9228 --- a/python/paddle/fluid/tests/unittests/test_logical_op.py +++ b/python/paddle/fluid/tests/unittests/test_logical_op.py @@ -17,8 +17,9 @@ from __future__ import print_function import op_test import unittest import numpy as np +import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard +from paddle.static import Program, program_guard def create_test_class(op_type, callback, binary_op=True): @@ -42,6 +43,8 @@ def create_test_class(op_type, callback, binary_op=True): def test_error(self): with program_guard(Program(), Program()): + + # test 1 type error, x, y must be bool type x = fluid.layers.data(name='x', shape=[2], dtype='bool') y = fluid.layers.data(name='y', shape=[2], dtype='bool') a = fluid.layers.data(name='a', shape=[2], dtype='int32') @@ -54,7 +57,16 @@ def create_test_class(op_type, callback, binary_op=True): self.assertRaises(TypeError, op, x=x, out=1) self.assertRaises(TypeError, op, x=a) - Cls.__name__ = op_type + # test 2 type error, x, y must be same shape + x_data = fluid.layers.data( + name='x_data', shape=[2], dtype='bool') + y_data = 
fluid.layers.data( + name='y_data', shape=[2, 2], dtype='bool') + + if self.op_type != "logical_not": + self.assertRaises(TypeError, op, x=x_data, y=y_data, out=1) + self.assertRaises(TypeError, op, x=y_data, y=x_data) + globals()[op_type] = Cls diff --git a/python/paddle/fluid/tests/unittests/test_logsumexp.py b/python/paddle/fluid/tests/unittests/test_logsumexp.py index 508b4a7b72da8affbc7ddf590b8142a41d1f3191..c2201a52605bc87246fb9c8734494b19f83ff180 100644 --- a/python/paddle/fluid/tests/unittests/test_logsumexp.py +++ b/python/paddle/fluid/tests/unittests/test_logsumexp.py @@ -12,64 +12,128 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function import paddle -import paddle.fluid as fluid import unittest import numpy as np from op_test import OpTest -from paddle.fluid import Program, program_guard -from paddle.fluid.layer_helper import LayerHelper -class TestLogSumOpError(unittest.TestCase): +def ref_logsumexp(x, axis=None, keepdim=False, reduce_all=False): + if isinstance(axis, int): + axis = (axis, ) + elif isinstance(axis, list): + axis = tuple(axis) + if reduce_all: + axis = None + out = np.log(np.exp(x).sum(axis=axis, keepdims=keepdim)) + return out + + +class TestLogsumexp(OpTest): + def setUp(self): + self.op_type = 'logsumexp' + self.shape = [2, 3, 4, 5] + self.dtype = 'float64' + self.axis = [-1] + self.keepdim = False + self.reduce_all = False + self.set_attrs() + + np.random.seed(10) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) + out = ref_logsumexp(x, self.axis, self.keepdim, self.reduce_all) + + self.inputs = {'X': x} + self.outputs = {'Out': out} + self.attrs = { + 'dim': self.axis, + 'keep_dim': self.keepdim, + 'reduce_all': self.reduce_all + } + + def set_attrs(self): + pass + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], ['Out']) + + +class TestLogsumexp_shape(TestLogsumexp): + def set_attrs(self): + self.shape = [4, 5, 6] + + +class TestLogsumexp_axis(TestLogsumexp): + def set_attrs(self): + self.axis = [0, -1] + + +class TestLogsumexp_axis_all(TestLogsumexp): + def set_attrs(self): + self.axis = [0, 1, 2, 3] + + +class TestLogsumexp_keepdim(TestLogsumexp): + def set_attrs(self): + self.keepdim = True + + +class TestLogsumexp_reduce_all(TestLogsumexp): + def set_attrs(self): + self.reduce_all = True + + +class TestLogsumexpError(unittest.TestCase): def test_errors(self): - with program_guard(Program(), Program()): - - x1 = fluid.layers.data(name='x1', shape=[120], dtype="uint8") - self.assertRaises(Exception, paddle.logsumexp, x1) - - x2 = fluid.layers.data(name='x2', shape=[2, 3], dtype="int") - self.assertRaises(Exception, paddle.logsumexp, x2) - - x3 = fluid.layers.data(name='x3', shape=[3], dtype="float16") - self.assertRaises(Exception, paddle.logsumexp, x3) - - self.assertRaises(AssertionError, paddle.logsumexp, None) - - -class TestLogSumExpOp(unittest.TestCase): - def test_dygraph(self): - with fluid.dygraph.guard(): - np_x = np.random.uniform(0.1, 1, [123]).astype(np.float32) - x = fluid.dygraph.to_variable(np_x) - self.assertTrue( - np.allclose( - paddle.logsumexp(x).numpy(), np.log(np.sum(np.exp(np_x))))) - - np_x = np.random.uniform(0.1, 1, [2, 3, 4]).astype(np.float32) - x = fluid.dygraph.to_variable(np_x) - self.assertTrue( - np.allclose( - paddle.logsumexp( - x, dim=[1, 2]).numpy(), - np.log(np.sum(np.exp(np_x), axis=(1, 2))))) - - np_x = np.random.uniform(0.1, 1, [2, 3, 
4]).astype(np.float32) - x = fluid.dygraph.to_variable(np_x) - self.assertTrue( - np.allclose( - paddle.logsumexp( - x, dim=[2]).numpy(), - np.log(np.sum(np.exp(np_x), axis=(2))))) - - np_x = np.random.uniform(0.1, 1, [2, 3, 4]).astype(np.float32) - x = fluid.dygraph.to_variable(np_x) - self.assertTrue( - np.allclose( - paddle.logsumexp( - x, keepdim=True).numpy(), - np.log(np.sum(np.exp(np_x), keepdims=True)))) + with paddle.static.program_guard(paddle.static.Program()): + self.assertRaises(TypeError, paddle.logsumexp, 1) + x1 = paddle.data(name='x1', shape=[120], dtype="int32") + self.assertRaises(TypeError, paddle.logsumexp, x1) + + +class TestLogsumexpAPI(unittest.TestCase): + def setUp(self): + self.shape = [2, 3, 4, 5] + self.x = np.random.uniform(-1, 1, self.shape).astype(np.float32) + self.place = paddle.CUDAPlace(0) if paddle.fluid.core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def api_case(self, axis=None, keepdim=False): + out_ref = ref_logsumexp(self.x, axis, keepdim) + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.shape) + out = paddle.logsumexp(x, axis, keepdim) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x}, fetch_list=[out]) + self.assertTrue(np.allclose(res[0], out_ref)) + + paddle.disable_static(self.place) + x = paddle.to_variable(self.x) + out = paddle.logsumexp(x, axis, keepdim) + self.assertTrue(np.allclose(out.numpy(), out_ref)) + paddle.enable_static() + + def test_api(self): + self.api_case() + self.api_case(2) + self.api_case([-1]) + self.api_case([2, -3]) + self.api_case((0, 1, -1)) + self.api_case(keepdim=True) + + def test_alias(self): + paddle.disable_static(self.place) + x = paddle.to_variable(self.x) + out1 = paddle.logsumexp(x) + out2 = paddle.tensor.logsumexp(x) + out3 = paddle.tensor.math.logsumexp(x) + out_ref = ref_logsumexp(self.x) + for out in [out1, out2, out3]: + self.assertTrue(np.allclose(out.numpy(), out_ref)) + paddle.enable_static() if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py index d4189eca0369702120f079dae9067a58da1e9597..90430bbce4d1896c8fdbb829230f2ad8a691adff 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py @@ -20,15 +20,14 @@ import numpy as np import paddle.fluid.core as core from op_test import OpTest import paddle.fluid as fluid +import paddle.fluid.layers as layers SIGMOID_THRESHOLD_MIN = -40.0 SIGMOID_THRESHOLD_MAX = 13.0 EXP_MAX_INPUT = 40.0 -def lstm_naive( - input, - w, ): +def lstm_naive(input, w): seq_len, batch_size, hidden_size = input.shape offset = 0 @@ -86,8 +85,8 @@ def lstm_naive( return (2. / (1. + np.exp(y))) - 1. 
output = [] - pre_h = np.zeros((batch_size, hidden_size), dtype=input.dtype) - pre_c = np.zeros((batch_size, hidden_size), dtype=input.dtype) + pre_h = np.zeros((1, batch_size, hidden_size), dtype=input.dtype) + pre_c = np.zeros((1, batch_size, hidden_size), dtype=input.dtype) for i in range(seq_len): emb_1 = input[i] @@ -110,7 +109,6 @@ def lstm_naive( output = np.concatenate(output, -1) output = output.reshape((batch_size, -1, hidden_size)) - output = output.transpose((1, 0, 2)) return output, pre_h, pre_c @@ -119,11 +117,12 @@ def lstm_naive( @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestCUDNNLstmOp(OpTest): + # TODO(GaoWei8):when input dtype is fp64, precision threshold should be removed. def setUp(self): self.op_type = "cudnn_lstm" - self.dtype = np.float32 + self.dtype = np.float64 - num_steps = 20 + seq_length = 20 batch_size = 5 hidden_size = 20 @@ -133,33 +132,24 @@ class TestCUDNNLstmOp(OpTest): weight_size += hidden_size * 8 input = np.random.uniform( - low=-0.1, high=0.1, size=(num_steps, batch_size, + low=-0.1, high=0.1, size=(seq_length, batch_size, hidden_size)).astype(self.dtype) flat_w = np.random.uniform( low=-0.1, high=0.1, size=(weight_size)).astype(self.dtype) output, last_hidden, last_cell = lstm_naive(input, flat_w) - init_h = np.zeros((batch_size, hidden_size), dtype=np.float32) - init_c = np.zeros((batch_size, hidden_size), dtype=np.float32) - scope = core.Scope() - program = fluid.Program() - block = program.global_block() - - cache_temp = block.create_var( - name="Cache", - persistable=True, - type=core.VarDesc.VarType.RAW, - stop_gradient=True) + init_h = np.zeros((1, batch_size, hidden_size), dtype=np.float64) + init_c = np.zeros((1, batch_size, hidden_size), dtype=np.float64) + state_out = np.ndarray((300)).astype("uint8") + self.inputs = { - 'Input': OpTest.np_dtype_to_fluid_dtype(input), - 'W': OpTest.np_dtype_to_fluid_dtype(flat_w), - 'InitH': OpTest.np_dtype_to_fluid_dtype(init_h), - 'InitC': OpTest.np_dtype_to_fluid_dtype(init_c), + 'Input': input, + 'W': flat_w, + 'InitH': init_h, + 'InitC': init_c } - self.cache_name_list = ['Cache'] self.attrs = { - 'max_len': num_steps, 'dropout_prob': 0.0, 'is_bidirec': False, 'input_size': hidden_size, @@ -168,22 +158,61 @@ class TestCUDNNLstmOp(OpTest): } self.outputs = { 'Out': output, - "last_h": last_hidden, - 'last_c': last_cell + "LastH": last_hidden, + 'LastC': last_cell, + 'Reserve': np.ndarray((400)).astype("uint8"), + 'StateOut': state_out } def test_output_with_place(self): # depend on the scope structure place = core.CUDAPlace(0) - self.check_output_with_place(place, atol=1e-5, check_dygraph=False) + self.check_output_with_place( + place, no_check_set=['Reserve', 'StateOut']) def test_grad_with_place(self): # depend on the scope structure place = core.CUDAPlace(0) self.check_grad_with_place( place, - set(['Input', 'W', 'InitH', 'InitC']), ['Out', 'last_h', 'last_c'], - check_dygraph=False) + set(['Input', 'W', 'InitH', 'InitC']), ['Out', 'LastH', 'LastC'], + max_relative_error=1e-4) + + +@unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") +class TestCUDNNlstmAPI(unittest.TestCase): + def test_lstm(self): + seq_len = 20 + batch_size = 5 + hidden_size = 20 + dropout_prob = 0.0 + num_layers = 1 + input = fluid.data( + name='input', + shape=[seq_len, batch_size, hidden_size], + dtype='float64') + init_h = layers.fill_constant([num_layers, batch_size, hidden_size], + 'float64', 0.0) + init_c = layers.fill_constant([num_layers, 
batch_size, hidden_size], + 'float64', 0.0) + rnn_out, last_h, last_c = layers.lstm(input, init_h, init_c, seq_len, + hidden_size, num_layers, + dropout_prob) + exe = fluid.Executor(fluid.CUDAPlace(0)) + exe.run(fluid.default_startup_program()) + input_i = np.random.uniform( + low=-0.1, high=0.1, size=(seq_len, batch_size, + hidden_size)).astype("float64") + out = exe.run(fluid.default_main_program(), + feed={'input': input_i}, + fetch_list=[rnn_out, last_h, last_c, 'cudnn_lstm_0.w_0']) + + output, last_hidden, last_cell = lstm_naive(input_i, out[3]) + + self.assertTrue(np.allclose(output, out[0], atol=1e-5)) + self.assertTrue(np.allclose(last_hidden, out[1], atol=1e-5)) + self.assertTrue(np.allclose(last_cell, out[2], atol=1e-5)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_masked_select_op.py b/python/paddle/fluid/tests/unittests/test_masked_select_op.py new file mode 100644 index 0000000000000000000000000000000000000000..259a36e30d9a9c1852ff3800d5240ce7e7bb0e26 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_masked_select_op.py @@ -0,0 +1,124 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle.fluid as fluid +import paddle + + +def np_masked_select(x, mask): + result = np.empty(shape=(0), dtype=x.dtype) + for ele, ma in zip(np.nditer(x), np.nditer(mask)): + if ma: + result = np.append(result, ele) + return result.flatten() + + +class TestMaskedSelectOp(OpTest): + def setUp(self): + self.init() + self.op_type = "masked_select" + x = np.random.random(self.shape).astype("float64") + mask = np.array(np.random.randint(2, size=self.shape, dtype=bool)) + out = np_masked_select(x, mask) + self.inputs = {'X': x, 'Mask': mask} + self.outputs = {'Y': out} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y') + + def init(self): + self.shape = (50, 3) + + +class TestMaskedSelectOp1(TestMaskedSelectOp): + def init(self): + self.shape = (6, 8, 9, 18) + + +class TestMaskedSelectOp2(TestMaskedSelectOp): + def init(self): + self.shape = (168, ) + + +class TestMaskedSelectAPI(unittest.TestCase): + def test_imperative_mode(self): + paddle.disable_static() + shape = (88, 6, 8) + np_x = np.random.random(shape).astype('float32') + np_mask = np.array(np.random.randint(2, size=shape, dtype=bool)) + x = paddle.to_tensor(np_x) + mask = paddle.to_tensor(np_mask) + out = paddle.masked_select(x, mask) + np_out = np_masked_select(np_x, np_mask) + self.assertEqual(np.allclose(out.numpy(), np_out), True) + paddle.enable_static() + + def test_static_mode(self): + shape = [8, 9, 6] + x = paddle.data(shape=shape, dtype='float32', name='x') + mask = paddle.data(shape=shape, dtype='bool', name='mask') + np_x = np.random.random(shape).astype('float32') + np_mask = np.array(np.random.randint(2, size=shape, dtype=bool)) + + out = 
paddle.masked_select(x, mask) + np_out = np_masked_select(np_x, np_mask) + + exe = paddle.static.Executor(place=paddle.CPUPlace()) + + res = exe.run(paddle.static.default_main_program(), + feed={"x": np_x, + "mask": np_mask}, + fetch_list=[out]) + self.assertEqual(np.allclose(res, np_out), True) + + +class TestMaskedSelectError(unittest.TestCase): + def test_error(self): + with paddle.static.program_guard(paddle.static.Program(), + paddle.static.Program()): + + shape = [8, 9, 6] + x = paddle.data(shape=shape, dtype='float32', name='x') + mask = paddle.data(shape=shape, dtype='bool', name='mask') + mask_float = paddle.data( + shape=shape, dtype='float32', name='mask_float') + np_x = np.random.random(shape).astype('float32') + np_mask = np.array(np.random.randint(2, size=shape, dtype=bool)) + + def test_x_type(): + paddle.masked_select(np_x, mask) + + self.assertRaises(TypeError, test_x_type) + + def test_mask_type(): + paddle.masked_select(x, np_mask) + + self.assertRaises(TypeError, test_mask_type) + + def test_mask_dtype(): + paddle.masked_select(x, mask_float) + + self.assertRaises(TypeError, test_mask_dtype) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch.py b/python/paddle/fluid/tests/unittests/test_math_op_patch.py index f6eff22d6ce5f06d8853d6244f79b4b07b3fa4f5..00137f63e244a0e166047e89f9ef436da158ed16 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch.py @@ -189,15 +189,15 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_integer_div(self): a = fluid.layers.data(name="a", shape=[1], dtype='int64') - b = a / 7 + b = a / 2 place = fluid.CPUPlace() exe = fluid.Executor(place) - a_np = numpy.array([3, 4, 10, 14, 9, 18]).astype('int64') + a_np = numpy.array([3, 4, 10, 14, 9, 18]) b_np, = exe.run(fluid.default_main_program(), feed={"a": a_np}, fetch_list=[b]) - - b_np_actual = (a_np / 7).astype('int64') + # for paddle2.0, use true_divide + b_np_actual = (a_np / 2.0) self.assertTrue(numpy.array_equal(b_np, b_np_actual)) @prog_scope() diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py index 803293be9b7d637875b56b443b04c246737ed2f8..9bb12d546550a821e8a133dd9c91d5d41a50b1b2 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py @@ -15,6 +15,7 @@ from __future__ import print_function import unittest +import paddle import paddle.fluid as fluid import numpy as np import six @@ -284,6 +285,223 @@ class TestMathOpPatchesVarBase(unittest.TestCase): self.assertEqual((a != b).dtype, fluid.core.VarDesc.VarType.BOOL) self.assertTrue(np.array_equal((a != b).numpy(), a_np != b_np)) + def test_tensor_patch_method(self): + paddle.disable_static() + x_np = np.random.uniform(-1, 1, [2, 3]).astype(self.dtype) + y_np = np.random.uniform(-1, 1, [2, 3]).astype(self.dtype) + z_np = np.random.uniform(-1, 1, [6, 9]).astype(self.dtype) + + x = paddle.to_tensor(x_np) + y = paddle.to_tensor(y_np) + z = paddle.to_tensor(z_np) + + a = paddle.to_tensor([[1, 1], [2, 2], [3, 3]]) + b = paddle.to_tensor([[1, 1], [2, 2], [3, 3]]) + + # 1. 
Unary operation for Tensor + self.assertEqual(x.dim(), 2) + self.assertEqual(x.ndimension(), 2) + self.assertEqual(x.ndim, 2) + self.assertEqual(x.size(), [2, 3]) + self.assertTrue( + np.array_equal(x.sigmoid().numpy(), fluid.layers.sigmoid(x).numpy( + ))) + self.assertTrue( + np.array_equal(x.logsigmoid().numpy(), + fluid.layers.logsigmoid(x).numpy())) + self.assertTrue(np.array_equal(x.exp().numpy(), paddle.exp(x).numpy())) + self.assertTrue( + np.array_equal(x.tanh().numpy(), paddle.tanh(x).numpy())) + self.assertTrue( + np.array_equal(x.atan().numpy(), paddle.atan(x).numpy())) + self.assertTrue( + np.array_equal(x.tanh_shrink().numpy(), + fluid.layers.tanh_shrink(x).numpy())) + self.assertTrue(np.array_equal(x.abs().numpy(), paddle.abs(x).numpy())) + m = x.abs() + self.assertTrue( + np.array_equal(m.sqrt().numpy(), paddle.sqrt(m).numpy())) + self.assertTrue( + np.array_equal(m.rsqrt().numpy(), paddle.rsqrt(m).numpy())) + self.assertTrue( + np.array_equal(x.ceil().numpy(), paddle.ceil(x).numpy())) + self.assertTrue( + np.array_equal(x.floor().numpy(), paddle.floor(x).numpy())) + self.assertTrue(np.array_equal(x.cos().numpy(), paddle.cos(x).numpy())) + self.assertTrue( + np.array_equal(x.acos().numpy(), paddle.acos(x).numpy())) + self.assertTrue( + np.array_equal(x.asin().numpy(), paddle.asin(x).numpy())) + self.assertTrue(np.array_equal(x.sin().numpy(), paddle.sin(x).numpy())) + self.assertTrue( + np.array_equal(x.sinh().numpy(), paddle.sinh(x).numpy())) + self.assertTrue( + np.array_equal(x.cosh().numpy(), paddle.cosh(x).numpy())) + self.assertTrue( + np.array_equal(x.round().numpy(), paddle.round(x).numpy())) + self.assertTrue( + np.array_equal(x.reciprocal().numpy(), paddle.reciprocal(x).numpy( + ))) + self.assertTrue( + np.array_equal(x.square().numpy(), paddle.square(x).numpy())) + self.assertTrue( + np.array_equal(x.softplus().numpy(), + fluid.layers.softplus(x).numpy())) + self.assertTrue( + np.array_equal(x.softsign().numpy(), + fluid.layers.softsign(x).numpy())) + self.assertTrue( + np.array_equal(x.rank().numpy(), paddle.rank(x).numpy())) + self.assertTrue( + np.array_equal(x[0].t().numpy(), paddle.t(x[0]).numpy())) + m = paddle.to_tensor(np.random.uniform(1, 2, [3, 3]), 'float32') + m = m.matmul(m.t()) + self.assertTrue( + np.array_equal(m.cholesky().numpy(), paddle.cholesky(m).numpy())) + + self.assertTrue( + np.array_equal(x.is_empty().numpy(), paddle.is_empty(x).numpy())) + self.assertTrue( + np.array_equal(x.isfinite().numpy(), paddle.isfinite(x).numpy())) + self.assertTrue( + np.array_equal( + x.cast('int32').numpy(), paddle.cast(x, 'int32').numpy())) + self.assertTrue( + np.array_equal( + x.expand([3, 2, 3]).numpy(), + paddle.expand(x, [3, 2, 3]).numpy())) + self.assertTrue( + np.array_equal( + x.tile([2, 2]).numpy(), paddle.tile(x, [2, 2]).numpy())) + self.assertTrue( + np.array_equal(x.flatten().numpy(), paddle.flatten(x).numpy())) + index = paddle.to_tensor([0, 1]) + self.assertTrue( + np.array_equal( + x.gather(index).numpy(), paddle.gather(x, index).numpy())) + index = paddle.to_tensor([[0, 1], [1, 2]]) + self.assertTrue( + np.array_equal( + x.gather_nd(index).numpy(), paddle.gather_nd(x, index).numpy())) + self.assertTrue( + np.array_equal( + x.reverse([0, 1]).numpy(), paddle.reverse(x, [0, 1]).numpy())) + self.assertTrue( + np.array_equal( + a.reshape([3, 2]).numpy(), paddle.reshape(a, [3, 2]).numpy())) + self.assertTrue( + np.array_equal( + x.slice([0, 1], [0, 0], [1, 2]).numpy(), + paddle.slice(x, [0, 1], [0, 0], [1, 2]).numpy())) + self.assertTrue( + 
np.array_equal( + x.split(2)[0].numpy(), paddle.split(x, 2)[0].numpy())) + m = paddle.to_tensor( + np.random.uniform(-1, 1, [1, 6, 1, 1]).astype(self.dtype)) + self.assertTrue( + np.array_equal( + m.squeeze([]).numpy(), paddle.squeeze(m, []).numpy())) + self.assertTrue( + np.array_equal( + m.squeeze([1, 2]).numpy(), paddle.squeeze(m, [1, 2]).numpy())) + m = paddle.to_tensor([2, 3, 3, 1, 5, 3], 'float32') + self.assertTrue( + np.array_equal(m.unique()[0].numpy(), paddle.unique(m)[0].numpy())) + self.assertTrue( + np.array_equal(m.unique_with_counts()[2], + paddle.unique_with_counts(m)[2])) + self.assertTrue(np.array_equal(x.flip([0]), paddle.flip(x, [0]))) + self.assertTrue(np.array_equal(x.unbind(0), paddle.unbind(x, 0))) + self.assertTrue(np.array_equal(x.roll(1), paddle.roll(x, 1))) + self.assertTrue(np.array_equal(x.cumsum(1), paddle.cumsum(x, 1))) + m = paddle.to_tensor(1) + self.assertTrue(np.array_equal(m.increment(), paddle.increment(m))) + m = x.abs() + self.assertTrue(np.array_equal(m.log(), paddle.log(m))) + self.assertTrue(np.array_equal(x.pow(2), paddle.pow(x, 2))) + self.assertTrue(np.array_equal(x.reciprocal(), paddle.reciprocal(x))) + + # 2. Binary operation + self.assertTrue( + np.array_equal( + x.matmul(y, True, False).numpy(), + paddle.matmul(x, y, True, False).numpy())) + self.assertTrue( + np.array_equal( + x.norm( + p='fro', axis=[0, 1]).numpy(), + paddle.norm( + x, p='fro', axis=[0, 1]).numpy())) + self.assertTrue( + np.array_equal(x.dist(y).numpy(), paddle.dist(x, y).numpy())) + self.assertTrue( + np.array_equal(x.cross(y).numpy(), paddle.cross(x, y).numpy())) + m = x.expand([2, 2, 3]) + n = y.expand([2, 2, 3]).transpose([0, 2, 1]) + self.assertTrue( + np.array_equal(m.bmm(n).numpy(), paddle.bmm(m, n).numpy())) + self.assertTrue( + np.array_equal( + x.histogram(5, -1, 1).numpy(), + paddle.histogram(x, 5, -1, 1).numpy())) + self.assertTrue( + np.array_equal(x.equal(y).numpy(), paddle.equal(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.greater_equal(y).numpy(), paddle.greater_equal(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.greater_than(y).numpy(), paddle.greater_than(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.less_equal(y).numpy(), paddle.less_equal(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.less_than(y).numpy(), paddle.less_than(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.not_equal(y).numpy(), paddle.not_equal(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.equal_all(y).numpy(), paddle.equal_all(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.allclose(y).numpy(), paddle.allclose(x, y).numpy())) + m = x.expand([2, 2, 3]) + self.assertTrue( + np.array_equal( + x.expand_as(m).numpy(), paddle.expand_as(x, m).numpy())) + index = paddle.to_tensor([2, 1, 0]) + self.assertTrue( + np.array_equal( + a.scatter(index, b).numpy(), + paddle.scatter(a, index, b).numpy())) + + # 3. 
Bool tensor operation + x = paddle.to_tensor([[True, False], [True, False]]) + y = paddle.to_tensor([[False, False], [False, True]]) + self.assertTrue( + np.array_equal(x.reduce_all().numpy(), paddle.reduce_all(x).numpy( + ))) + self.assertTrue( + np.array_equal(x.reduce_any().numpy(), paddle.reduce_any(x).numpy( + ))) + self.assertTrue( + np.array_equal( + x.logical_and(y).numpy(), paddle.logical_and(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.logical_not(y).numpy(), paddle.logical_not(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.logical_or(y).numpy(), paddle.logical_or(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.logical_xor(y).numpy(), paddle.logical_xor(x, y).numpy())) + self.assertTrue( + np.array_equal( + x.logical_and(y).numpy(), paddle.logical_and(x, y).numpy())) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py new file mode 100644 index 0000000000000000000000000000000000000000..884139a23d51c95c79439b91d501dc935baeae36 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py @@ -0,0 +1,336 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle.fluid.core as core + +import paddle +import paddle.fluid as fluid +import paddle.fluid.framework as framework + + +def reference_matmul(X, Y, transpose_X=False, transpose_Y=False): + """Reference forward implementation using np.matmul.""" + # np.matmul does not support the transpose flags, so we manually + # transpose X and Y appropriately. + if transpose_X: + if X.ndim == 1: + X = X.reshape((X.size, )) + elif X.ndim == 2: + X = X.T + else: + dim = [i for i in range(len(X.shape))] + dim[-1], dim[len(X.shape) - 2] = dim[len(X.shape) - 2], dim[-1] + X = np.transpose(X, tuple(dim)) + if transpose_Y: + if Y.ndim == 1: + Y = Y.reshape((Y.size, )) + else: + dim = [i for i in range(len(Y.shape))] + dim[-1], dim[len(Y.shape) - 2] = dim[len(Y.shape) - 2], dim[-1] + Y = np.transpose(Y, tuple(dim)) + + Out = np.matmul(X, Y) + if not Out.shape: + # We do not support 0-dimensional Tensors (scalars). So where + # np.matmul outputs a scalar, we must convert to a Tensor of + # shape (1, ) instead. + # Everywhere else, we are compatible with np.matmul. 
+ Out = np.array([Out], dtype="float64") + return Out + + +class TestMatMulV2Op(OpTest): + """ + case 1 + """ + + def config(self): + self.x_shape = (100, ) + self.y_shape = (100, ) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + def setUp(self): + self.config() + self.op_type = "matmul_v2" + x = np.random.random(self.x_shape).astype(self.dtype) + y = np.random.random(self.y_shape).astype(self.dtype) + result = reference_matmul(x, y, self.trans_x, self.trans_y) + + self.inputs = { + 'X': x, + 'Y': y, + } + self.attrs = {'trans_x': self.trans_x, 'trans_y': self.trans_y} + self.outputs = {'Out': result} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X', 'Y'], 'Out') + + +class TestMatMuklOp2(TestMatMulV2Op): + """ + case 2 + """ + + def config(self): + self.x_shape = (100, ) + self.y_shape = (1, 3, 2, 100) + self.trans_x = False + self.trans_y = True + self.dtype = "float64" + + +class TestMatMuklOp3(TestMatMulV2Op): + """ + case 3 + """ + + def config(self): + self.x_shape = (100, ) + self.y_shape = (1, 1, 100, 2) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp4(TestMatMulV2Op): + """ + case 4 + """ + + def config(self): + self.x_shape = (100, ) + self.y_shape = (1, 2, 100, 2) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp5(TestMatMulV2Op): + """ + case 5 + """ + + def config(self): + self.x_shape = (1, 1, 100, 2) + self.y_shape = (100, ) + self.trans_x = True + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp6(TestMatMulV2Op): + """ + case 6 + """ + + def config(self): + self.x_shape = (1, 2, 100, 1) + self.y_shape = (100, ) + self.trans_x = True + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp7(TestMatMulV2Op): + """ + case 7 + """ + + def config(self): + self.x_shape = (1, 2, 1, 100) + self.y_shape = (100, ) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp8(TestMatMulV2Op): + """ + case 8 + """ + + def config(self): + self.x_shape = (1, 1, 2, 100) + self.y_shape = (1, 1, 100, 2) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp9(TestMatMulV2Op): + """ + case 9 + """ + + def config(self): + self.x_shape = (1, 1, 1, 100) + self.y_shape = (2, 1, 2, 100) + self.trans_x = False + self.trans_y = True + self.dtype = "float64" + + +class TestMatMuklOp10(TestMatMulV2Op): + """ + case 10 + """ + + def config(self): + self.x_shape = (1, 1, 2, 100) + self.y_shape = (1, 2, 100, 2) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp11(TestMatMulV2Op): + """ + case 11 + """ + + def config(self): + self.x_shape = (2, 1, 2, 100) + self.y_shape = (1, 1, 100, 2) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp12(TestMatMulV2Op): + """ + case 12 + """ + + def config(self): + self.x_shape = (2, 1, 100, 2) + self.y_shape = (1, 1, 100, 2) + self.trans_x = True + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp13(TestMatMulV2Op): + """ + case 13 + """ + + def config(self): + self.x_shape = (2, 2, 100, 2) + self.y_shape = (2, 2, 100, 2) + self.trans_x = True + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp14(TestMatMulV2Op): + """ + case 14_1 + """ + + def config(self): + self.x_shape = (3, 1, 1, 100, 2) + self.y_shape = (1, 2, 2, 100, 2) + self.trans_x = True + 
self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp15(TestMatMulV2Op): + """ + case 14_2 + """ + + def config(self): + self.x_shape = (3, 1, 1, 2, 100) + self.y_shape = (1, 2, 2, 100, 1) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp16(TestMatMulV2Op): + """ + case 16 : to check the gradient for special case + """ + + def config(self): + self.x_shape = (100) + self.y_shape = (1, 2, 2, 100, 1) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + +class TestMatMuklOp17(TestMatMulV2Op): + """ + case 17 : to check the gradient for special case + """ + + def config(self): + self.x_shape = (2, 1, 100) + self.y_shape = (100) + self.trans_x = False + self.trans_y = False + self.dtype = "float64" + + +class TestMatMulV2API(unittest.TestCase): + def setUp(self): + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input_x = fluid.data(name="input_x", shape=[4, 3], dtype="float32") + input_y = fluid.data(name="input_y", shape=[3, 4], dtype="float32") + + result = paddle.matmul(input_x, input_y) + + x_np = np.random.random([4, 3]).astype("float32") + y_np = np.random.random([3, 4]).astype("float32") + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input_x": x_np, + "input_y": y_np}, + fetch_list=[result]) + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + input_x = np.random.random([4, 3]).astype("float64") + input_y = np.random.random([3, 4]).astype("float64") + x = paddle.to_tensor(input_x) + y = paddle.to_tensor(input_y) + result = paddle.matmul(x, y) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_max_op.py b/python/paddle/fluid/tests/unittests/test_max_op.py index e2bdaba91a68ff17d8d17724f8cbd5d8ad684d08..c9afc4bec66f2927a674ac15e807fe01f724c64f 100644 --- a/python/paddle/fluid/tests/unittests/test_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_max_op.py @@ -32,7 +32,7 @@ class ApiMaxTest(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data = paddle.nn.data("data", shape=[10, 10], dtype="float32") + data = paddle.static.data("data", shape=[10, 10], dtype="float32") result_max = paddle.max(x=data, axis=1) exe = paddle.static.Executor(self.place) input_data = np.random.rand(10, 10).astype(np.float32) @@ -41,7 +41,7 @@ class ApiMaxTest(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data = paddle.nn.data("data", shape=[10, 10], dtype="int64") + data = paddle.static.data("data", shape=[10, 10], dtype="int64") result_max = paddle.max(x=data, axis=0) exe = paddle.static.Executor(self.place) input_data = np.random.randint(10, size=(10, 10)).astype(np.int64) @@ -50,7 +50,7 @@ class ApiMaxTest(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data = paddle.nn.data("data", shape=[10, 10], dtype="int64") + data = paddle.static.data("data", shape=[10, 10], dtype="int64") result_max = paddle.max(x=data, axis=(0, 1)) exe = paddle.static.Executor(self.place) input_data = np.random.randint(10, size=(10, 10)).astype(np.int64) @@ 
-71,8 +71,8 @@ class ApiMaxTest(unittest.TestCase): def test_axis_type(): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data = paddle.nn.data("data", shape=[10, 10], dtype="int64") - axis = paddle.nn.data("axis", shape=[10, 10], dtype="int64") + data = paddle.static.data("data", shape=[10, 10], dtype="int64") + axis = paddle.static.data("axis", shape=[10, 10], dtype="int64") result_min = paddle.min(data, axis) self.assertRaises(TypeError, test_axis_type) diff --git a/python/paddle/fluid/tests/unittests/test_maximum_op.py b/python/paddle/fluid/tests/unittests/test_maximum_op.py index bed2b57ec596978548e745a9afb2d4225ae4e5a8..5645597007a00cac9c75ec1ae90bc00a5bc75f22 100644 --- a/python/paddle/fluid/tests/unittests/test_maximum_op.py +++ b/python/paddle/fluid/tests/unittests/test_maximum_op.py @@ -36,8 +36,8 @@ class ApiMaximumTest(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data_x = paddle.nn.data("x", shape=[10, 15], dtype="float32") - data_y = paddle.nn.data("y", shape=[10, 15], dtype="float32") + data_x = paddle.static.data("x", shape=[10, 15], dtype="float32") + data_y = paddle.static.data("y", shape=[10, 15], dtype="float32") result_max = paddle.maximum(data_x, data_y) exe = paddle.static.Executor(self.place) res, = exe.run(feed={"x": self.input_x, @@ -48,8 +48,8 @@ class ApiMaximumTest(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data_x = paddle.nn.data("x", shape=[10, 15], dtype="float32") - data_z = paddle.nn.data("z", shape=[15], dtype="float32") + data_x = paddle.static.data("x", shape=[10, 15], dtype="float32") + data_z = paddle.static.data("z", shape=[15], dtype="float32") result_max = paddle.maximum(data_x, data_z, axis=1) exe = paddle.static.Executor(self.place) res, = exe.run(feed={"x": self.input_x, diff --git a/python/paddle/fluid/tests/unittests/test_mean_op.py b/python/paddle/fluid/tests/unittests/test_mean_op.py index 3799640b98800f660e72e3c8b4580949d5deb12a..29e79b096cf790858e8e07aedc5c6b76881e8f82 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_op.py +++ b/python/paddle/fluid/tests/unittests/test_mean_op.py @@ -22,6 +22,8 @@ import paddle.fluid.core as core import paddle.fluid as fluid from paddle.fluid import Program, program_guard +np.random.seed(10) + class TestMeanOp(OpTest): def setUp(self): @@ -74,10 +76,105 @@ class TestFP16MeanOp(TestMeanOp): place, ['X'], 'Out', max_relative_error=0.8) +def ref_reduce_mean(x, axis=None, keepdim=False, reduce_all=False): + if isinstance(axis, list): + axis = tuple(axis) + if reduce_all: + axis = None + return np.mean(x, axis=axis, keepdims=keepdim) + + +class TestReduceMeanOp(OpTest): + def setUp(self): + self.op_type = 'reduce_mean' + self.dtype = 'float64' + self.shape = [2, 3, 4, 5] + self.axis = [0] + self.keepdim = False + self.reduce_all = False + self.set_attrs() + + np.random.seed(10) + x_np = np.random.uniform(-1, 1, self.shape).astype(self.dtype) + out_np = ref_reduce_mean(x_np, self.axis, self.keepdim, self.reduce_all) + self.inputs = {'X': x_np} + self.outputs = {'Out': out_np} + self.attrs = { + 'dim': self.axis, + 'keep_dim': self.keepdim, + 'reduce_all': self.reduce_all + } + + def set_attrs(self): + pass + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], ['Out']) + + +class TestReduceMeanOpDefaultAttrs(TestReduceMeanOp): + def setUp(self): + self.op_type = 
'reduce_mean' + self.dtype = 'float64' + self.shape = [2, 3, 4, 5] + + x_np = np.random.uniform(-1, 1, self.shape).astype(self.dtype) + out_np = np.mean(x_np, axis=0) + self.inputs = {'X': x_np} + self.outputs = {'Out': out_np} + + +class TestReduceMeanOpFloat32(TestReduceMeanOp): + def set_attrs(self): + self.dtype = 'float32' + + +class TestReduceMeanOpShape1D(TestReduceMeanOp): + def set_attrs(self): + self.shape = [100] + + +class TestReduceMeanOpShape6D(TestReduceMeanOp): + def set_attrs(self): + self.shape = [2, 3, 4, 5, 6, 7] + + +class TestReduceMeanOpAxisAll(TestReduceMeanOp): + def set_attrs(self): + self.axis = [0, 1, 2, 3] + + +class TestReduceMeanOpAxisTuple(TestReduceMeanOp): + def set_attrs(self): + self.axis = (0, 1, 2) + + +class TestReduceMeanOpAxisNegative(TestReduceMeanOp): + def set_attrs(self): + self.axis = [-2, -1] + + +class TestReduceMeanOpKeepdimTrue1(TestReduceMeanOp): + def set_attrs(self): + self.keepdim = True + + +class TestReduceMeanOpKeepdimTrue2(TestReduceMeanOp): + def set_attrs(self): + self.axis = [0, 1, 2, 3] + self.keepdim = True + + +class TestReduceMeanOpReduceAllTrue(TestReduceMeanOp): + def set_attrs(self): + self.reduce_all = True + + class TestMeanAPI(unittest.TestCase): - """ - test paddle.tensor.stat.mean - """ + # test paddle.tensor.stat.mean def setUp(self): self.x_shape = [2, 3, 4, 5] @@ -86,6 +183,7 @@ class TestMeanAPI(unittest.TestCase): else paddle.CPUPlace() def test_api_static(self): + paddle.enable_static() with paddle.static.program_guard(paddle.static.Program()): x = paddle.data('X', self.x_shape) out1 = paddle.mean(x) @@ -100,9 +198,11 @@ class TestMeanAPI(unittest.TestCase): fetch_list=[out1, out2, out3, out4, out5]) out_ref = np.mean(self.x) for out in res: - self.assertEqual(np.allclose(out, out_ref), True) + self.assertEqual(np.allclose(out, out_ref, rtol=1e-04), True) + + def test_api_dygraph(self): + paddle.disable_static(self.place) - def test_api_imperative(self): def test_case(x, axis=None, keepdim=False): x_tensor = paddle.to_variable(x) out = paddle.mean(x_tensor, axis, keepdim) @@ -111,9 +211,10 @@ class TestMeanAPI(unittest.TestCase): if len(axis) == 0: axis = None out_ref = np.mean(x, axis, keepdims=keepdim) - self.assertEqual(np.allclose(out.numpy(), out_ref), True) + self.assertEqual( + np.allclose( + out.numpy(), out_ref, rtol=1e-04), True) - paddle.disable_static(self.place) test_case(self.x) test_case(self.x, []) test_case(self.x, -1) @@ -124,9 +225,31 @@ class TestMeanAPI(unittest.TestCase): test_case(self.x, [0, 1, 2, 3]) paddle.enable_static() + def test_fluid_api(self): + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = fluid.data("x", shape=[10, 10], dtype="float32") + out = fluid.layers.reduce_mean(input=x, dim=1) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + x_np = np.random.rand(10, 10).astype(np.float32) + res = exe.run(feed={"x": x_np}, fetch_list=[out]) + self.assertEqual(np.allclose(res[0], np.mean(x_np, axis=1)), True) + + with fluid.dygraph.guard(): + x_np = np.random.rand(10, 10).astype(np.float32) + x = fluid.dygraph.to_variable(x_np) + out = fluid.layers.reduce_mean(input=x, dim=1) + self.assertEqual(np.allclose(out.numpy(), np.mean(x_np, axis=1)), True) + def test_errors(self): + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 12]).astype('float32') + x = paddle.to_tensor(x) + self.assertRaises(Exception, paddle.mean, x, -3) + self.assertRaises(Exception, paddle.mean, x, 2) + paddle.enable_static() with 
paddle.static.program_guard(paddle.static.Program()): - x = paddle.data('X', [10, 12], 'int8') + x = paddle.data('X', [10, 12], 'int32') self.assertRaises(TypeError, paddle.mean, x) diff --git a/python/paddle/fluid/tests/unittests/test_metrics.py b/python/paddle/fluid/tests/unittests/test_metrics.py deleted file mode 100644 index ec27884cae2b0462951f6597b1b83e58d1c8af5d..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_metrics.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import paddle.fluid as fluid -from paddle.fluid.framework import Program, program_guard - - -class TestMetricsDetectionMap(unittest.TestCase): - def test_detection_map(self): - program = fluid.Program() - with program_guard(program): - detect_res = fluid.layers.data( - name='detect_res', - shape=[10, 6], - append_batch_size=False, - dtype='float32') - label = fluid.layers.data( - name='label', - shape=[10, 1], - append_batch_size=False, - dtype='float32') - box = fluid.layers.data( - name='bbox', - shape=[10, 4], - append_batch_size=False, - dtype='float32') - map_eval = fluid.metrics.DetectionMAP( - detect_res, label, box, class_num=21) - cur_map, accm_map = map_eval.get_map_var() - self.assertIsNotNone(cur_map) - self.assertIsNotNone(accm_map) - print(str(program)) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_min_op.py b/python/paddle/fluid/tests/unittests/test_min_op.py index e8bfe55f32a122ac9259b68d6a888f93757a76be..b9eff05c5ea9fb585421b6f99bf55b3bb95bf9ff 100644 --- a/python/paddle/fluid/tests/unittests/test_min_op.py +++ b/python/paddle/fluid/tests/unittests/test_min_op.py @@ -32,7 +32,7 @@ class ApiMinTest(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data = paddle.nn.data("data", shape=[10, 10], dtype="float32") + data = paddle.static.data("data", shape=[10, 10], dtype="float32") result_min = paddle.min(x=data, axis=1) exe = paddle.static.Executor(self.place) input_data = np.random.rand(10, 10).astype(np.float32) @@ -41,7 +41,7 @@ class ApiMinTest(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data = paddle.nn.data("data", shape=[10, 10], dtype="int64") + data = paddle.static.data("data", shape=[10, 10], dtype="int64") result_min = paddle.min(x=data, axis=0) exe = paddle.static.Executor(self.place) input_data = np.random.randint(10, size=(10, 10)).astype(np.int64) @@ -50,7 +50,7 @@ class ApiMinTest(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data = paddle.nn.data("data", shape=[10, 10], dtype="int64") + data = paddle.static.data("data", shape=[10, 10], dtype="int64") result_min = paddle.min(x=data, axis=(0, 1)) exe = paddle.static.Executor(self.place) input_data = np.random.randint(10, size=(10, 
10)).astype(np.int64) @@ -71,8 +71,8 @@ class ApiMinTest(unittest.TestCase): def test_axis_type(): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data = paddle.nn.data("data", shape=[10, 10], dtype="int64") - axis = paddle.nn.data("axis", shape=[10, 10], dtype="int64") + data = paddle.static.data("data", shape=[10, 10], dtype="int64") + axis = paddle.static.data("axis", shape=[10, 10], dtype="int64") result_min = paddle.min(data, axis) self.assertRaises(TypeError, test_axis_type) diff --git a/python/paddle/fluid/tests/unittests/test_minimum_op.py b/python/paddle/fluid/tests/unittests/test_minimum_op.py index 550580407acf265e0f0bfae12961466c2d57e48a..4c08b7386ca2c5da04c0a289872dacf68a2ea040 100644 --- a/python/paddle/fluid/tests/unittests/test_minimum_op.py +++ b/python/paddle/fluid/tests/unittests/test_minimum_op.py @@ -36,8 +36,8 @@ class ApiMinimumTest(unittest.TestCase): paddle.enable_static() with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data_x = paddle.nn.data("x", shape=[10, 15], dtype="float32") - data_y = paddle.nn.data("y", shape=[10, 15], dtype="float32") + data_x = paddle.static.data("x", shape=[10, 15], dtype="float32") + data_y = paddle.static.data("y", shape=[10, 15], dtype="float32") result_min = paddle.minimum(data_x, data_y) exe = paddle.static.Executor(self.place) res, = exe.run(feed={"x": self.input_x, @@ -48,8 +48,8 @@ class ApiMinimumTest(unittest.TestCase): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): - data_x = paddle.nn.data("x", shape=[10, 15], dtype="float32") - data_z = paddle.nn.data("z", shape=[15], dtype="float32") + data_x = paddle.static.data("x", shape=[10, 15], dtype="float32") + data_z = paddle.static.data("z", shape=[15], dtype="float32") result_min = paddle.minimum(data_x, data_z, axis=1) exe = paddle.static.Executor(self.place) res, = exe.run(feed={"x": self.input_x, diff --git a/python/paddle/fluid/tests/unittests/test_mse_loss.py b/python/paddle/fluid/tests/unittests/test_mse_loss.py index 89052396cf94615aab0841090430509c38b8423f..753d96c44114a552f4bdd299602d7f13f672efbf 100644 --- a/python/paddle/fluid/tests/unittests/test_mse_loss.py +++ b/python/paddle/fluid/tests/unittests/test_mse_loss.py @@ -69,6 +69,7 @@ class TestNNMseLoss(unittest.TestCase): for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]: input_np = np.random.uniform(0.1, 0.5, dim).astype("float32") label_np = np.random.uniform(0.1, 0.5, dim).astype("float32") + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( @@ -106,6 +107,7 @@ class TestNNMseLoss(unittest.TestCase): for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]: input_np = np.random.uniform(0.1, 0.5, dim).astype("float32") label_np = np.random.uniform(0.1, 0.5, dim).astype("float32") + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( @@ -143,6 +145,7 @@ class TestNNMseLoss(unittest.TestCase): for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]: input_np = np.random.uniform(0.1, 0.5, dim).astype("float32") label_np = np.random.uniform(0.1, 0.5, dim).astype("float32") + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( @@ -177,5 +180,112 @@ class TestNNMseLoss(unittest.TestCase): self.assertTrue(dy_result.shape, [1]) +class 
TestNNFunctionalMseLoss(unittest.TestCase): + def test_NNFunctionalMseLoss_mean(self): + for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]: + input_np = np.random.uniform(0.1, 0.5, dim).astype("float32") + target_np = np.random.uniform(0.1, 0.5, dim).astype("float32") + paddle.enable_static() + prog = paddle.static.Program() + startup_prog = paddle.static.Program() + place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else paddle.CPUPlace() + with paddle.static.program_guard(prog, startup_prog): + input = paddle.data(name='input', shape=dim, dtype='float32') + target = paddle.data(name='target', shape=dim, dtype='float32') + mse_loss = paddle.nn.functional.mse_loss(input, target, 'mean') + + exe = paddle.static.Executor(place) + exe.run(startup_prog) + static_result = exe.run( + prog, + feed={"input": input_np, + "target": target_np}, + fetch_list=[mse_loss]) + + paddle.disable_static() + dy_ret = paddle.nn.functional.mse_loss( + paddle.to_variable(input_np), + paddle.to_variable(target_np), 'mean') + dy_result = dy_ret.numpy() + + sub = input_np - target_np + expected = np.mean(sub * sub) + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + self.assertTrue(dy_result.shape, [1]) + + def test_NNFunctionalMseLoss_sum(self): + for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]: + input_np = np.random.uniform(0.1, 0.5, dim).astype("float32") + target_np = np.random.uniform(0.1, 0.5, dim).astype("float32") + paddle.enable_static() + prog = paddle.static.Program() + startup_prog = paddle.static.Program() + place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else paddle.CPUPlace() + with paddle.static.program_guard(prog, startup_prog): + input = paddle.data(name='input', shape=dim, dtype='float32') + target = paddle.data(name='target', shape=dim, dtype='float32') + mse_loss = paddle.nn.functional.mse_loss(input, target, 'sum') + + exe = paddle.static.Executor(place) + exe.run(startup_prog) + static_result = exe.run( + prog, + feed={"input": input_np, + "target": target_np}, + fetch_list=[mse_loss]) + + paddle.disable_static() + dy_ret = paddle.nn.functional.mse_loss( + paddle.to_variable(input_np), + paddle.to_variable(target_np), 'sum') + dy_result = dy_ret.numpy() + + sub = input_np - target_np + expected = np.sum(sub * sub) + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + self.assertTrue(dy_result.shape, [1]) + + def test_NNFunctionalMseLoss_none(self): + for dim in [[10, 10], [2, 10, 10], [3, 3, 10, 10]]: + input_np = np.random.uniform(0.1, 0.5, dim).astype("float32") + target_np = np.random.uniform(0.1, 0.5, dim).astype("float32") + paddle.enable_static() + prog = paddle.static.Program() + startup_prog = paddle.static.Program() + place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else paddle.CPUPlace() + with paddle.static.program_guard(prog, startup_prog): + input = paddle.data(name='input', shape=dim, dtype='float32') + target = paddle.data(name='target', shape=dim, dtype='float32') + mse_loss = paddle.nn.functional.mse_loss(input, target, 'none') + + exe = paddle.static.Executor(place) + exe.run(startup_prog) + static_result = exe.run( + prog, + feed={"input": input_np, + "target": target_np}, + fetch_list=[mse_loss]) + + paddle.disable_static() + dy_ret = paddle.nn.functional.mse_loss( + paddle.to_variable(input_np), + 
paddle.to_variable(target_np), 'none') + dy_result = dy_ret.numpy() + + sub = input_np - target_np + expected = sub * sub + self.assertTrue(np.allclose(static_result, expected)) + self.assertTrue(np.allclose(static_result, dy_result)) + self.assertTrue(np.allclose(dy_result, expected)) + self.assertTrue(dy_result.shape, [1]) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_mul_op.py b/python/paddle/fluid/tests/unittests/test_mul_op.py index 8ca06aa952184daec6be59a09330c8f16f6ee1d6..5f223de1954f7b401ac031265cca8c2e661c7392 100644 --- a/python/paddle/fluid/tests/unittests/test_mul_op.py +++ b/python/paddle/fluid/tests/unittests/test_mul_op.py @@ -175,5 +175,57 @@ class TestFP16MulOp2(TestMulOp2): no_grad_set=set('Y')) +@unittest.skipIf(not core.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUMulOp1(TestMulOp): + def init_dtype_type(self): + self.dtype = np.float32 + + def test_check_output(self): + place = core.XPUPlace(0) + self.check_output_with_place(place, atol=1e-1) + + def test_check_grad_normal(self): + place = core.XPUPlace(0) + self.check_grad_with_place( + place, ['X', 'Y'], 'Out', max_relative_error=0.5) + + def test_check_grad_ingore_x(self): + place = core.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + place = core.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) + + +@unittest.skipIf(not core.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUMulOp2(TestMulOp2): + def init_dtype_type(self): + self.dtype = np.float32 + + def test_check_output(self): + place = core.XPUPlace(0) + self.check_output_with_place(place, atol=2e-1) + + def test_check_grad_normal(self): + place = core.XPUPlace(0) + self.check_grad_with_place( + place, ['X', 'Y'], 'Out', max_relative_error=0.9) + + def test_check_grad_ingore_x(self): + place = core.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + place = core.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], 'Out', max_relative_error=0.9, no_grad_set=set('Y')) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_multiply.py b/python/paddle/fluid/tests/unittests/test_multiply.py index f7f6e1f1aac678a00617e0693847b4346604a1ab..32d0f7b63b8fca495f1973d8831fdd9239cfc9f1 100644 --- a/python/paddle/fluid/tests/unittests/test_multiply.py +++ b/python/paddle/fluid/tests/unittests/test_multiply.py @@ -26,8 +26,10 @@ class TestMultiplyAPI(unittest.TestCase): def __run_static_graph_case(self, x_data, y_data, axis=-1): with program_guard(Program(), Program()): - x = paddle.nn.data(name='x', shape=x_data.shape, dtype=x_data.dtype) - y = paddle.nn.data(name='y', shape=y_data.shape, dtype=y_data.dtype) + x = paddle.static.data( + name='x', shape=x_data.shape, dtype=x_data.dtype) + y = paddle.static.data( + name='y', shape=y_data.shape, dtype=y_data.dtype) res = tensor.multiply(x, y, axis=axis) place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( @@ -109,14 +111,14 @@ class TestMultiplyError(unittest.TestCase): # test static computation graph: dtype can not be int8 paddle.enable_static() with program_guard(Program(), Program()): - x = paddle.nn.data(name='x', shape=[100], dtype=np.int8) - y = paddle.nn.data(name='y', shape=[100], 
dtype=np.int8) + x = paddle.static.data(name='x', shape=[100], dtype=np.int8) + y = paddle.static.data(name='y', shape=[100], dtype=np.int8) self.assertRaises(TypeError, tensor.multiply, x, y) # test static computation graph: inputs must be broadcastable with program_guard(Program(), Program()): - x = paddle.nn.data(name='x', shape=[20, 50], dtype=np.float64) - y = paddle.nn.data(name='y', shape=[20], dtype=np.float64) + x = paddle.static.data(name='x', shape=[20, 50], dtype=np.float64) + y = paddle.static.data(name='y', shape=[20], dtype=np.float64) self.assertRaises(fluid.core.EnforceNotMet, tensor.multiply, x, y) np.random.seed(7) diff --git a/python/paddle/fluid/tests/unittests/test_nll_loss.py b/python/paddle/fluid/tests/unittests/test_nll_loss.py index c25f8832807bc9a9da84ee44ee8172e8d1d0dd94..e7154193beaf788a9d20f3c131b1df3420918266 100644 --- a/python/paddle/fluid/tests/unittests/test_nll_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nll_loss.py @@ -907,10 +907,8 @@ class TestNLLLossInvalidArgs(unittest.TestCase): def test_x_dim_imperative_lt_2(): with fluid.dygraph.guard(): - x_np = np.array( - [0.88103855, 0.9908683, 0.6226845, 0.53331435, - 0.07999352]).astype(np.float32) - label_np = np.array([0, 2, 1, 1, 0]).astype(np.int64) + x_np = np.random.random(size=(5, )).astype(np.float64) + label_np = np.random.randint(0, 10, size=(5, )).astype(np.int64) x = paddle.to_variable(x_np) label = paddle.to_variable(label_np) nll_loss = paddle.nn.loss.NLLLoss() @@ -933,13 +931,8 @@ class TestNLLLossInvalidArgs(unittest.TestCase): def test_NLLLoss_reduction_imperative_not_sum_mean_none(): with fluid.dygraph.guard(): - x_np = np.array( - [[0.88103855, 0.9908683, 0.6226845], - [0.53331435, 0.07999352, 0.8549948], - [0.25879037, 0.39530203, 0.698465], - [0.73427284, 0.63575995, 0.18827209], - [0.05689114, 0.0862954, 0.6325046]]).astype(np.float32) - label_np = np.array([0, 2, 1, 1, 0]).astype(np.int64) + x_np = np.random.random(size=(5, 3)).astype(np.float64) + label_np = np.random.randint(0, 3, size=(5, )).astype(np.int64) x = paddle.to_variable(x_np) label = paddle.to_variable(label_np) nll_loss = paddle.nn.loss.NLLLoss(reduction='') @@ -962,13 +955,8 @@ class TestNLLLossInvalidArgs(unittest.TestCase): def test_nll_loss_function_reduction_imperative_not_sum_mean_none(): with fluid.dygraph.guard(): - x_np = np.array( - [[0.88103855, 0.9908683, 0.6226845], - [0.53331435, 0.07999352, 0.8549948], - [0.25879037, 0.39530203, 0.698465], - [0.73427284, 0.63575995, 0.18827209], - [0.05689114, 0.0862954, 0.6325046]]).astype(np.float32) - label_np = np.array([0, 2, 1, 1, 0]).astype(np.int64) + x_np = np.random.random(size=(5, 3)).astype(np.float64) + label_np = np.random.randint(0, 3, size=(5, )).astype(np.int64) x = paddle.to_variable(x_np) label = paddle.to_variable(label_np) res = paddle.nn.functional.nll_loss(x, label, reduction='') diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py new file mode 100644 index 0000000000000000000000000000000000000000..339f689998f817054611bd85b11945b61d1f649b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py @@ -0,0 +1,207 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import math +from op_test import OpTest +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.nn.functional as functional +import paddle.fluid.framework as framework +from paddle.fluid.framework import Program, program_guard + + +class TestOneHotOp(OpTest): + def setUp(self): + self.op_type = 'one_hot_v2' + depth = 10 + depth_np = np.array(10).astype('int32') + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0])]) + + out = np.zeros(shape=(np.product(x.shape), depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestOneHotOp_attr(OpTest): + def setUp(self): + self.op_type = 'one_hot_v2' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), 1, + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, 0, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestOneHotOp_default_dtype(OpTest): + def setUp(self): + self.op_type = 'one_hot_v2' + depth = 10 + depth_np = np.array(10).astype('int32') + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0])]) + + out = np.zeros(shape=(np.product(x.shape), depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} + self.attrs = {} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestOneHotOp_default_dtype_attr(OpTest): + def setUp(self): + self.op_type = 'one_hot_v2' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), 1, + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, 0, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestOneHotOp_exception(unittest.TestCase): + def setUp(self): + self.op_type = 'one_hot_v2' + self.depth = 10 + self.place = core.CPUPlace() + self.dimension = 12 + self.x = core.LoDTensor() + x_lod = [[4, 1, 
3, 3]] + data = [np.random.randint(11, 20) for i in range(sum(x_lod[0]))] + data = np.array(data).astype('int').reshape([sum(x_lod[0]), 1]) + self.x.set(data, self.place) + self.x.set_recursive_sequence_lengths(x_lod) + + def test_check_output(self): + program = Program() + with program_guard(program): + x = fluid.layers.data( + name='x', shape=[self.dimension], dtype='float32', lod_level=1) + block = program.current_block() + one_hot_out = block.create_var( + name="one_hot_out", + type=core.VarDesc.VarType.LOD_TENSOR, + dtype='float32') + block.append_op( + type='one_hot', + inputs={'X': x}, + attrs={'depth': self.depth}, + outputs={'Out': one_hot_out}) + exe = fluid.Executor(self.place) + + def run(): + exe.run(feed={'x': self.x}, + fetch_list=[one_hot_out], + return_numpy=False) + + self.assertRaises(core.EnforceNotMet, run) + + +class TestOneHotOpApi(unittest.TestCase): + def test_api(self): + num_classes = 10 + self._run(num_classes) + + def test_api_with_depthTensor(self): + num_classes = fluid.layers.assign(input=np.array([10], dtype=np.int32)) + self._run(num_classes) + + def test_api_with_dygraph(self): + num_classes = 10 + label = np.array( + [np.random.randint(0, num_classes - 1) + for i in range(6)]).reshape([6, 1]) + with fluid.dygraph.guard(): + one_hot_label = functional.one_hot( + x=fluid.dygraph.to_variable(label), num_classes=num_classes) + + def _run(self, num_classes): + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + one_hot_label = functional.one_hot(x=label, num_classes=num_classes) + + place = fluid.CPUPlace() + label_data = np.array([np.random.randint(0, 10 - 1) + for i in range(6)]).reshape([6, 1]) + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + ret = exe.run(feed={'label': label_data, }, + fetch_list=[one_hot_label], + return_numpy=False) + + +class BadInputTestOnehotV2(unittest.TestCase): + def test_error(self): + with fluid.program_guard(fluid.Program()): + + def test_bad_x(): + label = fluid.layers.data( + name="label", + shape=[4], + append_batch_size=False, + dtype="float32") + one_hot_label = functional.one_hot(x=label, num_classes=4) + + self.assertRaises(TypeError, test_bad_x) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py index 4f60f3e39a57365163cb3f5f3e061e53f8fd654b..0ebe769fb9bce1aee8412ccebc216c2c85e97775 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py @@ -54,9 +54,11 @@ def create_test_case(margin, reduction): margin=margin, reduction=reduction) with program_guard(Program(), Program()): - x = paddle.nn.data(name="x", shape=[10, 10], dtype="float64") - y = paddle.nn.data(name="y", shape=[10, 10], dtype="float64") - label = paddle.nn.data( + x = paddle.static.data( + name="x", shape=[10, 10], dtype="float64") + y = paddle.static.data( + name="y", shape=[10, 10], dtype="float64") + label = paddle.static.data( name="label", shape=[10, 10], dtype="float64") result = paddle.nn.functional.margin_ranking_loss( x, y, label, margin, reduction) @@ -78,9 +80,11 @@ def create_test_case(margin, reduction): margin=margin, reduction=reduction) with program_guard(Program(), Program()): - x = paddle.nn.data(name="x", shape=[10, 10], dtype="float64") - y = paddle.nn.data(name="y", shape=[10, 10], dtype="float64") - label = paddle.nn.data( + x = paddle.static.data( + name="x", 
shape=[10, 10], dtype="float64") + y = paddle.static.data( + name="y", shape=[10, 10], dtype="float64") + label = paddle.static.data( name="label", shape=[10, 10], dtype="float64") margin_rank_loss = paddle.nn.loss.MarginRankingLoss( margin=margin, reduction=reduction) @@ -173,6 +177,16 @@ class MarginRakingLossError(unittest.TestCase): self.assertRaises(ValueError, test_margin_value_error) + def test_functional_margin_value_error(): + x = paddle.static.data(name="x", shape=[10, 10], dtype="float64") + y = paddle.static.data(name="y", shape=[10, 10], dtype="float64") + label = paddle.static.data( + name="label", shape=[10, 10], dtype="float64") + result = paddle.nn.functional.margin_ranking_loss( + x, y, label, margin=0.1, reduction="reduction_mean") + + self.assertRaises(ValueError, test_functional_margin_value_error) + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py new file mode 100644 index 0000000000000000000000000000000000000000..d52a1f5d5b16ca7e0d58230a1a17624e5bff0b02 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py @@ -0,0 +1,107 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +import paddle.fluid.core as core +from op_test import OpTest +from scipy.special import expit, erf +import paddle +import paddle.fluid as fluid +import paddle.nn as nn +import paddle.nn.functional as functional + + +class TestNNSigmoidAPI(unittest.TestCase): + def setUp(self): + self.init_data() + + def init_data(self): + self.x_shape = [10, 15] + self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) + self.y = self.ref_forward(self.x) + + def ref_forward(self, x): + return 1 / (1 + np.exp(-x)) + + def ref_backward(self, y, dy): + return dy * y * (1 - y) + + def check_static_api(self, place): + paddle.enable_static() + main_program = paddle.static.Program() + mysigmoid = nn.Sigmoid(name="api_sigmoid") + with paddle.static.program_guard(main_program): + x = paddle.static.data(name='x', shape=self.x_shape) + x.stop_gradient = False + y = mysigmoid(x) + fluid.backward.append_backward(paddle.mean(y)) + exe = paddle.static.Executor(place) + out = exe.run(main_program, feed={'x': self.x}, fetch_list=[y]) + self.assertTrue(np.allclose(out[0], self.y)) + self.assertTrue(y.name.startswith("api_sigmoid")) + + def check_dynamic_api(self, place): + paddle.disable_static(place) + x = paddle.to_variable(self.x) + mysigmoid = nn.Sigmoid() + y = mysigmoid(x) + self.assertTrue(np.allclose(y.numpy(), self.y)) + + def test_check_api(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for place in places: + self.check_dynamic_api(place) + self.check_static_api(place) + + +class TestNNFunctionalSigmoidAPI(unittest.TestCase): + def setUp(self): + self.init_data() + + def init_data(self): + self.x_shape = [10, 15] + self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) + self.y = self.ref_forward(self.x) + + def ref_forward(self, x): + return 1 / (1 + np.exp(-x)) + + def check_static_api(self, place): + paddle.enable_static() + main_program = paddle.static.Program() + with paddle.static.program_guard(main_program): + x = paddle.static.data(name='x', shape=self.x_shape) + y = functional.sigmoid(x, name="api_sigmoid") + exe = paddle.static.Executor(fluid.CPUPlace()) + out = exe.run(main_program, feed={'x': self.x}, fetch_list=[y]) + self.assertTrue(np.allclose(out[0], self.y)) + + def check_dynamic_api(self): + paddle.disable_static() + x = paddle.to_variable(self.x) + y = functional.sigmoid(x) + self.assertTrue(np.allclose(y.numpy(), self.y)) + + def test_check_api(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for place in places: + self.check_static_api(place) + self.check_dynamic_api() diff --git a/python/paddle/fluid/tests/unittests/test_norm_all.py b/python/paddle/fluid/tests/unittests/test_norm_all.py index e6b7a3e7603f53d78052d5de309d6ed7d84c4660..0d083038c6131215dcb572eca1e782c43e82d20a 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_all.py +++ b/python/paddle/fluid/tests/unittests/test_norm_all.py @@ -23,16 +23,16 @@ import paddle.fluid as fluid def p_norm(x, axis, porder, keepdims=False): if axis is None: axis = -1 - xp = np.power(np.abs(x), porder) - s = np.sum(xp, axis=axis, keepdims=keepdims) - r = np.power(s, 1.0 / porder) + r = np.linalg.norm( + x, ord=porder, axis=axis, keepdims=keepdims).astype(x.dtype) return r def frobenius_norm(x, axis=None, keepdims=False): if isinstance(axis, list): axis = tuple(axis) if axis is None: axis = (-2, -1) - r = np.linalg.norm(x, 
ord='fro', axis=axis, keepdims=keepdims) + r = np.linalg.norm( + x, ord='fro', axis=axis, keepdims=keepdims).astype(x.dtype) return r @@ -89,6 +89,7 @@ class TestPnormOp(OpTest): 'porder': float(self.porder) } self.outputs = {'Out': norm} + self.gradient = self.calc_gradient() def test_check_output(self): self.check_output() @@ -104,6 +105,34 @@ class TestPnormOp(OpTest): self.keepdim = False self.dtype = "float64" + def calc_gradient(self): + self.attrs = { + 'epsilon': self.epsilon, + 'axis': self.axis, + 'keepdim': self.keepdim, + 'porder': float(self.porder) + } + x = self.inputs["X"] + porder = self.attrs["porder"] + axis = self.attrs["axis"] + if porder == 0: + grad = np.zeros(x.shape).astype(x.dtype) + elif porder in [float("inf"), float("-inf")]: + norm = p_norm(x, axis=axis, porder=porder, keepdims=True) + x_abs = np.abs(x) + grad = np.sign(x) + grad[x_abs != norm] = 0.0 + else: + norm = p_norm(x, axis=axis, porder=porder, keepdims=True) + grad = np.power(norm, 1 - porder) * np.power( + np.abs(x), porder - 1) * np.sign(x) + + numel = 1 + for s in x.shape: + numel *= s + numel /= x.shape[axis] + return [grad.astype(x.dtype) * 1 / numel] + class TestPnormOp2(TestPnormOp): def init_test_case(self): @@ -118,6 +147,45 @@ class TestPnormOp2(TestPnormOp): self.check_grad(['X'], 'Out') +class TestPnormOp3(TestPnormOp): + def init_test_case(self): + self.shape = [3, 20, 3] + self.axis = 2 + self.epsilon = 1e-12 + self.porder = np.inf + self.keepdim = True + self.dtype = "float32" + + def test_check_grad(self): + self.check_grad(['X'], 'Out', user_defined_grads=self.gradient) + + +class TestPnormOp4(TestPnormOp): + def init_test_case(self): + self.shape = [3, 20, 3] + self.axis = 2 + self.epsilon = 1e-12 + self.porder = -np.inf + self.keepdim = True + self.dtype = "float32" + + def test_check_grad(self): + self.check_grad(['X'], 'Out', user_defined_grads=self.gradient) + + +class TestPnormOp5(TestPnormOp): + def init_test_case(self): + self.shape = [3, 20, 3] + self.axis = 2 + self.epsilon = 1e-12 + self.porder = 0 + self.keepdim = True + self.dtype = "float32" + + def test_check_grad(self): + self.check_grad(['X'], 'Out', user_defined_grads=self.gradient) + + def run_out(self, p, axis, shape_x, shape_y, dtype): with fluid.program_guard(fluid.Program()): data1 = fluid.data(name="X", shape=shape_x, dtype=dtype) @@ -170,6 +238,9 @@ class API_NormTest(unittest.TestCase): run_fro(self, p='fro', axis=[0, 1], shape_x=[3, 3, 4], dtype="float64") run_pnorm(self, p=2, axis=None, shape_x=[3, 4], dtype="float32") run_pnorm(self, p=2, axis=1, shape_x=[3, 4], dtype="float64") + run_pnorm(self, p=np.inf, axis=1, shape_x=[3, 4], dtype="float32") + run_pnorm(self, p=-np.inf, axis=1, shape_x=[3, 4], dtype="float64") + run_pnorm(self, p=0, axis=1, shape_x=[3, 4], dtype="float64") def test_name(self): with fluid.program_guard(fluid.Program()): diff --git a/python/paddle/fluid/tests/unittests/test_normal.py b/python/paddle/fluid/tests/unittests/test_normal.py new file mode 100644 index 0000000000000000000000000000000000000000..a9d9af4d50be77bd1d2ecc11dd872ef612209f1e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_normal.py @@ -0,0 +1,197 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import paddle +import copy + +np.random.seed(10) + + +class TestNormalAPI(unittest.TestCase): + def setUp(self): + self.mean = 1.0 + self.std = 0.0 + self.shape = None + self.repeat_num = 1000 + self.set_attrs() + self.dtype = self.get_dtype() + self.place=paddle.CUDAPlace(0) \ + if paddle.fluid.core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def set_attrs(self): + self.shape = [8, 12] + + def get_shape(self): + if isinstance(self.mean, np.ndarray): + shape = self.mean.shape + elif isinstance(self.std, np.ndarray): + shape = self.std.shape + else: + shape = self.shape + return list(shape) + + def get_dtype(self): + if isinstance(self.mean, np.ndarray): + return self.mean.dtype + elif isinstance(self.std, np.ndarray): + return self.std.dtype + else: + return 'float32' + + def static_api(self): + shape = self.get_shape() + ret_all_shape = copy.deepcopy(shape) + ret_all_shape.insert(0, self.repeat_num) + ret_all = np.zeros(ret_all_shape, self.dtype) + if isinstance(self.mean, np.ndarray) \ + and isinstance(self.std, np.ndarray): + with paddle.static.program_guard(paddle.static.Program()): + mean = paddle.data('Mean', self.mean.shape, self.mean.dtype) + std = paddle.data('Std', self.std.shape, self.std.dtype) + out = paddle.normal(mean, std, self.shape) + + exe = paddle.static.Executor(self.place) + for i in range(self.repeat_num): + ret = exe.run(feed={ + 'Mean': self.mean, + 'Std': self.std.reshape(shape) + }, + fetch_list=[out]) + ret_all[i] = ret[0] + return ret_all + elif isinstance(self.mean, np.ndarray): + with paddle.static.program_guard(paddle.static.Program()): + mean = paddle.data('Mean', self.mean.shape, self.mean.dtype) + out = paddle.normal(mean, self.std, self.shape) + + exe = paddle.static.Executor(self.place) + for i in range(self.repeat_num): + ret = exe.run(feed={'Mean': self.mean}, fetch_list=[out]) + ret_all[i] = ret[0] + return ret_all + elif isinstance(self.std, np.ndarray): + with paddle.static.program_guard(paddle.static.Program()): + std = paddle.data('Std', self.std.shape, self.std.dtype) + out = paddle.normal(self.mean, std, self.shape) + + exe = paddle.static.Executor(self.place) + for i in range(self.repeat_num): + ret = exe.run(feed={'Std': self.std}, fetch_list=[out]) + ret_all[i] = ret[0] + return ret_all + else: + with paddle.static.program_guard(paddle.static.Program()): + out = paddle.normal(self.mean, self.std, self.shape) + + exe = paddle.static.Executor(self.place) + for i in range(self.repeat_num): + ret = exe.run(fetch_list=[out]) + ret_all[i] = ret[0] + return ret_all + + def dygraph_api(self): + paddle.disable_static(self.place) + shape = self.get_shape() + ret_all_shape = copy.deepcopy(shape) + ret_all_shape.insert(0, self.repeat_num) + ret_all = np.zeros(ret_all_shape, self.dtype) + + mean = paddle.to_tensor(self.mean) \ + if isinstance(self.mean, np.ndarray) else self.mean + std = paddle.to_tensor(self.std) \ + if isinstance(self.std, np.ndarray) else self.std + for i in range(self.repeat_num): + out = paddle.normal(mean, std, self.shape) + ret_all[i] = out.numpy() + paddle.enable_static() 
+ return ret_all + + def test_api(self): + ret_static = self.static_api() + ret_dygraph = self.dygraph_api() + for ret in [ret_static, ret_dygraph]: + shape_ref = self.get_shape() + self.assertEqual(shape_ref, list(ret[0].shape)) + + ret = ret.flatten().reshape([self.repeat_num, -1]) + mean = np.mean(ret, axis=0) + std = np.std(ret, axis=0) + mean_ref=self.mean.reshape([1, -1]) \ + if isinstance(self.mean, np.ndarray) else self.mean + std_ref=self.std.reshape([1, -1]) \ + if isinstance(self.std, np.ndarray) else self.std + self.assertTrue(np.allclose(mean_ref, mean, 0.1, 0.1)) + self.assertTrue(np.allclose(std_ref, std, 0.1, 0.1)) + + +class TestNormalAPI_mean_is_tensor(TestNormalAPI): + def set_attrs(self): + self.mean = np.random.uniform(-2, -1, [2, 3, 4, 5]).astype('float64') + + +class TestNormalAPI_std_is_tensor(TestNormalAPI): + def set_attrs(self): + self.std = np.random.uniform(0.7, 1, [2, 3, 17]).astype('float64') + + +class TestNormalAPI_mean_std_are_tensor(TestNormalAPI): + def set_attrs(self): + self.mean = np.random.uniform(1, 2, [1, 100]).astype('float64') + self.std = np.random.uniform(0.5, 1, [1, 100]).astype('float64') + + +class TestNormalAPI_mean_std_are_tensor_with_different_dtype(TestNormalAPI): + def set_attrs(self): + self.mean = np.random.uniform(1, 2, [100]).astype('float64') + self.std = np.random.uniform(1, 2, [100]).astype('float32') + + +class TestNormalAlias(unittest.TestCase): + def test_alias(self): + paddle.disable_static() + shape = [1, 2, 3] + out1 = paddle.normal(shape=shape) + out2 = paddle.tensor.normal(shape=shape) + out3 = paddle.tensor.random.normal(shape=shape) + paddle.enable_static() + + +class TestNormalErrors(unittest.TestCase): + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + mean = [1, 2, 3] + self.assertRaises(TypeError, paddle.normal, mean) + + std = [1, 2, 3] + self.assertRaises(TypeError, paddle.normal, std=std) + + mean = paddle.data('Mean', [100], 'int32') + self.assertRaises(TypeError, paddle.normal, mean) + + std = paddle.data('Std', [100], 'int32') + self.assertRaises(TypeError, paddle.normal, mean=1.0, std=std) + + self.assertRaises(TypeError, paddle.normal, shape=1) + + self.assertRaises(TypeError, paddle.normal, shape=[1.0]) + + shape = paddle.data('Shape', [100], 'float32') + self.assertRaises(TypeError, paddle.normal, shape=shape) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_normalize.py b/python/paddle/fluid/tests/unittests/test_normalize.py new file mode 100644 index 0000000000000000000000000000000000000000..6595a29b24ae23c9b38538035c9593ba77eecdb7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_normalize.py @@ -0,0 +1,102 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import paddle +import paddle.nn.functional as F +import paddle.fluid as fluid +import paddle.fluid.core as core +import numpy as np + + +def p_normalize(x, axis=1, p=2, epsilon=1e-12, keepdims=True): + if len(x.shape) == 1: + axis = 0 + xp = np.power(np.abs(x), p) + s = np.sum(xp, axis=axis, keepdims=keepdims) + r = np.maximum(np.power(s, 1.0 / p), epsilon) + return x / r + + +class TestNNFunctionalNormalize(unittest.TestCase): + def setUp(self): + self.input_np = np.random.random(size=(10, 10)).astype(np.float32) + self.input_np2 = np.array([0.0, 0.0]).astype(np.float32) + self.expected0 = p_normalize(self.input_np) + self.expected1 = p_normalize(self.input_np, p=1.5) + self.expected2 = p_normalize(self.input_np, axis=0) + self.expected3 = p_normalize(self.input_np2) + + def run_imperative(self): + x = paddle.to_variable(self.input_np) + y = F.normalize(x) + self.assertTrue(np.allclose(y.numpy(), self.expected0)) + + y = F.normalize(x, p=1.5) + self.assertTrue(np.allclose(y.numpy(), self.expected1)) + + y = F.normalize(x, axis=0) + self.assertTrue(np.allclose(y.numpy(), self.expected2)) + + x = paddle.to_variable(self.input_np2) + y = F.normalize(x) + self.assertTrue(np.allclose(y.numpy(), self.expected3)) + + def run_static(self, use_gpu=False): + x = paddle.data(name='input', shape=[10, 10], dtype='float32') + x2 = paddle.data(name='input2', shape=[2], dtype='float32') + result0 = F.normalize(x) + result1 = F.normalize(x, p=1.5) + result2 = F.normalize(x, axis=0) + result3 = F.normalize(x, name='aaa') + result4 = F.normalize(x2) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + static_result = exe.run( + feed={"input": self.input_np, + "input2": self.input_np2}, + fetch_list=[result0, result1, result2, result4]) + + self.assertTrue(np.allclose(static_result[0], self.expected0)) + self.assertTrue(np.allclose(static_result[1], self.expected1)) + self.assertTrue(np.allclose(static_result[2], self.expected2)) + self.assertTrue('aaa' in result3.name) + self.assertTrue(np.allclose(static_result[3], self.expected3)) + + def test_cpu(self): + paddle.disable_static(place=paddle.fluid.CPUPlace()) + self.run_imperative() + paddle.enable_static() + + with fluid.program_guard(fluid.Program()): + self.run_static() + + def test_gpu(self): + if not fluid.core.is_compiled_with_cuda(): + return + + paddle.disable_static(place=paddle.fluid.CUDAPlace(0)) + self.run_imperative() + paddle.enable_static() + + with fluid.program_guard(fluid.Program()): + self.run_static(use_gpu=True) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_numel_op.py b/python/paddle/fluid/tests/unittests/test_numel_op.py new file mode 100644 index 0000000000000000000000000000000000000000..8512bc99e7451c73e5513b834fb6aa448717c646 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_numel_op.py @@ -0,0 +1,101 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle.fluid.core as core +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard +import functools +import paddle + + +class TestNumelOp(OpTest): + def setUp(self): + self.op_type = "size" + self.init() + x = np.random.random((self.shape)).astype("float64") + self.inputs = {'Input': x, } + self.outputs = {'Out': np.array([np.size(x)])} + + def test_check_output(self): + self.check_output() + + def init(self): + self.shape = (6, 56, 8, 55) + + +class TestNumelOp1(TestNumelOp): + def init(self): + self.shape = (11, 66) + + +class TestNumelOp2(TestNumelOp): + def init(self): + self.shape = (0, ) + + +class TestNumelOoAPI(unittest.TestCase): + def test_numel_static(self): + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + shape1 = [2, 1, 4, 5] + shape2 = [1, 4, 5] + x_1 = paddle.data(shape=shape1, dtype='int32', name='x_1') + x_2 = paddle.data(shape=shape2, dtype='int32', name='x_2') + input_1 = np.random.random(shape1).astype("int32") + input_2 = np.random.random(shape2).astype("int32") + out_1 = paddle.numel(x_1) + out_2 = paddle.numel(x_2) + exe = paddle.static.Executor(place=paddle.CPUPlace()) + res_1, res_2 = exe.run(feed={ + "x_1": input_1, + "x_2": input_2, + }, + fetch_list=[out_1, out_2]) + assert (np.array_equal( + res_1, np.array([np.size(input_1)]).astype("int64"))) + assert (np.array_equal( + res_2, np.array([np.size(input_2)]).astype("int64"))) + + def test_numel_imperative(self): + paddle.disable_static(paddle.CPUPlace()) + input_1 = np.random.random([2, 1, 4, 5]).astype("int32") + input_2 = np.random.random([1, 4, 5]).astype("int32") + x_1 = paddle.to_variable(input_1) + x_2 = paddle.to_variable(input_2) + out_1 = paddle.numel(x_1) + out_2 = paddle.numel(x_2) + assert (np.array_equal(out_1.numpy().item(0), np.size(input_1))) + assert (np.array_equal(out_2.numpy().item(0), np.size(input_2))) + paddle.enable_static() + + def test_error(self): + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + + def test_x_type(): + shape = [1, 4, 5] + input_1 = np.random.random(shape).astype("int32") + out_1 = paddle.numel(input_1) + + self.assertRaises(TypeError, test_x_type) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pad3d_op.py b/python/paddle/fluid/tests/unittests/test_pad3d_op.py new file mode 100644 index 0000000000000000000000000000000000000000..68589e6d8182f9e6dfdabfc7bce4c20bec521740 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_pad3d_op.py @@ -0,0 +1,670 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import paddle.fluid.core as core + +from paddle.fluid import Program, program_guard, Executor, default_main_program + + +class TestPad3dOp(OpTest): + def setUp(self): + paddle.enable_static() + self.value = 0.0 + self.variable_paddings = False + self.initTestCase() + self.op_type = "pad3d" + self.inputs = {'X': np.random.random(self.shape).astype("float64")} + self.attrs = {} + if self.variable_paddings: + self.attrs['paddings'] = [] + self.inputs['Paddings'] = np.array(self.paddings).flatten().astype( + "int32") + else: + self.attrs['paddings'] = np.array(self.paddings).flatten().astype( + "int32") + self.attrs['value'] = self.value + self.attrs['mode'] = self.mode + self.attrs['data_format'] = self.data_format + if self.data_format == "NCDHW": + paddings = [ + (0, 0), + (0, 0), + (self.paddings[4], self.paddings[5]), + (self.paddings[2], self.paddings[3]), + (self.paddings[0], self.paddings[1]), + ] + else: + paddings = [ + (0, 0), + (self.paddings[4], self.paddings[5]), + (self.paddings[2], self.paddings[3]), + (self.paddings[0], self.paddings[1]), + (0, 0), + ] + if self.mode == "constant": + out = np.pad(self.inputs['X'], + paddings, + mode=self.mode, + constant_values=self.value) + elif self.mode == "reflect": + out = np.pad(self.inputs['X'], paddings, mode=self.mode) + elif self.mode == "replicate": + out = np.pad(self.inputs['X'], paddings, mode="edge") + elif self.mode == "circular": + out = np.pad(self.inputs['X'], paddings, mode="wrap") + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X'], 'Out') + + def initTestCase(self): + self.shape = (2, 3, 4, 5, 6) + self.paddings = [0, 0, 0, 0, 0, 0] + self.mode = "constant" + self.data_format = "NCDHW" + self.pad_value = 0.0 + + +class TestCase1(TestPad3dOp): + def initTestCase(self): + self.shape = (2, 3, 4, 5, 6) + self.paddings = [0, 1, 2, 3, 4, 5] + self.mode = "constant" + self.data_format = "NCDHW" + self.value = 1.0 + + +class TestCase2(TestPad3dOp): + def initTestCase(self): + self.shape = (2, 3, 4, 5, 6) + self.paddings = [1, 1, 1, 1, 1, 1] + self.mode = "constant" + self.data_format = "NDHWC" + self.value = 1.0 + + +class TestCase3(TestPad3dOp): + def initTestCase(self): + self.shape = (2, 3, 4, 5, 6) + self.paddings = [0, 1, 1, 0, 2, 3] + self.mode = "reflect" + self.data_format = "NCDHW" + + +class TestCase4(TestPad3dOp): + def initTestCase(self): + self.shape = (4, 4, 4, 4, 4) + self.paddings = [0, 1, 2, 1, 2, 3] + self.mode = "reflect" + self.data_format = "NDHWC" + + +class TestCase5(TestPad3dOp): + def initTestCase(self): + self.shape = (2, 3, 4, 5, 6) + self.paddings = [0, 1, 2, 3, 2, 1] + self.mode = "replicate" + self.data_format = "NCDHW" + + +class TestCase6(TestPad3dOp): + def initTestCase(self): + self.shape = (4, 4, 4, 4, 4) + self.paddings = [5, 4, 2, 1, 2, 3] + self.mode = "replicate" + self.data_format = "NDHWC" + + +class TestCase7(TestPad3dOp): + def initTestCase(self): + self.shape = (2, 3, 4, 5, 6) + self.paddings = [0, 1, 2, 3, 2, 1] + self.mode = "circular" + self.data_format = "NCDHW" + + +class TestCase8(TestPad3dOp): + def initTestCase(self): + self.shape = (4, 4, 4, 4, 4) + self.paddings = [0, 1, 2, 1, 2, 3] + self.mode = "circular" + self.data_format = "NDHWC" + + +class TestPadAPI(unittest.TestCase): + def setUp(self): + self.places = [paddle.CPUPlace()] + if 
core.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + + def check_static_result_1(self, place): + paddle.enable_static() + with program_guard(Program(), Program()): + input_shape = (1, 2, 3, 4, 5) + pad = [1, 2, 1, 1, 3, 4] + mode = "constant" + value = 100 + input_data = np.random.rand(*input_shape).astype(np.float32) + x = paddle.data(name="x", shape=input_shape) + result = F.pad(x=x, pad=pad, value=value, mode=mode) + exe = Executor(place) + fetches = exe.run(default_main_program(), + feed={"x": input_data}, + fetch_list=[result]) + + np_out = self._get_numpy_out(input_data, pad, mode, value) + self.assertTrue(np.allclose(fetches[0], np_out)) + + def check_static_result_2(self, place): + paddle.enable_static() + with program_guard(Program(), Program()): + input_shape = (2, 3, 4, 5, 6) + pad = [1, 2, 1, 1, 1, 2] + mode = "reflect" + input_data = np.random.rand(*input_shape).astype(np.float32) + x = paddle.data(name="x", shape=input_shape) + result1 = F.pad(x=x, pad=pad, mode=mode, data_format="NCDHW") + result2 = F.pad(x=x, pad=pad, mode=mode, data_format="NDHWC") + exe = Executor(place) + fetches = exe.run(default_main_program(), + feed={"x": input_data}, + fetch_list=[result1, result2]) + + np_out1 = self._get_numpy_out( + input_data, pad, mode, data_format="NCDHW") + np_out2 = self._get_numpy_out( + input_data, pad, mode, data_format="NDHWC") + self.assertTrue(np.allclose(fetches[0], np_out1)) + self.assertTrue(np.allclose(fetches[1], np_out2)) + + def check_static_result_3(self, place): + paddle.enable_static() + with program_guard(Program(), Program()): + input_shape = (2, 3, 4, 5, 6) + pad = [1, 2, 1, 1, 3, 4] + mode = "replicate" + input_data = np.random.rand(*input_shape).astype(np.float32) + x = paddle.data(name="x", shape=input_shape) + result1 = F.pad(x=x, pad=pad, mode=mode, data_format="NCDHW") + result2 = F.pad(x=x, pad=pad, mode=mode, data_format="NDHWC") + exe = Executor(place) + fetches = exe.run(default_main_program(), + feed={"x": input_data}, + fetch_list=[result1, result2]) + + np_out1 = self._get_numpy_out( + input_data, pad, mode, data_format="NCDHW") + np_out2 = self._get_numpy_out( + input_data, pad, mode, data_format="NDHWC") + self.assertTrue(np.allclose(fetches[0], np_out1)) + self.assertTrue(np.allclose(fetches[1], np_out2)) + + def check_static_result_4(self, place): + paddle.enable_static() + with program_guard(Program(), Program()): + input_shape = (2, 3, 4, 5, 6) + pad = [1, 2, 1, 1, 3, 4] + mode = "circular" + input_data = np.random.rand(*input_shape).astype(np.float32) + x = paddle.data(name="x", shape=input_shape) + result1 = F.pad(x=x, pad=pad, mode=mode, data_format="NCDHW") + result2 = F.pad(x=x, pad=pad, mode=mode, data_format="NDHWC") + exe = Executor(place) + fetches = exe.run(default_main_program(), + feed={"x": input_data}, + fetch_list=[result1, result2]) + + np_out1 = self._get_numpy_out( + input_data, pad, mode, data_format="NCDHW") + np_out2 = self._get_numpy_out( + input_data, pad, mode, data_format="NDHWC") + self.assertTrue(np.allclose(fetches[0], np_out1)) + self.assertTrue(np.allclose(fetches[1], np_out2)) + + def _get_numpy_out(self, + input_data, + pad, + mode, + value=0, + data_format="NCDHW"): + if data_format == "NCDHW": + pad = [ + (0, 0), + (0, 0), + (pad[4], pad[5]), + (pad[2], pad[3]), + (pad[0], pad[1]), + ] + elif data_format == "NDHWC": + pad = [ + (0, 0), + (pad[4], pad[5]), + (pad[2], pad[3]), + (pad[0], pad[1]), + (0, 0), + ] + elif data_format == "NCHW": + pad = [ + (0, 0), + (0, 0), + (pad[2], 
pad[3]), + (pad[0], pad[1]), + ] + elif data_format == "NHWC": + pad = [ + (0, 0), + (pad[2], pad[3]), + (pad[0], pad[1]), + (0, 0), + ] + elif data_format == "NCL": + pad = [ + (0, 0), + (0, 0), + (pad[0], pad[1]), + ] + elif data_format == "NLC": + pad = [ + (0, 0), + (pad[0], pad[1]), + (0, 0), + ] + + if mode == "constant": + out = np.pad(input_data, pad, mode=mode, constant_values=value) + elif mode == "reflect": + out = np.pad(input_data, pad, mode=mode) + elif mode == "replicate": + out = np.pad(input_data, pad, mode="edge") + elif mode == "circular": + out = np.pad(input_data, pad, mode="wrap") + + return out + + def test_static(self): + for place in self.places: + self.check_static_result_1(place=place) + self.check_static_result_2(place=place) + self.check_static_result_3(place=place) + self.check_static_result_4(place=place) + + def test_dygraph_1(self): + paddle.disable_static() + + input_shape = (1, 2, 3, 4, 5) + pad = [1, 2, 1, 1, 3, 4] + mode = "constant" + value = 100 + input_data = np.random.rand(*input_shape).astype(np.float32) + np_out1 = self._get_numpy_out( + input_data, pad, mode, value, data_format="NCDHW") + np_out2 = self._get_numpy_out( + input_data, pad, mode, value, data_format="NDHWC") + tensor_data = paddle.to_tensor(input_data) + + y1 = F.pad(tensor_data, + pad=pad, + mode=mode, + value=value, + data_format="NCDHW") + y2 = F.pad(tensor_data, + pad=pad, + mode=mode, + value=value, + data_format="NDHWC") + + self.assertTrue(np.allclose(y1.numpy(), np_out1)) + self.assertTrue(np.allclose(y2.numpy(), np_out2)) + + def test_dygraph_2(self): + paddle.disable_static() + + input_shape = (2, 3, 4, 5) + pad = [1, 1, 3, 4] + mode = "constant" + value = 100 + input_data = np.random.rand(*input_shape).astype(np.float32) + np_out1 = self._get_numpy_out( + input_data, pad, mode, value, data_format="NCHW") + np_out2 = self._get_numpy_out( + input_data, pad, mode, value, data_format="NHWC") + + tensor_data = paddle.to_tensor(input_data) + tensor_pad = paddle.to_tensor(pad, dtype="int32") + + y1 = F.pad(tensor_data, + pad=tensor_pad, + mode=mode, + value=value, + data_format="NCHW") + y2 = F.pad(tensor_data, + pad=tensor_pad, + mode=mode, + value=value, + data_format="NHWC") + + self.assertTrue(np.allclose(y1.numpy(), np_out1)) + self.assertTrue(np.allclose(y2.numpy(), np_out2)) + + def test_dygraph_2(self): + paddle.disable_static() + + input_shape = (2, 3, 4, 5) + pad = [1, 1, 3, 4] + mode = "constant" + value = 100 + input_data = np.random.rand(*input_shape).astype(np.float32) + np_out1 = self._get_numpy_out( + input_data, pad, mode, value, data_format="NCHW") + np_out2 = self._get_numpy_out( + input_data, pad, mode, value, data_format="NHWC") + tensor_data = paddle.to_tensor(input_data) + tensor_pad = paddle.to_tensor(pad, dtype="int32") + + y1 = F.pad(tensor_data, + pad=tensor_pad, + mode=mode, + value=value, + data_format="NCHW") + y2 = F.pad(tensor_data, + pad=tensor_pad, + mode=mode, + value=value, + data_format="NHWC") + + self.assertTrue(np.allclose(y1.numpy(), np_out1)) + self.assertTrue(np.allclose(y2.numpy(), np_out2)) + + def test_dygraph_3(self): + paddle.disable_static() + + input_shape = (3, 4, 5) + pad = [3, 4] + mode = "constant" + value = 100 + input_data = np.random.rand(*input_shape).astype(np.float32) + np_out1 = self._get_numpy_out( + input_data, pad, mode, value, data_format="NCL") + np_out2 = self._get_numpy_out( + input_data, pad, mode, value, data_format="NLC") + tensor_data = paddle.to_tensor(input_data) + tensor_pad = paddle.to_tensor(pad, 
dtype="int32") + + y1 = F.pad(tensor_data, + pad=tensor_pad, + mode=mode, + value=value, + data_format="NCL") + y2 = F.pad(tensor_data, + pad=tensor_pad, + mode=mode, + value=value, + data_format="NLC") + + self.assertTrue(np.allclose(y1.numpy(), np_out1)) + self.assertTrue(np.allclose(y2.numpy(), np_out2)) + + +class TestPad1dAPI(unittest.TestCase): + def _get_numpy_out(self, + input_data, + pad, + mode, + value=0.0, + data_format="NCL"): + if data_format == "NCL": + pad = [ + (0, 0), + (0, 0), + (pad[0], pad[1]), + ] + else: + pad = [ + (0, 0), + (pad[0], pad[1]), + (0, 0), + ] + + if mode == "constant": + out = np.pad(input_data, pad, mode=mode, constant_values=value) + elif mode == "reflect": + out = np.pad(input_data, pad, mode=mode) + elif mode == "replicate": + out = np.pad(input_data, pad, mode="edge") + + return out + + def setUp(self): + self.places = [paddle.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + + def test_class(self): + paddle.disable_static() + for place in self.places: + input_shape = (3, 4, 5) + pad = [1, 2] + value = 100 + input_data = np.random.rand(*input_shape).astype(np.float32) + + pad_reflection = nn.ReflectionPad1d(padding=pad) + pad_replication = nn.ReplicationPad1d(padding=pad) + pad_constant = nn.ConstantPad1d(padding=pad, value=value) + + data = paddle.to_tensor(input_data) + + output = pad_reflection(data) + np_out = self._get_numpy_out( + input_data, pad, "reflect", data_format="NCL") + self.assertTrue(np.allclose(output.numpy(), np_out)) + + output = pad_replication(data) + np_out = self._get_numpy_out( + input_data, pad, "replicate", data_format="NCL") + self.assertTrue(np.allclose(output.numpy(), np_out)) + + output = pad_constant(data) + np_out = self._get_numpy_out( + input_data, pad, "constant", value=value, data_format="NCL") + self.assertTrue(np.allclose(output.numpy(), np_out)) + + +class TestPad2dAPI(unittest.TestCase): + def _get_numpy_out(self, + input_data, + pad, + mode, + value=0.0, + data_format="NCHW"): + if data_format == "NCHW": + pad = [ + (0, 0), + (0, 0), + (pad[2], pad[3]), + (pad[0], pad[1]), + ] + else: + pad = [ + (0, 0), + (pad[2], pad[3]), + (pad[0], pad[1]), + (0, 0), + ] + + if mode == "constant": + out = np.pad(input_data, pad, mode=mode, constant_values=value) + elif mode == "reflect": + out = np.pad(input_data, pad, mode=mode) + elif mode == "replicate": + out = np.pad(input_data, pad, mode="edge") + + return out + + def setUp(self): + self.places = [paddle.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + + def test_class(self): + paddle.disable_static() + for place in self.places: + input_shape = (3, 4, 5, 6) + pad = [1, 2, 2, 1] + value = 100 + input_data = np.random.rand(*input_shape).astype(np.float32) + + pad_reflection = nn.ReflectionPad2d(padding=pad) + pad_replication = nn.ReplicationPad2d(padding=pad) + pad_constant = nn.ConstantPad2d(padding=pad, value=value) + pad_zero = nn.ZeroPad2d(padding=pad) + + data = paddle.to_tensor(input_data) + + output = pad_reflection(data) + np_out = self._get_numpy_out( + input_data, pad, "reflect", data_format="NCHW") + self.assertTrue(np.allclose(output.numpy(), np_out)) + + output = pad_replication(data) + np_out = self._get_numpy_out( + input_data, pad, "replicate", data_format="NCHW") + self.assertTrue(np.allclose(output.numpy(), np_out)) + + output = pad_constant(data) + np_out = self._get_numpy_out( + input_data, pad, "constant", value=value, data_format="NCHW") + 
self.assertTrue(np.allclose(output.numpy(), np_out)) + + output = pad_zero(data) + np_out = self._get_numpy_out( + input_data, pad, "constant", value=0, data_format="NCHW") + self.assertTrue(np.allclose(output.numpy(), np_out)) + + +class TestPad3dAPI(unittest.TestCase): + def _get_numpy_out(self, + input_data, + pad, + mode, + value=0.0, + data_format="NCDHW"): + if data_format == "NCDHW": + pad = [ + (0, 0), + (0, 0), + (pad[4], pad[5]), + (pad[2], pad[3]), + (pad[0], pad[1]), + ] + else: + pad = [ + (0, 0), + (pad[4], pad[5]), + (pad[2], pad[3]), + (pad[0], pad[1]), + (0, 0), + ] + + if mode == "constant": + out = np.pad(input_data, pad, mode=mode, constant_values=value) + elif mode == "reflect": + out = np.pad(input_data, pad, mode=mode) + elif mode == "replicate": + out = np.pad(input_data, pad, mode="edge") + + return out + + def setUp(self): + self.places = [paddle.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + + def test_class(self): + paddle.disable_static() + for place in self.places: + input_shape = (3, 4, 5, 6, 7) + pad = [1, 2, 2, 1, 1, 0] + value = 100 + input_data = np.random.rand(*input_shape).astype(np.float32) + + pad_replication = nn.ReplicationPad3d(padding=pad) + pad_constant = nn.ConstantPad3d(padding=pad, value=value) + + data = paddle.to_tensor(input_data) + + output = pad_replication(data) + np_out = self._get_numpy_out( + input_data, pad, "replicate", data_format="NCDHW") + self.assertTrue(np.allclose(output.numpy(), np_out)) + + output = pad_constant(data) + np_out = self._get_numpy_out( + input_data, pad, "constant", value=value, data_format="NCDHW") + self.assertTrue(np.allclose(output.numpy(), np_out)) + + +class TestPad3dOpError(unittest.TestCase): + def test_errors(self): + def test_variable(): + input_shape = (1, 2, 3, 4, 5) + data = np.random.rand(*input_shape).astype(np.float32) + F.pad(x=data, paddings=[1, 1, 1, 1, 1, 1]) + + def test_reflect_1(): + input_shape = (1, 2, 3, 4, 5) + data = np.random.rand(*input_shape).astype(np.float32) + x = paddle.data(name="x", shape=input_shape) + y = F.pad(x, pad=[5, 6, 1, 1, 1, 1], value=1, mode='reflect') + place = paddle.CPUPlace() + exe = Executor(place) + outputs = exe.run(feed={'x': data}, fetch_list=[y.name]) + + def test_reflect_2(): + input_shape = (1, 2, 3, 4, 5) + data = np.random.rand(*input_shape).astype(np.float32) + x = paddle.data(name="x", shape=input_shape) + y = F.pad(x, pad=[1, 1, 4, 3, 1, 1], value=1, mode='reflect') + place = paddle.CPUPlace() + exe = Executor(place) + outputs = exe.run(feed={'x': data}, fetch_list=[y.name]) + + def test_reflect_3(): + input_shape = (1, 2, 3, 4, 5) + data = np.random.rand(*input_shape).astype(np.float32) + x = paddle.data(name="x", shape=input_shape) + y = F.pad(x, pad=[1, 1, 1, 1, 2, 3], value=1, mode='reflect') + place = paddle.CPUPlace() + exe = Executor(place) + outputs = exe.run(feed={'x': data}, fetch_list=[y.name]) + + self.assertRaises(TypeError, test_variable) + + self.assertRaises(Exception, test_reflect_1) + + self.assertRaises(Exception, test_reflect_2) + + self.assertRaises(Exception, test_reflect_3) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sync_batch_norm.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sync_batch_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..7cf1e9711b74b31e15b732f87addbc9fa653152f --- /dev/null +++ 
b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_sync_batch_norm.py @@ -0,0 +1,40 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +from test_dist_base import TestDistBase +import paddle.fluid as fluid + +import os +flag_name = os.path.splitext(__file__)[0] + + +class TestParallelDygraphMnist(TestDistBase): + def _setup_config(self): + self._sync_mode = False + self._nccl2_mode = True + self._dygraph = True + + def test_mnist(self): + if fluid.core.is_compiled_with_cuda(): + self.check_with_place( + "parallel_dygraph_sync_batch_norm.py", + delta=1e-5, + check_error_log=True, + log_name=flag_name) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py index 0bcb4be3b7fb9380932cf137ac8e4939dcd77288..cf93f39ab8c5c92aa075f2f0a7dca9a5c5d9f485 100644 --- a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py +++ b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py @@ -16,16 +16,17 @@ from __future__ import print_function import unittest import numpy as np + from op_test import OpTest +import paddle +import paddle.nn.functional as F +import paddle.fluid.core as core +import paddle.fluid as fluid -class TestPixelShuffle(OpTest): - def setUp(self): - self.op_type = "pixel_shuffle" - n, c, h, w = 2, 9, 4, 4 - up_factor = 3 - shape = [n, c, h, w] - x = np.random.random(shape).astype("float64") +def pixel_shuffle_np(x, up_factor, data_format="NCHW"): + if data_format == "NCHW": + n, c, h, w = x.shape new_shape = (n, c // (up_factor * up_factor), up_factor, up_factor, h, w) # reshape to (num,output_channel,upscale_factor,upscale_factor,h,w) @@ -34,10 +35,42 @@ class TestPixelShuffle(OpTest): npresult = npresult.transpose(0, 1, 4, 2, 5, 3) oshape = [n, c // (up_factor * up_factor), h * up_factor, w * up_factor] npresult = np.reshape(npresult, oshape) + return npresult + else: + n, h, w, c = x.shape + new_shape = (n, h, w, c // (up_factor * up_factor), up_factor, + up_factor) + # reshape to (num,h,w,output_channel,upscale_factor,upscale_factor) + npresult = np.reshape(x, new_shape) + # transpose to (num,h,upscale_factor,w,upscale_factor,output_channel) + npresult = npresult.transpose(0, 1, 4, 2, 5, 3) + oshape = [n, h * up_factor, w * up_factor, c // (up_factor * up_factor)] + npresult = np.reshape(npresult, oshape) + return npresult + + +class TestPixelShuffleOp(OpTest): + def setUp(self): + self.op_type = "pixel_shuffle" + self.init_data_format() + n, c, h, w = 2, 9, 4, 4 + + if self.format == "NCHW": + shape = [n, c, h, w] + if self.format == "NHWC": + shape = [n, h, w, c] + + up_factor = 3 + + x = np.random.random(shape).astype("float64") + npresult = pixel_shuffle_np(x, up_factor, self.format) self.inputs = {'X': x} self.outputs = {'Out': npresult} - self.attrs = {'upscale_factor': up_factor} + self.attrs = {'upscale_factor': 
up_factor, "data_format": self.format} + + def init_data_format(self): + self.format = "NCHW" def test_check_output(self): self.check_output() @@ -46,5 +79,141 @@ class TestPixelShuffle(OpTest): self.check_grad(['X'], 'Out') +class TestChannelLast(TestPixelShuffleOp): + def init_data_format(self): + self.format = "NHWC" + + +class TestPixelShuffleAPI(unittest.TestCase): + def setUp(self): + self.x_1_np = np.random.random([2, 9, 4, 4]).astype("float64") + self.x_2_np = np.random.random([2, 4, 4, 9]).astype("float64") + self.out_1_np = pixel_shuffle_np(self.x_1_np, 3) + self.out_2_np = pixel_shuffle_np(self.x_2_np, 3, "NHWC") + + def test_static_graph_functional(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + + paddle.enable_static() + x_1 = paddle.data(name="x", shape=[2, 9, 4, 4], dtype="float64") + x_2 = paddle.data(name="x2", shape=[2, 4, 4, 9], dtype="float64") + out_1 = F.pixel_shuffle(x_1, 3) + out_2 = F.pixel_shuffle(x_2, 3, "NHWC") + + exe = paddle.static.Executor(place=place) + res_1 = exe.run(fluid.default_main_program(), + feed={"x": self.x_1_np}, + fetch_list=out_1, + use_prune=True) + + res_2 = exe.run(fluid.default_main_program(), + feed={"x2": self.x_2_np}, + fetch_list=out_2, + use_prune=True) + + assert np.allclose(res_1, self.out_1_np) + assert np.allclose(res_2, self.out_2_np) + + # same test between layer and functional in this op. + def test_static_graph_layer(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + + paddle.enable_static() + x_1 = paddle.data(name="x", shape=[2, 9, 4, 4], dtype="float64") + x_2 = paddle.data(name="x2", shape=[2, 4, 4, 9], dtype="float64") + # init instance + ps_1 = paddle.nn.PixelShuffle(3) + ps_2 = paddle.nn.PixelShuffle(3, "NHWC") + out_1 = ps_1(x_1) + out_2 = ps_2(x_2) + out_1_np = pixel_shuffle_np(self.x_1_np, 3) + out_2_np = pixel_shuffle_np(self.x_2_np, 3, "NHWC") + + exe = paddle.static.Executor(place=place) + res_1 = exe.run(fluid.default_main_program(), + feed={"x": self.x_1_np}, + fetch_list=out_1, + use_prune=True) + + res_2 = exe.run(fluid.default_main_program(), + feed={"x2": self.x_2_np}, + fetch_list=out_2, + use_prune=True) + + assert np.allclose(res_1, out_1_np) + assert np.allclose(res_2, out_2_np) + + def run_dygraph(self, up_factor, data_format): + + n, c, h, w = 2, 9, 4, 4 + + if data_format == "NCHW": + shape = [n, c, h, w] + if data_format == "NHWC": + shape = [n, h, w, c] + + x = np.random.random(shape).astype("float64") + + npresult = pixel_shuffle_np(x, up_factor, data_format) + + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + + paddle.disable_static(place=place) + + pixel_shuffle = paddle.nn.PixelShuffle( + up_factor, data_format=data_format) + result = pixel_shuffle(paddle.to_tensor(x)) + + self.assertTrue(np.allclose(result.numpy(), npresult)) + + result_functional = F.pixel_shuffle( + paddle.to_tensor(x), 3, data_format) + self.assertTrue(np.allclose(result_functional.numpy(), npresult)) + + def test_dygraph1(self): + self.run_dygraph(3, "NCHW") + + def test_dygraph2(self): + self.run_dygraph(3, "NHWC") + + +class TestPixelShuffleError(unittest.TestCase): + def test_error_functional(self): + def error_upscale_factor(): + with paddle.fluid.dygraph.guard(): + x = np.random.random([2, 9, 4, 
4]).astype("float64") + pixel_shuffle = F.pixel_shuffle(paddle.to_tensor(x), 3.33) + + self.assertRaises(TypeError, error_upscale_factor) + + def error_data_format(): + with paddle.fluid.dygraph.guard(): + x = np.random.random([2, 9, 4, 4]).astype("float64") + pixel_shuffle = F.pixel_shuffle(paddle.to_tensor(x), 3, "WOW") + + self.assertRaises(ValueError, error_data_format) + + def test_error_layer(self): + def error_upscale_factor_layer(): + with paddle.fluid.dygraph.guard(): + x = np.random.random([2, 9, 4, 4]).astype("float64") + ps = paddle.nn.PixelShuffle(3.33) + + self.assertRaises(TypeError, error_upscale_factor_layer) + + def error_data_format_layer(): + with paddle.fluid.dygraph.guard(): + x = np.random.random([2, 9, 4, 4]).astype("float64") + ps = paddle.nn.PixelShuffle(3, "MEOW") + + self.assertRaises(ValueError, error_data_format_layer) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py new file mode 100644 index 0000000000000000000000000000000000000000..b1a25ad3529e8b0a4126bc458838ecd876e5af30 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -0,0 +1,373 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import unittest +from op_test import OpTest +import paddle.fluid.core as core +import paddle.fluid as fluid +from paddle.fluid import compiler, Program, program_guard +import paddle +import paddle.nn.functional as F + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def max_pool1D_forward_naive(x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + exclusive=False, + adaptive=False, + data_type=np.float64): + N, C, L = x.shape + if global_pool == 1: + ksize = [L] + if adaptive: + L_out = ksize[0] + else: + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 + ) // strides[0] + 1 if ceil_mode else ( + L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 + + out = np.zeros((N, C, L_out)) + for i in range(L_out): + if adaptive: + r_start = adaptive_start_index(i, L, ksize[0]) + r_end = adaptive_end_index(i, L, ksize[0]) + else: + r_start = np.max((i * strides[0] - paddings[0], 0)) + r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L)) + x_masked = x[:, :, r_start:r_end] + + out[:, :, i] = np.max(x_masked, axis=(2)) + return out + + +def avg_pool1D_forward_naive(x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + exclusive=False, + adaptive=False, + data_type=np.float64): + N, C, L = x.shape + if global_pool == 1: + ksize = [L] + if adaptive: + L_out = ksize[0] + else: + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 + ) // strides[0] + 1 if ceil_mode else ( + L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 + + out = np.zeros((N, C, L_out)) + for i in range(L_out): + if adaptive: + r_start = adaptive_start_index(i, L, ksize[0]) + r_end = adaptive_end_index(i, L, ksize[0]) + else: + r_start = np.max((i * strides[0] - paddings[0], 0)) + r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L)) + x_masked = x[:, :, r_start:r_end] + + field_size = (r_end - r_start) \ + if (exclusive or adaptive) else (ksize[0]) + if data_type == np.int8 or data_type == np.uint8: + # x_masked is 3-D (N, C, window), so reduce over the window axis only. + out[:, :, i] = (np.rint( + np.sum(x_masked, axis=(2)) / field_size)).astype(data_type) + else: + out[:, :, i] = (np.sum(x_masked, axis=(2)) / + field_size).astype(data_type) + return out + + +class TestPool1d_API(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_avg_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") + result = F.avg_pool1d(input, kernel_size=2, stride=2, padding=0) + + input_np = np.random.random([2, 3, 32]).astype("float32") + result_np = avg_pool1D_forward_naive( + input_np, ksize=[2], strides=[2], paddings=[0], ceil_mode=False) + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def check_avg_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.avg_pool1d(input, kernel_size=2, stride=2, padding=[0]) + + result_np = avg_pool1D_forward_naive( + input_np, ksize=[2], strides=[2],
paddings=[0]) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool1d_dg = paddle.nn.layer.AvgPool1d( + kernel_size=2, stride=None, padding=0) + result = avg_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_max_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") + result = F.max_pool1d(input, kernel_size=2, stride=2, padding=[0]) + + input_np = np.random.random([2, 3, 32]).astype("float32") + result_np = max_pool1D_forward_naive( + input_np, ksize=[2], strides=[2], paddings=[0]) + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def check_max_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.max_pool1d(input, kernel_size=2, stride=2, padding=0) + + result_np = max_pool1D_forward_naive( + input_np, ksize=[2], strides=[2], paddings=[0]) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + max_pool1d_dg = paddle.nn.layer.MaxPool1d( + kernel_size=2, stride=None, padding=0) + result = max_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_adaptive_max_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.adaptive_max_pool1d(input, output_size=16) + + result_np = max_pool1D_forward_naive( + input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1d( + output_size=16) + result = ada_max_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_adaptive_avg_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.adaptive_avg_pool1d(input, output_size=16) + result_np = avg_pool1D_forward_naive( + input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + ada_max_pool1d_dg = paddle.nn.layer.AdaptiveAvgPool1d( + output_size=16) + result = ada_max_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_adaptive_max_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") + result = F.adaptive_max_pool1d(input, output_size=16) + + input_np = np.random.random([2, 3, 32]).astype("float32") + result_np = max_pool1D_forward_naive( + input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def check_adaptive_avg_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") + result = F.adaptive_avg_pool1d(input, output_size=16) + + input_np = np.random.random([2, 3, 32]).astype("float32") + result_np = avg_pool1D_forward_naive( + 
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def check_max_dygraph_padding_same(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.max_pool1d( + input, kernel_size=2, stride=2, padding="SAME") + + result_np = max_pool1D_forward_naive( + input_np, ksize=[2], strides=[2], paddings=[0]) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_avg_dygraph_padding_same(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.avg_pool1d( + input, kernel_size=2, stride=2, padding="SAME") + + result_np = avg_pool1D_forward_naive( + input_np, ksize=[2], strides=[2], paddings=[0]) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def test_pool1d(self): + for place in self.places: + + self.check_max_dygraph_results(place) + self.check_avg_dygraph_results(place) + self.check_max_static_results(place) + self.check_avg_static_results(place) + self.check_adaptive_max_dygraph_results(place) + self.check_adaptive_avg_dygraph_results(place) + self.check_adaptive_max_static_results(place) + self.check_adaptive_avg_static_results(place) + self.check_max_dygraph_padding_same(place) + self.check_avg_dygraph_padding_same(place) + + +class TestPool2dError_API(unittest.TestCase): + def test_error_api(self): + def run1(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = [[2]] + res_pd = F.max_pool1d( + input_pd, kernel_size=2, stride=2, padding=padding) + + self.assertRaises(ValueError, run1) + + def run2(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = [[2]] + res_pd = F.max_pool1d( + input_pd, kernel_size=2, stride=2, padding=padding) + + self.assertRaises(ValueError, run2) + + def run3(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "padding" + res_pd = F.max_pool1d( + input_pd, kernel_size=2, stride=2, padding=padding) + + self.assertRaises(ValueError, run3) + + def run4(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "VALID" + res_pd = F.max_pool1d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True) + + self.assertRaises(ValueError, run4) + + def run5(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "VALID" + res_pd = F.max_pool1d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True) + + self.assertRaises(ValueError, run5) + + def run6(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "VALID" + res_pd = F.avg_pool1d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True) + + 
self.assertRaises(ValueError, run6) + + def run7(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "paddle" + res_pd = F.avg_pool1d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True) + + self.assertRaises(ValueError, run7) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_api.py b/python/paddle/fluid/tests/unittests/test_pool2d_api.py new file mode 100644 index 0000000000000000000000000000000000000000..73df0885d8fed4ddc4c03c91d2c331e72772e398 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_pool2d_api.py @@ -0,0 +1,375 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from test_pool2d_op import adaptive_start_index, adaptive_end_index, pool2D_forward_naive +import unittest +from op_test import OpTest +import numpy as np +import paddle.fluid.core as core +from paddle.nn.functional import * +import paddle.fluid as fluid +import paddle + + +class TestPool2d_API(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_avg_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data( + name="input", shape=[2, 3, 32, 32], dtype="float32") + result = avg_pool2d(input, kernel_size=2, stride=2, padding=0) + + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='avg') + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def check_avg_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = avg_pool2d(input, kernel_size=2, stride=2, padding=0) + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='avg') + self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool2d_dg = paddle.nn.layer.AvgPool2d( + kernel_size=2, stride=2, padding=0) + result = avg_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_max_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data( + name="input", shape=[2, 3, 32, 32], dtype="float32") + result = max_pool2d(input, kernel_size=2, stride=2, padding=0) + + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='max') + + exe = fluid.Executor(place) + fetches = 
exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def check_max_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = max_pool2d( + input, kernel_size=2, stride=2, padding=0, return_indices=False) + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='max') + self.assertTrue(np.allclose(result.numpy(), result_np)) + + max_pool2d_dg = paddle.nn.layer.MaxPool2d( + kernel_size=2, stride=2, padding=0) + result = max_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_max_dygraph_stride_is_none(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result, indices = max_pool2d( + input, + kernel_size=2, + stride=None, + padding="SAME", + return_indices=True) + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='max', + padding_algorithm="SAME") + self.assertTrue(np.allclose(result.numpy(), result_np)) + + max_pool2d_dg = paddle.nn.layer.MaxPool2d( + kernel_size=2, stride=2, padding=0) + result = max_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_avg_dygraph_stride_is_none(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = avg_pool2d( + input, kernel_size=2, stride=None, padding="SAME") + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='avg', + padding_algorithm="SAME") + self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool2d_dg = paddle.nn.layer.AvgPool2d( + kernel_size=2, stride=2, padding=0) + result = avg_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_max_dygraph_padding(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + padding = [[0, 0], [0, 0], [0, 0], [0, 0]] + result = max_pool2d( + input, + kernel_size=2, + stride=2, + padding=padding, + return_indices=False) + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='max') + self.assertTrue(np.allclose(result.numpy(), result_np)) + + max_pool2d_dg = paddle.nn.layer.MaxPool2d( + kernel_size=2, stride=2, padding=0) + result = max_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_avg_divisor(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + padding = [[0, 0], [0, 0], [0, 0], [0, 0]] + result = avg_pool2d( + input, + kernel_size=2, + stride=2, + padding=padding, + divisor_override=4) + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='avg') + self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool2d_dg = paddle.nn.layer.AvgPool2d( + kernel_size=2, stride=2, padding=0) + result = avg_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def test_pool2d(self): + for 
place in self.places: + + self.check_max_dygraph_results(place) + self.check_avg_dygraph_results(place) + self.check_max_static_results(place) + self.check_avg_static_results(place) + self.check_max_dygraph_stride_is_none(place) + self.check_avg_dygraph_stride_is_none(place) + self.check_max_dygraph_padding(place) + self.check_avg_divisor(place) + + +class TestPool2dError_API(unittest.TestCase): + def test_error_api(self): + def run1(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = [[0, 1], [0, 0], [0, 0], [0, 0]] + res_pd = max_pool2d( + input_pd, kernel_size=2, stride=2, padding=padding) + + self.assertRaises(ValueError, run1) + + def run2(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = [[0, 1], [0, 0], [0, 0], [0, 0]] + res_pd = max_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NHWC') + + self.assertRaises(ValueError, run2) + + def run3(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "padding" + res_pd = max_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NHWC') + + self.assertRaises(ValueError, run3) + + def run3_avg(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "padding" + res_pd = avg_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NHWC') + + self.assertRaises(ValueError, run3_avg) + + def run4(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "VALID" + res_pd = max_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True, + data_format='NHWC') + + self.assertRaises(ValueError, run4) + + def run4_avg(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "VALID" + res_pd = avg_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True, + data_format='NHWC') + + self.assertRaises(ValueError, run4_avg) + + def run5(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "padding" + res_pd = avg_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NHWC') + + self.assertRaises(ValueError, run5) + + def run6(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "VALID" + res_pd = avg_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=True, + data_format='NHWC') + + self.assertRaises(ValueError, run6) + + def run7(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "VALID" + res_pd = avg_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=False, + data_format='NNNN') + + self.assertRaises(ValueError, 
run7) + + def run8(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = "VALID" + res_pd = max_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + ceil_mode=False, + data_format='NNNN') + + self.assertRaises(ValueError, run8) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_api.py b/python/paddle/fluid/tests/unittests/test_pool3d_api.py new file mode 100644 index 0000000000000000000000000000000000000000..cc078e9aae7aafe55e937b80270dd012fd64ff70 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_pool3d_api.py @@ -0,0 +1,341 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +from __future__ import division + +import unittest +import numpy as np +import paddle +import paddle.fluid.core as core +from op_test import OpTest +import paddle.fluid as fluid +from paddle.nn.functional import avg_pool3d, max_pool3d +from test_pool3d_op import adaptive_start_index, adaptive_end_index, pool3D_forward_naive + + +class TestPool3d_API(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_avg_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data( + name="input", shape=[2, 3, 32, 32, 32], dtype="float32") + result = avg_pool3d(input, kernel_size=2, stride=2, padding=0) + + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + result_np = pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='avg') + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def check_avg_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = avg_pool3d(input, kernel_size=2, stride=2, padding="SAME") + + result_np = pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='avg', + padding_algorithm="SAME") + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool3d_dg = paddle.nn.layer.AvgPool3d( + kernel_size=2, stride=None, padding="SAME") + result = avg_pool3d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_max_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data( + name="input", shape=[2, 3, 32, 32, 32], dtype="float32") + result = max_pool3d(input, kernel_size=2, stride=2, padding=0) + + input_np = np.random.random([2, 3, 32, 32, 
32]).astype("float32") + result_np = pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max') + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def check_max_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = max_pool3d(input, kernel_size=2, stride=2, padding=0) + + result_np = pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max') + + self.assertTrue(np.allclose(result.numpy(), result_np)) + max_pool3d_dg = paddle.nn.layer.MaxPool3d( + kernel_size=2, stride=None, padding=0) + result = max_pool3d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_max_dygraph_stride_is_none(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result, indices = max_pool3d( + input, + kernel_size=2, + stride=None, + padding="SAME", + return_indices=True) + + result_np = pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max', + padding_algorithm="SAME") + + self.assertTrue(np.allclose(result.numpy(), result_np)) + max_pool3d_dg = paddle.nn.layer.MaxPool3d( + kernel_size=2, stride=2, padding=0) + result = max_pool3d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_max_dygraph_padding(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + padding = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]] + result = max_pool3d(input, kernel_size=2, stride=2, padding=padding) + + result_np = pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max') + + self.assertTrue(np.allclose(result.numpy(), result_np)) + max_pool3d_dg = paddle.nn.layer.MaxPool3d( + kernel_size=2, stride=2, padding=0) + result = max_pool3d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + padding = [0, 0, 0, 0, 0, 0] + result = max_pool3d(input, kernel_size=2, stride=2, padding=padding) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_avg_divisor(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + padding = 0 + result = avg_pool3d( + input, + kernel_size=2, + stride=2, + padding=padding, + divisor_override=8) + + result_np = pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='avg') + + self.assertTrue(np.allclose(result.numpy(), result_np)) + avg_pool3d_dg = paddle.nn.layer.AvgPool3d( + kernel_size=2, stride=2, padding=0) + result = avg_pool3d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + padding = [0, 0, 0, 0, 0, 0] + result = avg_pool3d( + input, + kernel_size=2, + stride=2, + padding=padding, + divisor_override=8) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def test_pool3d(self): + for place in self.places: + + self.check_max_dygraph_results(place) + self.check_avg_dygraph_results(place) + 
self.check_max_static_results(place) + self.check_avg_static_results(place) + self.check_max_dygraph_stride_is_none(place) + self.check_max_dygraph_padding(place) + self.check_avg_divisor(place) + + +class TestPool3dError_API(unittest.TestCase): + def test_error_api(self): + def run1(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = [[0, 1], [0, 0], [0, 0], [0, 0], [0, 0]] + res_pd = avg_pool3d( + input_pd, kernel_size=2, stride=2, padding=padding) + + self.assertRaises(ValueError, run1) + + def run2(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = [[0, 1], [0, 0], [0, 0], [0, 0], [0, 0]] + res_pd = avg_pool3d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NCDHW') + + self.assertRaises(ValueError, run2) + + def run3(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + padding = [[0, 1], [0, 0], [0, 0], [0, 0], [0, 0]] + res_pd = avg_pool3d( + input_pd, + kernel_size=2, + stride=2, + padding=padding, + data_format='NDHWC') + + self.assertRaises(ValueError, run3) + + def run4(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + res_pd = avg_pool3d( + input_pd, + kernel_size=2, + stride=2, + padding=0, + data_format='NNNN') + + self.assertRaises(ValueError, run4) + + def run5(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + res_pd = max_pool3d( + input_pd, + kernel_size=2, + stride=2, + padding=0, + data_format='NNNN') + + self.assertRaises(ValueError, run5) + + def run6(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + res_pd = avg_pool3d( + input_pd, + kernel_size=2, + stride=2, + padding="padding", + data_format='NNNN') + + self.assertRaises(ValueError, run6) + + def run7(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + res_pd = max_pool3d( + input_pd, + kernel_size=2, + stride=2, + padding="padding", + data_format='NNNN') + + self.assertRaises(ValueError, run7) + + def run8(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + res_pd = avg_pool3d( + input_pd, + kernel_size=2, + stride=2, + padding="VALID", + ceil_mode=True, + data_format='NNNN') + + self.assertRaises(ValueError, run8) + + def run9(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + res_pd = max_pool3d( + input_pd, + kernel_size=2, + stride=2, + padding="VALID", + ceil_mode=True, + data_format='NNNN') + + self.assertRaises(ValueError, run9) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_prelu_op.py b/python/paddle/fluid/tests/unittests/test_prelu_op.py index 
0a38bd277bfd1d157c944f5ae28810eb7d5f1dae..16388ff8f5f042326ac5705a5f69919f4f8061c2 100644 --- a/python/paddle/fluid/tests/unittests/test_prelu_op.py +++ b/python/paddle/fluid/tests/unittests/test_prelu_op.py @@ -18,23 +18,134 @@ import unittest import numpy as np import paddle.fluid as fluid import six -import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard from op_test import OpTest, skip_check_grad_ci +import paddle +import paddle.nn.functional as F + + +def ref_prelu(x, weight): + x_t = x.copy() + weight = weight.reshape(1, -1, 1, 1) + neg_indices = x <= 0 + assert x.shape == neg_indices.shape + x_t[neg_indices] = (x_t * weight)[neg_indices] + return (x_t, ) + + +def ref_prelu_nn(x, num_parameters, init): + weight_np = np.full((num_parameters), init) + return ref_prelu(x, weight_np) -class TestPReluOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program()): +class TestFunctionalPReluAPI(unittest.TestCase): + def setUp(self): + self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else paddle.CPUPlace() + self.x_np = np.random.uniform(-1., 1., [1, 2, 3, 4]).astype('float32') + self.weight_np_0 = np.random.randn(1).astype('float32') + self.weight_np_1 = np.random.randn(self.x_np.shape[1]).astype('float32') + + def static_check(self, weight_np): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.x_np.shape, 'float32') + weight = paddle.data('Alpha', weight_np.shape, 'float32') + out = F.prelu(x, weight) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np, + 'Alpha': weight_np}, + fetch_list=[out]) + out_ref = ref_prelu(self.x_np, weight_np) + self.assertEqual(np.allclose(out_ref, res[0]), True) + + def dygraph_check(self, weight_np): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + weight = paddle.to_tensor(weight_np) + out = F.prelu(x, weight) + out_ref = ref_prelu(self.x_np, weight_np) + self.assertEqual(np.allclose(out_ref, out.numpy()), True) + paddle.enable_static() + + def test_static_api(self): + self.static_check(self.weight_np_0) + self.static_check(self.weight_np_1) + + def test_dygraph_api(self): + self.dygraph_check(self.weight_np_0) + self.dygraph_check(self.weight_np_1) + + def test_error(self): + with paddle.static.program_guard(paddle.static.Program()): + weight_fp32 = paddle.data( + name='weight_fp32', shape=[1], dtype='float32') # The input type must be Variable. - self.assertRaises(TypeError, fluid.layers.prelu, 0.1, 'all') + self.assertRaises(TypeError, F.prelu, x=1, weight=weight_fp32) # The input dtype must be float16, float32, float64. 
- x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32') - self.assertRaises(TypeError, fluid.layers.prelu, x_int32, 'all') - # support the input dtype is float32 - x_fp16 = fluid.layers.data( - name='x_fp16', shape=[12, 10], dtype='float32') - fluid.layers.prelu(x_fp16, 'all') + x_int32 = paddle.data(name='x_int32', shape=[2, 3], dtype='int32') + self.assertRaises(TypeError, F.prelu, x=x_int32, weight=weight_fp32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[2, 3], dtype='float16') + F.prelu(x=x_fp16, weight=weight_fp32) + + +class TestNNPReluAPI(unittest.TestCase): + def setUp(self): + self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else paddle.CPUPlace() + self.x_np = np.ones([1, 2, 3, 4]).astype('float32') + + def test_static_api(self): + startup_program = paddle.static.Program() + train_program = paddle.static.Program() + with paddle.static.program_guard(train_program, startup_program): + x = paddle.data(name='X', shape=self.x_np.shape, dtype='float32') + m = paddle.nn.PReLU() + out = m(x) + exe = paddle.static.Executor(self.place) + exe.run(startup_program) + res = exe.run(train_program, + feed={'X': self.x_np}, + fetch_list=[out]) + out_ref = ref_prelu_nn(self.x_np, 1, 0.25) + self.assertEqual(np.allclose(out_ref, res[0]), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + + x = paddle.to_tensor(self.x_np) + m = paddle.nn.PReLU() + out = m(x) + out_ref = ref_prelu_nn(self.x_np, 1, 0.25) + self.assertEqual(np.allclose(out_ref, out.numpy()), True) + + x = paddle.to_tensor(self.x_np) + m = paddle.nn.PReLU(num_parameters=self.x_np.shape[1]) + out = m(x) + out_ref = ref_prelu_nn(self.x_np, self.x_np.shape[1], 0.25) + self.assertEqual(np.allclose(out_ref, out.numpy()), True) + + x = paddle.to_tensor(self.x_np) + m = paddle.nn.PReLU(init=0.5) + out = m(x) + out_ref = ref_prelu_nn(self.x_np, 1, 0.5) + self.assertEqual(np.allclose(out_ref, out.numpy()), True) + + x = paddle.to_tensor(self.x_np) + m = paddle.nn.PReLU(weight_attr=fluid.ParamAttr(name="weight")) + out = m(x) + out_ref = ref_prelu_nn(self.x_np, 1, 0.25) + self.assertEqual(np.allclose(out_ref, out.numpy()), True) + + x = paddle.to_tensor(self.x_np) + m = paddle.nn.PReLU(weight_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(0.5))) + out = m(x) + out_ref = ref_prelu_nn(self.x_np, 1, 0.5) + self.assertEqual(np.allclose(out_ref, out.numpy()), True) + + paddle.enable_static() class PReluTest(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_prod_op.py b/python/paddle/fluid/tests/unittests/test_prod_op.py new file mode 100644 index 0000000000000000000000000000000000000000..158683907253e2ebc5adab6799c75ffd914df1c7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_prod_op.py @@ -0,0 +1,132 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
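The ref_prelu helper introduced above encodes the PReLU rule out = x where x > 0 and out = weight * x elsewhere, with one weight per channel broadcast across an NCHW tensor. A short self-contained sketch of the same computation (NumPy only; the function name is illustrative and not part of the diff):

import numpy as np

def prelu_reference(x, weight):
    # One weight per channel (axis 1 of NCHW); reshape to (1, C, 1, 1)
    # so it broadcasts over batch, height and width.
    w = np.asarray(weight, dtype=x.dtype).reshape(1, -1, 1, 1)
    return np.where(x > 0, x, w * x)

x = np.array([[[[-1.0, 2.0]], [[3.0, -4.0]]]])  # shape (1, 2, 1, 2)
print(prelu_reference(x, [0.25, 0.5]))          # negatives scaled per channel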
+ +from __future__ import print_function + +import paddle +import unittest +import numpy as np + + +class TestProdOp(unittest.TestCase): + def setUp(self): + self.input = np.random.random(size=(10, 10, 5)).astype(np.float32) + + def run_imperative(self): + input = paddle.to_tensor(self.input) + dy_result = paddle.prod(input) + expected_result = np.prod(self.input) + self.assertTrue(np.allclose(dy_result.numpy(), expected_result)) + + dy_result = paddle.prod(input, axis=1) + expected_result = np.prod(self.input, axis=1) + self.assertTrue(np.allclose(dy_result.numpy(), expected_result)) + + dy_result = paddle.prod(input, axis=-1) + expected_result = np.prod(self.input, axis=-1) + self.assertTrue(np.allclose(dy_result.numpy(), expected_result)) + + dy_result = paddle.prod(input, axis=[0, 1]) + expected_result = np.prod(self.input, axis=(0, 1)) + self.assertTrue(np.allclose(dy_result.numpy(), expected_result)) + + dy_result = paddle.prod(input, axis=1, keepdim=True) + expected_result = np.prod(self.input, axis=1, keepdims=True) + self.assertTrue(np.allclose(dy_result.numpy(), expected_result)) + + dy_result = paddle.prod(input, axis=1, dtype='int64') + expected_result = np.prod(self.input, axis=1, dtype=np.int64) + self.assertTrue(np.allclose(dy_result.numpy(), expected_result)) + + dy_result = paddle.prod(input, axis=1, keepdim=True, dtype='int64') + expected_result = np.prod( + self.input, axis=1, keepdims=True, dtype=np.int64) + self.assertTrue(np.allclose(dy_result.numpy(), expected_result)) + + def run_static(self, use_gpu=False): + input = paddle.data(name='input', shape=[10, 10, 5], dtype='float32') + result0 = paddle.prod(input) + result1 = paddle.prod(input, axis=1) + result2 = paddle.prod(input, axis=-1) + result3 = paddle.prod(input, axis=[0, 1]) + result4 = paddle.prod(input, axis=1, keepdim=True) + result5 = paddle.prod(input, axis=1, dtype='int64') + result6 = paddle.prod(input, axis=1, keepdim=True, dtype='int64') + + place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace() + exe = paddle.static.Executor(place) + exe.run(paddle.static.default_startup_program()) + static_result = exe.run(feed={"input": self.input}, + fetch_list=[ + result0, result1, result2, result3, result4, + result5, result6 + ]) + + expected_result = np.prod(self.input) + self.assertTrue(np.allclose(static_result[0], expected_result)) + expected_result = np.prod(self.input, axis=1) + self.assertTrue(np.allclose(static_result[1], expected_result)) + expected_result = np.prod(self.input, axis=-1) + self.assertTrue(np.allclose(static_result[2], expected_result)) + expected_result = np.prod(self.input, axis=(0, 1)) + self.assertTrue(np.allclose(static_result[3], expected_result)) + expected_result = np.prod(self.input, axis=1, keepdims=True) + self.assertTrue(np.allclose(static_result[4], expected_result)) + expected_result = np.prod(self.input, axis=1, dtype=np.int64) + self.assertTrue(np.allclose(static_result[5], expected_result)) + expected_result = np.prod( + self.input, axis=1, keepdims=True, dtype=np.int64) + self.assertTrue(np.allclose(static_result[6], expected_result)) + + def test_cpu(self): + paddle.disable_static(place=paddle.CPUPlace()) + self.run_imperative() + paddle.enable_static() + + with paddle.static.program_guard(paddle.static.Program()): + self.run_static() + + def test_gpu(self): + if not paddle.fluid.core.is_compiled_with_cuda(): + return + + paddle.disable_static(place=paddle.CUDAPlace(0)) + self.run_imperative() + paddle.enable_static() + + with 
paddle.static.program_guard(paddle.static.Program()): + self.run_static(use_gpu=True) + + +class TestProdOpError(unittest.TestCase): + def test_error(self): + with paddle.static.program_guard(paddle.static.Program(), + paddle.static.Program()): + x = paddle.data(name='x', shape=[2, 2, 4], dtype='float32') + bool_x = paddle.data(name='bool_x', shape=[2, 2, 4], dtype='bool') + # The argument x should be a Tensor + self.assertRaises(TypeError, paddle.prod, [1]) + + # The data type of x should be float32, float64, int32, int64 + self.assertRaises(TypeError, paddle.prod, bool_x) + + # The argument axis's type should be int, list or tuple + self.assertRaises(TypeError, paddle.prod, x, 1.5) + + # The argument dtype of prod_op should be float32, float64, int32 or int64. + self.assertRaises(TypeError, paddle.prod, x, 'bool') + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_query_op.py b/python/paddle/fluid/tests/unittests/test_query_op.py new file mode 100644 index 0000000000000000000000000000000000000000..fc8ce5ad5f6b89b28fb2ddddd15d5b315fe4c0e4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_query_op.py @@ -0,0 +1,32 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
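The TestCudnnVersion case below pins down the contract of paddle.get_cudnn_version(): it returns None on builds without CUDA and an integer version code otherwise. A minimal usage sketch relying only on that contract (illustrative, not part of the diff):

import paddle

# Branch on the documented return contract rather than on build flags:
# None means a CPU-only build, an int means cuDNN is available.
version = paddle.get_cudnn_version()
if version is None:
    print("CPU-only build; skipping cuDNN-specific paths")
else:
    print("cuDNN available, version code:", version)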
+ +from __future__ import print_function + +import unittest +import paddle +from paddle.fluid import core + + +class TestCudnnVersion(unittest.TestCase): + def test_no_cudnn(self): + cudnn_version = paddle.get_cudnn_version() + if not core.is_compiled_with_cuda(): + self.assertEqual((cudnn_version is None), True) + else: + self.assertEqual((isinstance(cudnn_version, int)), True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_randint_op.py b/python/paddle/fluid/tests/unittests/test_randint_op.py index 715d66aa3332cef649f867052400b9769bacd979..88b07f5df83f8f967f8ba76e78b37ecfb2c54276 100644 --- a/python/paddle/fluid/tests/unittests/test_randint_op.py +++ b/python/paddle/fluid/tests/unittests/test_randint_op.py @@ -125,7 +125,7 @@ class TestRandintAPI(unittest.TestCase): out4 = paddle.randint( low=-100, high=100, shape=[dim_1, 5, dim_2], dtype='int32') # shape is a tensor and dtype is 'float64' - var_shape = paddle.nn.data( + var_shape = paddle.static.data( name='var_shape', shape=[2], dtype="int64") out5 = paddle.randint( low=1, high=1000, shape=var_shape, dtype='int64') diff --git a/python/paddle/fluid/tests/unittests/test_randn_op.py b/python/paddle/fluid/tests/unittests/test_randn_op.py index 8b560f18f9f7bc614e38c1245f48398f808525ed..9d2c03f3bba914d8f6b06b54ce0e19c168edb9e3 100644 --- a/python/paddle/fluid/tests/unittests/test_randn_op.py +++ b/python/paddle/fluid/tests/unittests/test_randn_op.py @@ -34,7 +34,7 @@ class TestRandnOp(unittest.TestCase): dim_2 = paddle.fill_constant([1], "int32", 50) x3 = paddle.randn([dim_1, dim_2, 784]) - var_shape = paddle.nn.data('X', [2], 'int32') + var_shape = paddle.static.data('X', [2], 'int32') x4 = paddle.randn(var_shape) place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( diff --git a/python/paddle/fluid/tests/unittests/test_random_seed.py b/python/paddle/fluid/tests/unittests/test_random_seed.py new file mode 100644 index 0000000000000000000000000000000000000000..2933abe46c1b87959c9f61975c02a41e91dfbef3 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_random_seed.py @@ -0,0 +1,463 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test cloud role maker.""" + +from __future__ import print_function +import os +import unittest +import paddle.fluid.generator as generator + +import time # temp for debug +import paddle.fluid as fluid +import numpy as np +import paddle +import paddle.fluid.core as core + + +class TestGeneratorSeed(unittest.TestCase): + """ + Test cases for cpu generator seed. 
+ """ + + def test_generator_uniform_random_dygraph(self): + """Test Generator seed.""" + gen = generator.Generator() + + fluid.enable_dygraph() + + gen.manual_seed(12312321111) + x = fluid.layers.uniform_random([10], dtype="float32", min=0.0, max=1.0) + st1 = gen.get_state() + x1 = fluid.layers.uniform_random( + [10], dtype="float32", min=0.0, max=1.0) + gen.set_state(st1) + x2 = fluid.layers.uniform_random( + [10], dtype="float32", min=0.0, max=1.0) + gen.manual_seed(12312321111) + x3 = fluid.layers.uniform_random( + [10], dtype="float32", min=0.0, max=1.0) + x_np = x.numpy() + x1_np = x1.numpy() + x2_np = x2.numpy() + x3_np = x3.numpy() + + if not core.is_compiled_with_cuda(): + self.assertTrue(np.allclose(x1_np, x2_np)) + self.assertTrue(np.allclose(x_np, x3_np)) + + def test_generator_uniform_random_static(self): + + fluid.disable_dygraph() + + gen = generator.Generator() + gen.manual_seed(123123143) + + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + # example 1: + # attr shape is a list which doesn't contain tensor Variable. + result_1 = fluid.layers.uniform_random(shape=[3, 4]) + result_2 = fluid.layers.uniform_random(shape=[3, 4]) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(startup_program) + out1 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + #gen.set_state(cur_state) + gen.manual_seed(123123143) + out2 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + + out1_res1 = np.array(out1[0]) + out1_res2 = np.array(out1[1]) + out2_res1 = np.array(out2[0]) + out2_res2 = np.array(out2[1]) + + if not core.is_compiled_with_cuda(): + self.assertTrue(np.allclose(out1_res1, out2_res1)) + self.assertTrue(np.allclose(out1_res2, out2_res2)) + self.assertTrue(not np.allclose(out1_res2, out1_res1)) + + def test_gen_dropout_dygraph(self): + gen = generator.Generator() + + fluid.enable_dygraph() + + gen.manual_seed(111111111) + st = gen.get_state() + # x = np.arange(1,101).reshape(2,50).astype("float32") + x = fluid.layers.uniform_random( + [2, 10], dtype="float32", min=0.0, max=1.0) + y = fluid.layers.dropout(x, 0.5) + gen.manual_seed(111111111) + #gen.set_state(st) + x1 = fluid.layers.uniform_random( + [2, 10], dtype="float32", min=0.0, max=1.0) + y1 = fluid.layers.dropout(x1, 0.5) + y_np = y.numpy() + y1_np = y1.numpy() + #print(y_np) + #print(y1_np) + if not core.is_compiled_with_cuda(): + print(">>>>>>> dropout dygraph >>>>>>>") + self.assertTrue(np.allclose(y_np, y1_np)) + + def test_gen_dropout_static(self): + fluid.disable_dygraph() + + gen = generator.Generator() + gen.manual_seed(123123143) + + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + # example 1: + # attr shape is a list which doesn't contain tensor Variable. 
+ x_1 = fluid.layers.uniform_random(shape=[2, 10]) + y_1 = fluid.layers.dropout(x_1, 0.5) + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(startup_program) + out1 = exe.run(train_program, feed={}, fetch_list=[y_1]) + #gen.set_state(cur_state) + gen.manual_seed(123123143) + out2 = exe.run(train_program, feed={}, fetch_list=[y_1]) + out1_np = np.array(out1[0]) + out2_np = np.array(out2[0]) + # print(out1_np) + # print(out2_np) + if not core.is_compiled_with_cuda(): + print(">>>>>>> dropout static >>>>>>>") + self.assertTrue(np.allclose(out1_np, out2_np)) + + def test_generator_gaussian_random_dygraph(self): + """Test Generator seed.""" + gen = generator.Generator() + + fluid.enable_dygraph() + + gen.manual_seed(12312321111) + x = fluid.layers.gaussian_random([10], dtype="float32") + st1 = gen.get_state() + x1 = fluid.layers.gaussian_random([10], dtype="float32") + gen.set_state(st1) + x2 = fluid.layers.gaussian_random([10], dtype="float32") + gen.manual_seed(12312321111) + x3 = fluid.layers.gaussian_random([10], dtype="float32") + x_np = x.numpy() + x1_np = x1.numpy() + x2_np = x2.numpy() + x3_np = x3.numpy() + + if not core.is_compiled_with_cuda(): + print(">>>>>>> gaussian random dygraph >>>>>>>") + self.assertTrue(np.allclose(x1_np, x2_np)) + self.assertTrue(np.allclose(x_np, x3_np)) + + def test_generator_gaussian_random_static(self): + + fluid.disable_dygraph() + + gen = generator.Generator() + gen.manual_seed(123123143) + + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + # example 1: + # attr shape is a list which doesn't contain tensor Variable. + result_1 = fluid.layers.gaussian_random(shape=[3, 4]) + result_2 = fluid.layers.gaussian_random(shape=[3, 4]) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(startup_program) + out1 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + #gen.set_state(cur_state) + gen.manual_seed(123123143) + out2 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + + out1_res1 = np.array(out1[0]) + out1_res2 = np.array(out1[1]) + out2_res1 = np.array(out2[0]) + out2_res2 = np.array(out2[1]) + + if not core.is_compiled_with_cuda(): + print(">>>>>>> gaussian random static >>>>>>>") + self.assertTrue(np.allclose(out1_res1, out2_res1)) + self.assertTrue(np.allclose(out1_res2, out2_res2)) + self.assertTrue(not np.allclose(out1_res2, out1_res1)) + + def test_generator_randint_dygraph(self): + """Test Generator seed.""" + gen = generator.Generator() + + fluid.enable_dygraph() + + gen.manual_seed(12312321111) + x = paddle.randint(low=10, shape=[10], dtype="int32") + st1 = gen.get_state() + x1 = paddle.randint(low=10, shape=[10], dtype="int32") + gen.set_state(st1) + x2 = paddle.randint(low=10, shape=[10], dtype="int32") + gen.manual_seed(12312321111) + x3 = paddle.randint(low=10, shape=[10], dtype="int32") + x_np = x.numpy() + x1_np = x1.numpy() + x2_np = x2.numpy() + x3_np = x3.numpy() + + if not core.is_compiled_with_cuda(): + print(">>>>>>> randint dygraph >>>>>>>") + self.assertTrue(np.allclose(x1_np, x2_np)) + self.assertTrue(np.allclose(x_np, x3_np)) + + def test_generator_ranint_static(self): + + fluid.disable_dygraph() + + gen = generator.Generator() + gen.manual_seed(123123143) + + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + # example 1: + # attr shape is a list which doesn't contain tensor Variable. 
+ result_1 = paddle.randint(low=10, shape=[3, 4]) + result_2 = paddle.randint(low=10, shape=[3, 4]) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(startup_program) + out1 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + #gen.set_state(cur_state) + gen.manual_seed(123123143) + out2 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + + out1_res1 = np.array(out1[0]) + out1_res2 = np.array(out1[1]) + out2_res1 = np.array(out2[0]) + out2_res2 = np.array(out2[1]) + + if not core.is_compiled_with_cuda(): + print(">>>>>>> randint static >>>>>>>") + self.assertTrue(np.allclose(out1_res1, out2_res1)) + self.assertTrue(np.allclose(out1_res2, out2_res2)) + self.assertTrue(not np.allclose(out1_res2, out1_res1)) + + def test_generator_randperm_dygraph(self): + """Test Generator seed.""" + gen = generator.Generator() + + fluid.enable_dygraph() + + gen.manual_seed(12312321111) + x = paddle.randperm(10) + st1 = gen.get_state() + x1 = paddle.randperm(10) + gen.set_state(st1) + x2 = paddle.randperm(10) + gen.manual_seed(12312321111) + x3 = paddle.randperm(10) + x_np = x.numpy() + x1_np = x1.numpy() + x2_np = x2.numpy() + x3_np = x3.numpy() + + # print("## {}".format(x1_np)) + # print("## {}".format(x2_np)) + + if not core.is_compiled_with_cuda(): + print(">>>>>>> randperm dygraph >>>>>>>") + self.assertTrue(np.allclose(x1_np, x2_np)) + self.assertTrue(np.allclose(x_np, x3_np)) + + def test_generator_randperm_static(self): + + fluid.disable_dygraph() + + gen = generator.Generator() + gen.manual_seed(123123143) + + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + # example 1: + # attr shape is a list which doesn't contain tensor Variable. + result_1 = paddle.randperm(10) + result_2 = paddle.randperm(10) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(startup_program) + out1 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + #gen.set_state(cur_state) + gen.manual_seed(123123143) + out2 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + + out1_res1 = np.array(out1[0]) + out1_res2 = np.array(out1[1]) + out2_res1 = np.array(out2[0]) + out2_res2 = np.array(out2[1]) + + if not core.is_compiled_with_cuda(): + print(">>>>>>> randperm static >>>>>>>") + self.assertTrue(np.allclose(out1_res1, out2_res1)) + self.assertTrue(np.allclose(out1_res2, out2_res2)) + self.assertTrue(not np.allclose(out1_res2, out1_res1)) + + def test_generator_sampling_id_dygraph(self): + """Test Generator seed.""" + gen = generator.Generator() + + fluid.enable_dygraph() + + gen.manual_seed(12312321111) + x = fluid.layers.uniform_random( + [10, 10], dtype="float32", min=0.0, max=1.0) + y = fluid.layers.sampling_id(x) + st1 = gen.get_state() + x1 = fluid.layers.uniform_random( + [10, 10], dtype="float32", min=0.0, max=1.0) + y1 = fluid.layers.sampling_id(x) + gen.set_state(st1) + x2 = fluid.layers.uniform_random( + [10, 10], dtype="float32", min=0.0, max=1.0) + y2 = fluid.layers.sampling_id(x) + gen.manual_seed(12312321111) + x3 = fluid.layers.uniform_random( + [10, 10], dtype="float32", min=0.0, max=1.0) + y3 = fluid.layers.sampling_id(x) + + x_np = y.numpy() + x1_np = y1.numpy() + x2_np = y2.numpy() + x3_np = y3.numpy() + + print("## {}".format(x1_np)) + print("## {}".format(x2_np)) + + if not core.is_compiled_with_cuda(): + print(">>>>>>> sampling id dygraph >>>>>>>") + self.assertTrue(np.allclose(x1_np, x2_np)) + self.assertTrue(np.allclose(x_np, x3_np)) + + def 
test_generator_sampling_id_static(self): + + fluid.disable_dygraph() + + gen = generator.Generator() + gen.manual_seed(123123143) + + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + # example 1: + # attr shape is a list which doesn't contain tensor Variable. + x = fluid.layers.uniform_random(shape=[10, 10]) + result_1 = fluid.layers.sampling_id(x) + result_2 = fluid.layers.sampling_id(x) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(startup_program) + out1 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + #gen.set_state(cur_state) + gen.manual_seed(123123143) + out2 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + + out1_res1 = np.array(out1[0]) + out1_res2 = np.array(out1[1]) + out2_res1 = np.array(out2[0]) + out2_res2 = np.array(out2[1]) + + if not core.is_compiled_with_cuda(): + print(">>>>>>> sampling id static >>>>>>>") + self.assertTrue(np.allclose(out1_res1, out2_res1)) + self.assertTrue(np.allclose(out1_res2, out2_res2)) + self.assertTrue(not np.allclose(out1_res2, out1_res1)) + + def test_gen_TruncatedNormal_initializer(self): + fluid.disable_dygraph() + + gen = generator.Generator() + gen.manual_seed(123123143) + cur_state = gen.get_state() + + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + # example 1: + # attr shape is a list which doesn't contain tensor Variable. + x = fluid.layers.uniform_random(shape=[2, 10]) + result_1 = fluid.layers.fc( + input=x, + size=10, + param_attr=fluid.initializer.TruncatedNormal( + loc=0.0, scale=2.0)) + result_2 = fluid.layers.fc( + input=x, + size=10, + param_attr=fluid.initializer.TruncatedNormal( + loc=0.0, scale=2.0)) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(startup_program) + out1 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + #gen.set_state(cur_state) + + #gen.set_state(cur_state) + gen.manual_seed(123123143) + with fluid.program_guard(train_program, startup_program): + exe.run(startup_program) + out2 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + + out1_res1 = np.array(out1[0]) + out1_res2 = np.array(out1[1]) + out2_res1 = np.array(out2[0]) + out2_res2 = np.array(out2[1]) + + print(out1_res1) + print(out1_res2) + print(out2_res1) + print(out2_res2) + + if not core.is_compiled_with_cuda(): + print(">>>>>>> truncated normal static >>>>>>>") + self.assertTrue(np.allclose(out1_res1, out2_res1)) + self.assertTrue(np.allclose(out1_res2, out2_res2)) + self.assertTrue(not np.allclose(out1_res2, out1_res1)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_reduce_op.py b/python/paddle/fluid/tests/unittests/test_reduce_op.py index 0531da2b06ec37fd60389cd2abb85822ebc9d0f9..cf35f9dbcdaaae1357ccdfd6b5cba85ac98d2037 100644 --- a/python/paddle/fluid/tests/unittests/test_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_reduce_op.py @@ -67,22 +67,6 @@ class TestSumOp6D(OpTest): self.check_grad(['X'], 'Out') -class TestMeanOp(OpTest): - def setUp(self): - self.op_type = "reduce_mean" - self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float64")} - self.attrs = {'dim': [1]} - self.outputs = { - 'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim'])) - } - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X'], 'Out') - - @skip_check_grad_ci(
reason="reduce_max is discontinuous non-derivable function," " its gradient check is not supported by unittest framework.") @@ -318,21 +302,6 @@ class TestReduceAll(Test1DReduce): self.outputs = {'Out': self.inputs['X'].sum()} -## reduction in multi dims -class TestReduceMeanOpMultiAxises(OpTest): - def setUp(self): - self.op_type = "reduce_mean" - self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float64")} - self.attrs = {'dim': [1, 2]} - self.outputs = {'Out': self.inputs['X'].mean(axis=(1, 2))} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X'], 'Out') - - @skip_check_grad_ci( reason="reduce_max is discontinuous non-derivable function," " its gradient check is not supported by unittest framework.") @@ -420,40 +389,6 @@ class TestReduceSumWithNumelOne(OpTest): self.check_grad(['X'], 'Out') -class TestReduceMeanWithDimOne(OpTest): - def setUp(self): - self.op_type = "reduce_mean" - self.inputs = {'X': np.random.random((100, 1, 1)).astype("float64")} - self.attrs = {'dim': [1], 'keep_dim': False} - self.outputs = { - 'Out': self.inputs['X'].mean( - axis=tuple(self.attrs['dim']), keepdims=False) - } - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X'], 'Out') - - -class TestReduceMeanWithNumelOne(OpTest): - def setUp(self): - self.op_type = "reduce_mean" - self.inputs = {'X': np.random.random((100, 1)).astype("float64")} - self.attrs = {'dim': [1], 'keep_dim': True} - self.outputs = { - 'Out': self.inputs['X'].mean( - axis=tuple(self.attrs['dim']), keepdims=True) - } - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X'], 'Out') - - class TestReduceAll(OpTest): def setUp(self): self.op_type = "reduce_sum" @@ -536,18 +471,6 @@ class TestReduceSumOpError(unittest.TestCase): self.assertRaises(TypeError, fluid.layers.reduce_sum, x2) -class TestReduceMeanOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - # The input type of reduce_mean_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - self.assertRaises(TypeError, fluid.layers.reduce_mean, x1) - # The input dtype of reduce_mean_op must be float32 or float64 or int32 or int64. 
- x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") - self.assertRaises(TypeError, fluid.layers.reduce_mean, x2) - - class API_TestSumOpError(unittest.TestCase): def test_errors(self): def test_dtype1(): @@ -580,10 +503,10 @@ class API_TestSumOpError(unittest.TestCase): class API_TestSumOp(unittest.TestCase): - def test_1(self): + def test_static(self): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.data("data", shape=[10, 10], dtype="float32") - result_sum = paddle.sum(input=data, dim=1, dtype="float64") + result_sum = paddle.sum(x=data, axis=1, dtype="float64") place = fluid.CPUPlace() exe = fluid.Executor(place) input_data = np.random.rand(10, 10).astype(np.float32) @@ -593,7 +516,7 @@ class API_TestSumOp(unittest.TestCase): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.data("data", shape=[10, 10], dtype="int32") - result_sum = paddle.sum(input=data, dim=1, dtype="int64") + result_sum = paddle.sum(x=data, axis=1, dtype="int64") place = fluid.CPUPlace() exe = fluid.Executor(place) input_data = np.random.randint(10, size=(10, 10)).astype(np.int32) @@ -603,7 +526,7 @@ class API_TestSumOp(unittest.TestCase): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.data("data", shape=[10, 10], dtype="int32") - result_sum = paddle.sum(input=data, dim=1) + result_sum = paddle.sum(x=data, axis=1) place = fluid.CPUPlace() exe = fluid.Executor(place) input_data = np.random.randint(10, size=(10, 10)).astype(np.int32) @@ -612,39 +535,41 @@ class API_TestSumOp(unittest.TestCase): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.data("data", shape=[10, 10], dtype="int32") - result_sum = paddle.sum(input=data, dim=1) + result_sum = paddle.sum(x=data, axis=1) place = fluid.CPUPlace() exe = fluid.Executor(place) input_data = np.random.randint(10, size=(10, 10)).astype(np.int32) res, = exe.run(feed={"data": input_data}, fetch_list=[result_sum]) self.assertEqual((res == np.sum(input_data, axis=1)).all(), True) - with fluid.dygraph.guard(): - np_x = np.array([10, 10]).astype('float64') - x = fluid.dygraph.to_variable(np_x) - z = paddle.sum(x, dim=0) - np_z = z.numpy() - z_expected = np.array(np.sum(np_x, axis=0)) - self.assertEqual((np_z == z_expected).all(), True) - - -class API_TestReduceMeanOp(unittest.TestCase): - def test_static(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - x = fluid.data("x", shape=[10, 10], dtype="float32") - out = fluid.layers.reduce_mean(input=x, dim=1) + input_data = np.random.randint(10, size=(5, 5, 5)).astype(np.int32) + data = fluid.data("data", shape=[5, 5, 5], dtype="int32") + sum1 = paddle.sum(x=data, axis=[0, 1]) + sum2 = paddle.sum(x=data, axis=()) + place = fluid.CPUPlace() exe = fluid.Executor(place) - x_np = np.random.rand(10, 10).astype(np.float32) - res = exe.run(feed={"x": x_np}, fetch_list=[out]) - self.assertEqual(np.allclose(res[0], np.mean(x_np, axis=1)), True) + res1, res2 = exe.run(feed={"data": input_data}, + fetch_list=[sum1, sum2]) + + self.assertEqual((res1 == np.sum(input_data, axis=(0, 1))).all(), True) + self.assertEqual( + (res2 == np.sum(input_data, axis=(0, 1, 2))).all(), True) def test_dygraph(self): + np_x = np.random.random([2, 3, 4]).astype('int32') with fluid.dygraph.guard(): - x_np = np.random.rand(10, 10).astype(np.float32) - x = fluid.dygraph.to_variable(x_np) - out = fluid.layers.reduce_mean(input=x, dim=1) - self.assertEqual(np.allclose(out.numpy(), np.mean(x_np, axis=1)), True) + x = fluid.dygraph.to_variable(np_x) 
+ out0 = paddle.sum(x).numpy() + out1 = paddle.sum(x, axis=0).numpy() + out2 = paddle.sum(x, axis=(0, 1)).numpy() + out3 = paddle.sum(x, axis=(0, 1, 2)).numpy() + + self.assertTrue((out0 == np.sum(np_x, axis=(0, 1, 2))).all()) + self.assertTrue((out1 == np.sum(np_x, axis=0)).all()) + self.assertTrue((out2 == np.sum(np_x, axis=(0, 1))).all()) + self.assertTrue((out3 == np.sum(np_x, axis=(0, 1, 2))).all()) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_reshape_op.py b/python/paddle/fluid/tests/unittests/test_reshape_op.py index 3dfd9023f5af30ff289c4dc55a0c275402bc3067..275f9d21f9f8eca653a030bfe5c74071397f33c1 100644 --- a/python/paddle/fluid/tests/unittests/test_reshape_op.py +++ b/python/paddle/fluid/tests/unittests/test_reshape_op.py @@ -18,6 +18,7 @@ import unittest import numpy as np from op_test import OpTest +import paddle import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard @@ -227,35 +228,43 @@ class TestReshapeUint8Op(TestReshapeInt8Op): # Test python API class TestReshapeAPI(unittest.TestCase): - # situation 1: have shape( list, no tensor), no actual shape(Tensor) - def test_1(self): + def _set_paddle_api(self): + self.fill_constant = paddle.fill_constant + self.data = paddle.data + self.reshape = paddle.reshape + self.to_tensor = paddle.to_tensor + + def _set_fluid_api(self): + self.fill_constant = fluid.layers.fill_constant + self.data = fluid.data + self.reshape = fluid.layers.reshape + + def _test_api(self): input = np.random.random([2, 25]).astype("float32") shape = [2, 5, 5] - positive_five = fluid.layers.fill_constant([1], "int32", 5) - x = fluid.layers.data( - name="x", shape=[2, 25], append_batch_size=False, dtype="float32") + main_prog = Program() + with program_guard(main_prog, Program()): + positive_five = self.fill_constant([1], "int32", 5) + x = self.data(name="x", shape=[2, 25], dtype="float32") - actual_shape = fluid.layers.data( - name="shape", - shape=[1, 3], - append_batch_size=False, - dtype="float32") + actual_shape = self.data(name="shape", shape=[3], dtype="int32") - # situation 1: have shape( list, no tensor), no actual shape(Tensor) - out_1 = fluid.layers.reshape(x, shape) + # situation 1: have shape( list, no tensor), no actual shape(Tensor) + out_1 = self.reshape(x, shape) - # situation 2: have shape(list, no tensor), have actual shape(Tensor) - out_2 = fluid.layers.reshape(x, shape=shape, actual_shape=actual_shape) + # situation 2: have shape(list, no tensor), have actual shape(Tensor) + out_2 = fluid.layers.reshape( + x, shape=shape, actual_shape=actual_shape) - # Situation 3: have shape(list, have tensor), no actual shape(Tensor) - out_3 = fluid.layers.reshape(x, shape=[positive_five, 10]) + # Situation 3: have shape(list, have tensor), no actual shape(Tensor) + out_3 = self.reshape(x, shape=[positive_five, 10]) - # Situation 4: have shape(Tensor), no actual shape(Tensor) - out_4 = fluid.layers.reshape(x, shape=actual_shape) + # Situation 4: have shape(Tensor), no actual shape(Tensor) + out_4 = self.reshape(x, shape=actual_shape) exe = fluid.Executor(place=fluid.CPUPlace()) res_1, res_2, res_3, res_4 = exe.run( - fluid.default_main_program(), + main_prog, feed={"x": input, "shape": np.array([2, 5, 5]).astype("int32")}, fetch_list=[out_1, out_2, out_3, out_4]) @@ -265,76 +274,108 @@ class TestReshapeAPI(unittest.TestCase): assert np.array_equal(res_3, input.reshape([5, 10])) assert np.array_equal(res_4, input.reshape(shape)) + def test_paddle_api(self): + self._set_paddle_api() + 
self._test_api() + + def test_fluid_api(self): + self._set_fluid_api() + self._test_api() + + def test_imperative(self): + self._set_paddle_api() + input = np.random.random([2, 25]).astype("float32") + shape = [2, 5, 5] + with fluid.dygraph.guard(): + x = self.to_tensor(input) + positive_five = self.fill_constant([1], "int32", 5) + + out_1 = self.reshape(x, shape) + + out_2 = self.reshape(x, shape=[positive_five, 10]) + + shape_tensor = self.to_tensor(np.array([2, 5, 5]).astype("int32")) + out_3 = self.reshape(x, shape=shape_tensor) + + assert np.array_equal(out_1.numpy(), input.reshape(shape)) + assert np.array_equal(out_2.numpy(), input.reshape([5, 10])) + assert np.array_equal(out_3.numpy(), input.reshape(shape)) + # Test Input Error class TestReshapeOpError(unittest.TestCase): - def test_errors(self): + def _set_paddle_api(self): + self.data = paddle.data + self.reshape = paddle.reshape + + def _set_fluid_api(self): + self.data = fluid.data + self.reshape = fluid.layers.reshape + + def _test_errors(self): with program_guard(Program(), Program()): # The x type of reshape_op must be Variable. def test_x_type(): x1 = fluid.create_lod_tensor( np.array([[-1]]), [[1]], fluid.CPUPlace()) - fluid.layers.reshape(x1, shape=[1]) + self.reshape(x1, shape=[1]) self.assertRaises(TypeError, test_x_type) # The x dtype of reshape_op must be float16, float32, float64, int32 or int64. def test_x_dtype(): - x2 = fluid.layers.data( - name="x2", - shape=[2, 25], - append_batch_size=False, - dtype="bool") - fluid.layers.reshape(x2, shape=[2, 5, 5]) + x2 = self.data(name="x2", shape=[2, 25], dtype="bool") + self.reshape(x2, shape=[2, 5, 5]) self.assertRaises(TypeError, test_x_dtype) def test_x_dtype_float16(): - x_float16 = fluid.layers.data( - name="x_float16", - shape=[2, 25], - append_batch_size=False, - dtype="float16") - fluid.layers.reshape(x_float16, shape=[2, 5, 5]) + x_float16 = self.data( + name="x_float16", shape=[2, 25], dtype="float16") + self.reshape(x_float16, shape=[2, 5, 5]) test_x_dtype_float16() - x3 = fluid.layers.data( - name="x3", - shape=[2, 25], - append_batch_size=False, - dtype="float32") + x3 = self.data(name="x3", shape=[2, 25], dtype="float32") # The argument shape's type of reshape_op must be list, tuple or Variable. def test_shape_type(): - fluid.layers.reshape(x3, shape=1) + self.reshape(x3, shape=1) self.assertRaises(TypeError, test_shape_type) # The argument actual_shape's type of reshape_op must be Variable or None. def test_actual_shape_type(): - fluid.layers.reshape(x3, shape=[25, 2], actual_shape=1) + self.reshape(x3, shape=[25, 2], actual_shape=1) self.assertRaises(TypeError, test_actual_shape_type) # The argument shape have more than one -1. def test_shape_1(): - fluid.layers.reshape(x3, shape=[-1, -1, 5]) + self.reshape(x3, shape=[-1, -1, 5]) self.assertRaises(AssertionError, test_shape_1) # The argument shape have element 0 whose index exceed the input dimension. def test_shape_2(): - fluid.layers.reshape(x3, [2, 5, 5, 0]) + self.reshape(x3, [2, 5, 5, 0]) self.assertRaises(AssertionError, test_shape_2) # The argument shape have more than one negative value. 
def test_shape_3(): - fluid.layers.reshape(x3, [-1, -2, 5]) + self.reshape(x3, [-1, -2, 5]) self.assertRaises(AssertionError, test_shape_3) + def test_paddle_api_error(self): + self._set_paddle_api() + self._test_errors() + + def test_fluid_api_error(self): + self._set_fluid_api() + self._test_errors() + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_retain_graph.py b/python/paddle/fluid/tests/unittests/test_retain_graph.py index db4b922afcd230c852a4859ec5a7e7497d59ffff..9abbee173852baf9db998aad3b71edabdb3e11ed 100644 --- a/python/paddle/fluid/tests/unittests/test_retain_graph.py +++ b/python/paddle/fluid/tests/unittests/test_retain_graph.py @@ -26,7 +26,7 @@ paddle.manual_seed(SEED) class Generator(fluid.dygraph.Layer): def __init__(self): super(Generator, self).__init__() - self.conv1 = paddle.nn.Conv2D(3, 3, 3, 1) + self.conv1 = paddle.nn.Conv2d(3, 3, 3, padding=1) def forward(self, x): x = self.conv1(x) @@ -37,7 +37,7 @@ class Generator(fluid.dygraph.Layer): class Discriminator(fluid.dygraph.Layer): def __init__(self): super(Discriminator, self).__init__() - self.convd = paddle.nn.Conv2D(6, 3, 1) + self.convd = paddle.nn.Conv2d(6, 3, 1) def forward(self, x): x = self.convd(x) @@ -60,8 +60,10 @@ class TestRetainGraph(unittest.TestCase): interpolatesv = fake_data elif type == 'mixed': alpha = paddle.rand((real_data.shape[0], 1)) - alpha = paddle.expand( - alpha, [1, np.prod(real_data.shape) // real_data.shape[0]]) + alpha = paddle.expand(alpha, [ + real_data.shape[0], + np.prod(real_data.shape) // real_data.shape[0] + ]) alpha = paddle.reshape(alpha, real_data.shape) interpolatesv = alpha * real_data + ((1 - alpha) * fake_data) else: @@ -94,8 +96,8 @@ class TestRetainGraph(unittest.TestCase): g = Generator() d = Discriminator() - optim_g = paddle.optimizer.Adam(parameter_list=g.parameters()) - optim_d = paddle.optimizer.Adam(parameter_list=d.parameters()) + optim_g = paddle.optimizer.Adam(parameters=g.parameters()) + optim_d = paddle.optimizer.Adam(parameters=d.parameters()) gan_criterion = paddle.nn.MSELoss() l1_criterion = paddle.nn.L1Loss() diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py index eb12bc741767340a3e7e3580a8b95065d4267693..0f225758ced3bf7d6fd821be09f2dbf11ff1cc6d 100644 --- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py +++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py @@ -20,6 +20,7 @@ import numpy as np import paddle.fluid.core as core from paddle.fluid.op import Operator import paddle.fluid as fluid +import paddle def create_selected_rows_and_tensor(scope, place, height, row_num, @@ -222,5 +223,59 @@ class TestRmspropOp(TestBase): size=size) +class TestRMSPropV2(unittest.TestCase): + def test_rmsprop_dygraph(self): + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.to_tensor(value) + linear = paddle.nn.Linear(13, 5, dtype="float32") + # This can be any optimizer supported by dygraph. 
+ adam = paddle.optimizer.RMSProp( + learning_rate=0.01, + parameters=linear.parameters(), + weight_decay=0.01) + out = linear(a) + out.backward() + adam.step() + adam.clear_gradients() + + def test_rmsprop(self): + place = fluid.CPUPlace() + main = fluid.Program() + with fluid.program_guard(main): + x = fluid.layers.data(name='x', shape=[13], dtype='float32') + y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y_predict = fluid.layers.fc(input=x, size=1, act=None) + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + + rms_optimizer = paddle.optimizer.RMSProp(learning_rate=0.1) + rms_optimizer.minimize(avg_cost) + + fetch_list = [avg_cost] + train_reader = paddle.batch( + paddle.dataset.uci_housing.train(), batch_size=1) + feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + for data in train_reader(): + exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list) + + def test_raise_error(self): + self.assertRaises(ValueError, paddle.optimizer.RMSProp, None) + self.assertRaises( + ValueError, paddle.optimizer.RMSProp, learning_rate=0.1, rho=None) + self.assertRaises( + ValueError, + paddle.optimizer.RMSProp, + learning_rate=0.1, + epsilon=None) + self.assertRaises( + ValueError, + paddle.optimizer.RMSProp, + learning_rate=0.1, + momentum=None) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index 6ca194b2694b6c7537ceb94e11eb1a1a0aeb8d8d..7e2ef36c1a7fda5c31049ec9c752c5226bfb89dc 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -248,7 +248,8 @@ class PolicyGradient(object): func=reward_func, x=[action, length], out=reward) neg_log_prob = layers.cross_entropy(act_prob, action) cost = neg_log_prob * reward - cost = (layers.reduce_sum(cost) / layers.reduce_sum(length) + cost = (layers.reduce_sum(cost) / + layers.cast(layers.reduce_sum(length), "float32") ) if length is not None else layers.reduce_mean(cost) optimizer = fluid.optimizer.Adam(self.lr) optimizer.minimize(cost) diff --git a/python/paddle/fluid/tests/unittests/test_scatter_op.py b/python/paddle/fluid/tests/unittests/test_scatter_op.py index 5e9c67c1a7a29b69a977cc94487fc3d26f24eeb8..ce3b060828ac475a10d61bf756423069ab0a70c1 100644 --- a/python/paddle/fluid/tests/unittests/test_scatter_op.py +++ b/python/paddle/fluid/tests/unittests/test_scatter_op.py @@ -16,6 +16,8 @@ from __future__ import print_function import unittest import numpy as np +import paddle +import paddle.fluid as fluid from op_test import OpTest import paddle.fluid.core as core @@ -173,5 +175,55 @@ class TestScatterOp5(OpTest): self.check_grad_with_place(place, ['Updates'], 'Out', in_place=True) +class TestScatterAPI(unittest.TestCase): + def setUp(self): + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_static_result(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[3, 2], dtype="float64") + index = fluid.data(name="index", shape=[4], dtype="int64") + updates = fluid.data(name="updates", shape=[4, 2], dtype="float64") + result = paddle.scatter(input, index, updates, False) + + input_data = np.array([[1, 1], [2, 2], [3, 3]]).astype(np.float64) + index_data = np.array([2, 
1, 0, 1]).astype(np.int64) + updates_data = np.array( + [[1, 1], [2, 2], [3, 3], [4, 4]]).astype(np.float64) + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={ + "input": input_data, + "index": index_data, + "updates": updates_data + }, + fetch_list=[result]) + self.assertEqual((fetches[0] == \ + np.array([[3., 3.],[6., 6.],[1., 1.]])).all(), True) + + def test_static(self): + for place in self.places: + self.check_static_result(place=place) + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + x_data = np.array([[1, 1], [2, 2], [3, 3]]).astype(np.float64) + index_data = np.array([2, 1, 0, 1]).astype(np.int64) + updates_data = np.array( + [[1, 1], [2, 2], [3, 3], [4, 4]]).astype(np.float64) + + x = fluid.dygraph.to_variable(x_data) + index = fluid.dygraph.to_variable(index_data) + updates = fluid.dygraph.to_variable(updates_data) + + output1 = paddle.scatter(x, index, updates, overwrite=False) + self.assertEqual((output1.numpy() == \ + np.array([[3., 3.],[6., 6.],[1., 1.]])).all(), True) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_selu_op.py b/python/paddle/fluid/tests/unittests/test_selu_op.py index 6070c84ff236274cc1778d0dce9ab40d884ce7ec..590ef11e9cb5de7414ff8745b719e3ffb4e044d8 100644 --- a/python/paddle/fluid/tests/unittests/test_selu_op.py +++ b/python/paddle/fluid/tests/unittests/test_selu_op.py @@ -17,9 +17,26 @@ from __future__ import print_function import unittest import numpy as np import six +import paddle.fluid.core as core from op_test import OpTest +import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.fluid import compiler, Program, program_guard + + +def ref_selu(x, + scale=1.0507009873554804934193349852946, + alpha=1.6732632423543772848170429916717): + out = np.copy(x) + out_flat = out.flatten() + for i in range(out_flat.size): + if out_flat[i] < 0: + out_flat[i] = alpha * np.exp(out_flat[i]) - alpha + out_flat[i] = scale * out_flat[i] + out = out_flat.reshape(x.shape) + return out class SeluTest(OpTest): @@ -39,17 +56,10 @@ class SeluTest(OpTest): # zero. x[np.abs(x) < 0.005] = 0.02 - x_flat = x.flatten() - - for i in range(x_flat.size): - if x_flat[i] < 0: - x_flat[i] = alpha * np.exp(x_flat[i]) - alpha - x_flat[i] = scale * x_flat[i] - - out_np = x_flat.reshape(self.x_shape) + out = ref_selu(x, scale, alpha) self.inputs = {'X': x} - self.outputs = {'Out': out_np} + self.outputs = {'Out': out} self.attrs = { 'alpha': alpha, @@ -69,17 +79,60 @@ class SeluTest(OpTest): self.check_grad(['X'], 'Out') -class TestSeluOpError(unittest.TestCase): +class TestSeluAPI(unittest.TestCase): + # test paddle.nn.SELU, paddle.nn.functional.selu + def setUp(self): + self.scale = 1.5 + self.alpha = 2.0 + self.x_np = np.random.normal(size=[3, 5, 5, 10]).astype(np.float64) + # Since zero point in selu is not differentiable, avoid randomize + # zero. 
+ self.x_np[np.abs(self.x_np) < 0.005] = 0.02 + self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def test_static_api(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.x_np.shape, self.x_np.dtype) + out1 = F.selu(x, self.scale, self.alpha) + selu = paddle.nn.SELU(self.scale, self.alpha) + out2 = selu(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_selu(self.x_np, self.scale, self.alpha) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.selu(x, self.scale, self.alpha) + selu = paddle.nn.SELU(self.scale, self.alpha) + out2 = selu(x) + out_ref = ref_selu(self.x_np, self.scale, self.alpha) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + paddle.enable_static() + + def test_fluid_api(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data('X', self.x_np.shape, self.x_np.dtype) + out = fluid.layers.selu(x, self.scale, self.alpha) + exe = fluid.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) + out_ref = ref_selu(self.x_np, self.scale, self.alpha) + self.assertEqual(np.allclose(out_ref, res[0]), True) + def test_errors(self): - with program_guard(Program()): + with paddle.static.program_guard(paddle.static.Program()): # The input type must be Variable. - self.assertRaises(TypeError, fluid.layers.selu, 1) + self.assertRaises(TypeError, F.selu, 1) # The input dtype must be float16, float32, float64. - x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32') - self.assertRaises(TypeError, fluid.layers.selu, x_int32) - # support the input dtype is float32 - x_fp32 = fluid.data(name='x_fp32', shape=[12, 10], dtype='float32') - fluid.layers.selu(x_fp32) + x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32') + self.assertRaises(TypeError, F.selu, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16') + F.selu(x_fp16) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_sign_op.py b/python/paddle/fluid/tests/unittests/test_sign_op.py index b84e3b5377f2796803707dfd68cd5450c512fce7..da5080eabddc93f0c3d08f16e0a7c20b52af47e0 100644 --- a/python/paddle/fluid/tests/unittests/test_sign_op.py +++ b/python/paddle/fluid/tests/unittests/test_sign_op.py @@ -17,6 +17,7 @@ from __future__ import print_function import unittest import numpy as np from op_test import OpTest +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard @@ -54,5 +55,32 @@ class TestSignOpError(unittest.TestCase): fluid.layers.sign(input4) +class TestSignAPI(unittest.TestCase): + def test_dygraph(self): + with fluid.dygraph.guard(): + np_x = np.array([-1., 0., -0., 1.2, 1.5], dtype='float64') + x = paddle.to_tensor(np_x) + z = paddle.sign(x) + np_z = z.numpy() + z_expected = np.sign(np_x) + self.assertEqual((np_z == z_expected).all(), True) + + def test_static(self): + with program_guard(Program(), Program()): + # The input type of sign_op must be Variable or numpy.ndarray. + input1 = 12 + self.assertRaises(TypeError, paddle.tensor.math.sign, input1) + # The input dtype of sign_op must be float16, float32, float64. 
+ input2 = fluid.layers.data( + name='input2', shape=[12, 10], dtype="int32") + input3 = fluid.layers.data( + name='input3', shape=[12, 10], dtype="int64") + self.assertRaises(TypeError, paddle.tensor.math.sign, input2) + self.assertRaises(TypeError, paddle.tensor.math.sign, input3) + input4 = fluid.layers.data( + name='input4', shape=[4], dtype="float16") + paddle.sign(input4) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..9a97f57aaae5f290b20e34242b1b43e5e352223d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py @@ -0,0 +1,181 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +import numpy as np +import unittest + + +def smooth_l1_loss_forward(val, delta): + abs_val = abs(val) + if abs_val <= delta: + return 0.5 * val * val + else: + return delta * (abs_val - 0.5 * delta) + + +def smooth_l1_loss_np(input, label, reduction='mean', delta=1.0): + diff = input - label + out = np.vectorize(smooth_l1_loss_forward)(diff, delta) + if reduction == 'sum': + return np.sum(out) + elif reduction == 'mean': + return np.mean(out) + elif reduction == 'none': + return out + + +class SmoothL1Loss(unittest.TestCase): + def setUp(self): + np.random.seed(123) + + def test_smooth_l1_loss_mean(self): + input_np = np.random.random([100, 200]).astype(np.float32) + label_np = np.random.random([100, 200]).astype(np.float32) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float32') + label = fluid.data(name='label', shape=[100, 200], dtype='float32') + smooth_l1_loss = paddle.nn.loss.SmoothL1Loss() + ret = smooth_l1_loss(input, label) + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + smooth_l1_loss = paddle.nn.loss.SmoothL1Loss() + dy_ret = smooth_l1_loss( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = smooth_l1_loss_np(input_np, label_np, reduction='mean') + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + def test_smooth_l1_loss_sum(self): + input_np = np.random.random([100, 200]).astype(np.float32) + label_np = np.random.random([100, 200]).astype(np.float32) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if 
fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float32') + label = fluid.data(name='label', shape=[100, 200], dtype='float32') + smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='sum') + ret = smooth_l1_loss(input, label) + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='sum') + dy_ret = smooth_l1_loss( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = smooth_l1_loss_np(input_np, label_np, reduction='sum') + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + def test_smooth_l1_loss_none(self): + input_np = np.random.random([100, 200]).astype(np.float32) + label_np = np.random.random([100, 200]).astype(np.float32) + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float32') + label = fluid.data(name='label', shape=[100, 200], dtype='float32') + smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='none') + ret = smooth_l1_loss(input, label) + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='none') + dy_ret = smooth_l1_loss( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = smooth_l1_loss_np(input_np, label_np, reduction='none') + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + self.assertTrue(np.allclose(dy_ret_value, expected)) + + def test_smooth_l1_loss_delta(self): + input_np = np.random.random([100, 200]).astype(np.float32) + label_np = np.random.random([100, 200]).astype(np.float32) + delta = np.random.rand() + prog = fluid.Program() + startup_prog = fluid.Program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + with fluid.program_guard(prog, startup_prog): + input = fluid.data(name='input', shape=[100, 200], dtype='float32') + label = fluid.data(name='label', shape=[100, 200], dtype='float32') + smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(delta=delta) + ret = smooth_l1_loss(input, label) + + exe = fluid.Executor(place) + static_ret = exe.run(prog, + feed={ + 'input': input_np, + 'label': label_np, + }, + fetch_list=[ret]) + self.assertIsNotNone(static_ret) + with fluid.dygraph.guard(): + smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(delta=delta) + dy_ret = smooth_l1_loss( + fluid.dygraph.to_variable(input_np), + fluid.dygraph.to_variable(label_np)) + dy_ret_value = dy_ret.numpy() + self.assertIsNotNone(dy_ret_value) + expected = smooth_l1_loss_np(input_np, label_np, delta=delta) + self.assertTrue(np.allclose(static_ret, dy_ret_value)) + self.assertTrue(np.allclose(static_ret, expected)) + 
self.assertTrue(np.allclose(dy_ret_value, expected)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py index 25e95216968b51e5093497b57768d7eb31952370..04d5cc941a4636da0352fe9221cdad8bdfcd2bd9 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py @@ -35,6 +35,15 @@ def stable_softmax(x): return exps / np.sum(exps) +def ref_softmax(x, axis=None, dtype=None): + x_t = x.copy() + if dtype is not None: + x_t = x_t.astype(dtype) + if axis is None: + axis = -1 + return np.apply_along_axis(stable_softmax, axis, x_t) + + class TestSoftmaxOp(OpTest): def get_x_shape(self): return [10, 10] @@ -93,20 +102,6 @@ class TestSoftmaxOp(OpTest): check_dygraph=(self.use_mkldnn == False)) -class TestSoftmaxOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - # The input type of softmax_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace()) - self.assertRaises(TypeError, fluid.layers.softmax, x1) - # The input dtype of softmax_op must be float16, float32 or float64. - x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") - self.assertRaises(TypeError, fluid.layers.softmax, x2) - x3 = fluid.layers.data(name='x3', shape=[4], dtype="float16") - fluid.layers.softmax(x3) - - class TestSoftmaxOp2(TestSoftmaxOp): def get_x_shape(self): return [2, 3, 4, 5] @@ -224,41 +219,59 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp): return [2, 3, 4, 5] -class TestNnFunctionalSoftmaxApi(unittest.TestCase): +class TestSoftmaxAPI(unittest.TestCase): def setUp(self): self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda( ) else paddle.CPUPlace() self.x_np = np.random.uniform(-1., 1., [2, 3, 4, 5]).astype('float32') self.out_ref = np.apply_along_axis(stable_softmax, -1, self.x_np) - def test_api_static(self): - with program_guard(Program()): + def test_static_check(self): + with paddle.static.program_guard(paddle.static.Program()): x = paddle.data('X', self.x_np.shape, 'float32') - out = F.softmax(x) + out1 = F.softmax(x) + m = paddle.nn.Softmax() + out2 = m(x) exe = paddle.static.Executor(self.place) - res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) - self.assertEqual(np.allclose(self.out_ref, res[0]), True) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = ref_softmax(self.x_np, axis=-1, dtype=None) + for r in res: + self.assertEqual(np.allclose(out_ref, r), True) - def test_api_imperative(self): + def test_dygraph_check(self): paddle.disable_static(self.place) - x = paddle.to_variable(self.x_np) - out = F.softmax(x) - self.assertEqual(np.allclose(self.out_ref, out.numpy()), True) - - out = F.softmax(x, axis=0) - out_ref = np.apply_along_axis(stable_softmax, 0, self.x_np) + x = paddle.to_tensor(self.x_np) + out1 = F.softmax(x) + m = paddle.nn.Softmax() + out2 = m(x) + out_ref = ref_softmax(self.x_np, axis=-1, dtype=None) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + + out1 = F.softmax(x, axis=0) + m = paddle.nn.Softmax(axis=0) + out2 = m(x) + out_ref = ref_softmax(self.x_np, axis=0, dtype=None) + for r in [out1, out2]: + self.assertEqual(np.allclose(out_ref, r.numpy()), True) + + out = F.softmax(x, dtype=np.float64) + out_ref = ref_softmax(self.x_np, axis=-1, dtype=np.float64) self.assertEqual(np.allclose(out_ref, out.numpy()), True) 
paddle.enable_static() def test_error(self): - with program_guard(Program(), Program()): - # The x should be variable and its dtype should be float32, float64. - self.assertRaises(TypeError, F.softmax, [1]) - - x = paddle.data(name='x', shape=[2, 3], dtype='int32') - self.assertRaises(TypeError, F.softmax, x) + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. + self.assertRaises(TypeError, F.softmax, 1) + # The input dtype must be float16, float32, float64. + x_int32 = paddle.data(name='x_int32', shape=[2, 3], dtype='int32') + self.assertRaises(TypeError, F.softmax, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.data(name='x_fp16', shape=[2, 3], dtype='float16') + F.softmax(x_fp16) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_std_layer.py b/python/paddle/fluid/tests/unittests/test_std_layer.py index d1e0056304204bf0dbe47982bbf4b9574acf8eac..e455151481443c1fb918efd9e44444536adc6b7f 100644 --- a/python/paddle/fluid/tests/unittests/test_std_layer.py +++ b/python/paddle/fluid/tests/unittests/test_std_layer.py @@ -15,65 +15,104 @@ import unittest import numpy as np import paddle -import paddle.fluid as fluid -class TestStdLayer(unittest.TestCase): +def ref_std(x, axis=None, unbiased=True, keepdim=False): + ddof = 1 if unbiased else 0 + if isinstance(axis, int): + axis = (axis, ) + if axis is not None: + axis = tuple(axis) + return np.std(x, axis=axis, ddof=ddof, keepdims=keepdim) + + +class TestStdAPI(unittest.TestCase): def setUp(self): - self._dtype = "float64" - self._input = np.random.random([2, 3, 4, 5]).astype(self._dtype) - - def static(self, axis=None, keepdim=False, unbiased=True): - prog = fluid.Program() - with fluid.program_guard(prog): - data = fluid.data( - name="data", dtype=self._dtype, shape=[None, 3, 4, 5]) - out = prog.current_block().create_var( - dtype=self._dtype, shape=[2, 3, 4, 5]) - paddle.std(input=data, - axis=axis, - keepdim=keepdim, - unbiased=unbiased, - out=out) - - exe = fluid.Executor(self._place) - return exe.run(feed={"data": self._input}, - program=prog, - fetch_list=[out])[0] - - def dynamic(self, axis=None, keepdim=False, unbiased=True): - with fluid.dygraph.guard(self._place): - data = fluid.dygraph.to_variable(self._input) - out = paddle.std(input=data, - axis=axis, - keepdim=keepdim, - unbiased=unbiased) - return out.numpy() - - def numpy(self, axis=None, keepdim=False, unbiased=True): - ddof = 1 if unbiased else 0 - axis = tuple(axis) if isinstance(axis, list) else axis - return np.std(self._input, axis=axis, keepdims=keepdim, ddof=ddof) - - def test_equal(self): - places = [] - if fluid.core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - self._place = place - self.assertTrue(np.allclose(self.numpy(), self.static())) - self.assertTrue( - np.allclose( - self.numpy(axis=[0, 2]), self.dynamic(axis=[0, 2]))) - self.assertTrue( - np.allclose( - self.numpy( - axis=[1, 3], keepdim=True), - self.dynamic( - axis=[1, 3], keepdim=True))) - self.assertTrue( - np.allclose( - self.numpy(unbiased=False), self.dynamic(unbiased=False))) + self.dtype = 'float64' + self.shape = [1, 3, 4, 10] + self.axis = [1, 3] + self.keepdim = False + self.unbiased = True + self.set_attrs() + self.x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) + self.place=paddle.CUDAPlace(0) \ + if paddle.fluid.core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def set_attrs(self): + pass + + def static(self): + with 
paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.shape, self.dtype) + out = paddle.std(x, self.axis, self.unbiased, self.keepdim) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x}, fetch_list=[out]) + return res[0] + + def dygraph(self): + paddle.disable_static() + x = paddle.to_tensor(self.x) + out = paddle.std(x, self.axis, self.unbiased, self.keepdim) + paddle.enable_static() + return out.numpy() + + def test_api(self): + out_ref = ref_std(self.x, self.axis, self.unbiased, self.keepdim) + out_dygraph = self.dygraph() + out_static = self.static() + for out in [out_dygraph, out_static]: + self.assertTrue(np.allclose(out_ref, out)) + self.assertTrue(np.equal(out_ref.shape, out.shape).all()) + + +class TestStdAPI_dtype(TestStdAPI): + def set_attrs(self): + self.dtype = 'float32' + + +class TestStdAPI_axis_int(TestStdAPI): + def set_attrs(self): + self.axis = 2 + + +class TestStdAPI_axis_list(TestStdAPI): + def set_attrs(self): + self.axis = [1, 2] + + +class TestStdAPI_axis_tuple(TestStdAPI): + def set_attrs(self): + self.axis = (1, 3) + + +class TestStdAPI_keepdim(TestStdAPI): + def set_attrs(self): + self.keepdim = False + + +class TestStdAPI_unbiased(TestStdAPI): + def set_attrs(self): + self.unbiased = False + + +class TestStdAPI_alias(unittest.TestCase): + def test_alias(self): + paddle.disable_static() + x = paddle.to_tensor(np.array([10, 12], 'float32')) + out1 = paddle.std(x).numpy() + out2 = paddle.tensor.std(x).numpy() + out3 = paddle.tensor.stat.std(x).numpy() + self.assertTrue(np.allclose(out1, out2)) + self.assertTrue(np.allclose(out1, out3)) + paddle.enable_static() + + +class TestStdError(unittest.TestCase): + def test_error(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [2, 3, 4], 'int32') + self.assertRaises(TypeError, paddle.std, x) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py index 8fd118c0193035fce294aa6ac23951d57ba43f78..b0701a9b187f6c7cf63f43d69f482ea13e6d3fe3 100644 --- a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py @@ -22,9 +22,11 @@ import unittest import numpy as np import os import six +import paddle import paddle.fluid.core as core import paddle.fluid as fluid from paddle.fluid import compiler +from paddle.fluid import Program, program_guard from op_test import OpTest, _set_use_system_allocator @@ -202,5 +204,22 @@ class TestFP16SyncBatchNormOpTraining(TestSyncBatchNormOpTraining): self.atol = 1e-2 +class TestDygraphSyncBatchNormAPIError(unittest.TestCase): + def test_errors(self): + if not core.is_compiled_with_cuda(): + return + + with program_guard(Program(), Program()): + my_sync_batch_norm = paddle.nn.SyncBatchNorm(10) + x1 = fluid.create_lod_tensor( + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CUDAPlace(0)) + self.assertRaises(TypeError, my_sync_batch_norm, x1) + + # the input dtype of SyncBatchNorm must be float16 or float32 or float64 + # float16 only can be set on GPU place + x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") + self.assertRaises(TypeError, my_sync_batch_norm, x2) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py new file mode 100644 index 
0000000000000000000000000000000000000000..5aaf31993448ab0ff0c69f648cfa84c62d3e198b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_tile_op.py @@ -0,0 +1,251 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import compiler, Program, program_guard + + +# Situation 1: repeat_times is a list (without tensor) +class TestTileOpRank1(OpTest): + def setUp(self): + self.op_type = "tile" + self.init_data() + + self.inputs = {'X': np.random.random(self.ori_shape).astype("float64")} + self.attrs = {'repeat_times': self.repeat_times} + output = np.tile(self.inputs['X'], self.repeat_times) + self.outputs = {'Out': output} + + def init_data(self): + self.ori_shape = [100] + self.repeat_times = [2] + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +# with dimension expanding +class TestTileOpRank2Expanding(TestTileOpRank1): + def init_data(self): + self.ori_shape = [120] + self.repeat_times = [2, 2] + + +class TestTileOpRank2(TestTileOpRank1): + def init_data(self): + self.ori_shape = [12, 14] + self.repeat_times = [2, 3] + + +class TestTileOpRank3_Corner(TestTileOpRank1): + def init_data(self): + self.ori_shape = (2, 10, 5) + self.repeat_times = (1, 1, 1) + + +class TestTileOpRank3_Corner2(TestTileOpRank1): + def init_data(self): + self.ori_shape = (2, 10, 5) + self.repeat_times = (2, 2) + + +class TestTileOpRank3(TestTileOpRank1): + def init_data(self): + self.ori_shape = (2, 4, 15) + self.repeat_times = (2, 1, 4) + + +class TestTileOpRank4(TestTileOpRank1): + def init_data(self): + self.ori_shape = (2, 4, 5, 7) + self.repeat_times = (3, 2, 1, 2) + + +# Situation 2: repeat_times is a list (with tensor) +class TestTileOpRank1_tensor_attr(OpTest): + def setUp(self): + self.op_type = "tile" + self.init_data() + repeat_times_tensor = [] + for index, ele in enumerate(self.repeat_times): + repeat_times_tensor.append(("x" + str(index), np.ones( + (1)).astype('int32') * ele)) + + self.inputs = { + 'X': np.random.random(self.ori_shape).astype("float64"), + 'repeat_times_tensor': repeat_times_tensor, + } + self.attrs = {"repeat_times": self.infer_repeat_times} + output = np.tile(self.inputs['X'], self.repeat_times) + self.outputs = {'Out': output} + + def init_data(self): + self.ori_shape = [100] + self.repeat_times = [2] + self.infer_repeat_times = [-1] + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestTileOpRank2_Corner_tensor_attr(TestTileOpRank1_tensor_attr): + def init_data(self): + self.ori_shape = [12, 14] + self.repeat_times = [1, 1] + self.infer_repeat_times = [1, -1] + + +class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr): + def init_data(self): + self.ori_shape = [12, 14] + self.repeat_times = 
[2, 3] + self.infer_repeat_times = [-1, 3] + + +# Situation 3: repeat_times is a tensor +class TestTileOpRank1_tensor(OpTest): + def setUp(self): + self.op_type = "tile" + self.init_data() + + self.inputs = { + 'X': np.random.random(self.ori_shape).astype("float64"), + 'RepeatTimes': np.array(self.repeat_times).astype("int32"), + } + self.attrs = {} + output = np.tile(self.inputs['X'], self.repeat_times) + self.outputs = {'Out': output} + + def init_data(self): + self.ori_shape = [100] + self.repeat_times = [2] + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestTileOpRank2_tensor(TestTileOpRank1_tensor): + def init_data(self): + self.ori_shape = [12, 14] + self.repeat_times = [2, 3] + + +# Situation 4: input x is Integer +class TestTileOpInteger(OpTest): + def setUp(self): + self.op_type = "tile" + self.inputs = { + 'X': np.random.randint( + 10, size=(4, 4, 5)).astype("int32") + } + self.attrs = {'repeat_times': [2, 1, 4]} + output = np.tile(self.inputs['X'], (2, 1, 4)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +# Situation 5: input x is Bool +class TestTileOpBoolean(OpTest): + def setUp(self): + self.op_type = "tile" + self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")} + self.attrs = {'repeat_times': [2, 1, 4]} + output = np.tile(self.inputs['X'], (2, 1, 4)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +# Situation 56: input x is Integer +class TestTileOpInt64_t(OpTest): + def setUp(self): + self.op_type = "tile" + self.inputs = { + 'X': np.random.randint( + 10, size=(2, 4, 5)).astype("int64") + } + self.attrs = {'repeat_times': [2, 1, 4]} + output = np.tile(self.inputs['X'], (2, 1, 4)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +class TestTileError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + x1 = fluid.create_lod_tensor( + np.array([[-1]]), [[1]], fluid.CPUPlace()) + repeat_times = [2, 2] + self.assertRaises(TypeError, paddle.tile, x1, repeat_times) + x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") + self.assertRaises(TypeError, paddle.tile, x2, repeat_times) + x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool") + x3.stop_gradient = False + self.assertRaises(ValueError, paddle.tile, x3, repeat_times) + + +class TestTileAPIStatic(unittest.TestCase): + def test_api(self): + with program_guard(Program(), Program()): + repeat_times = [2, 2] + x1 = fluid.layers.data(name='x1', shape=[4], dtype="int32") + out = paddle.tile(x1, repeat_times) + positive_2 = fluid.layers.fill_constant([1], dtype="int32", value=2) + out2 = paddle.tile(x1, repeat_times=[positive_2, 2]) + + +# Test python API +class TestTileAPI(unittest.TestCase): + def test_api(self): + with fluid.dygraph.guard(): + np_x = np.random.random([12, 14]).astype("float32") + x = paddle.to_variable(np_x) + + positive_2 = np.array([2]).astype("int32") + positive_2 = paddle.to_variable(positive_2) + + repeat_times = np.array([2, 3]).astype("int32") + repeat_times = paddle.to_variable(repeat_times) + + out_1 = paddle.tile(x, repeat_times=[2, 3]) + out_2 = paddle.tile(x, repeat_times=[positive_2, 3]) + out_3 = paddle.tile(x, repeat_times=repeat_times) + + assert np.array_equal(out_1.numpy(), np.tile(np_x, (2, 3))) + assert np.array_equal(out_2.numpy(), np.tile(np_x, (2, 3))) + assert np.array_equal(out_3.numpy(), np.tile(np_x, (2, 
3))) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_transformer_api.py b/python/paddle/fluid/tests/unittests/test_transformer_api.py new file mode 100644 index 0000000000000000000000000000000000000000..c8d1e77134036bf7b28d4afb8bacaa44092b1053 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_transformer_api.py @@ -0,0 +1,477 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.nn.layer.transformer import MultiHeadAttention, TransformerEncoderLayer, TransformerDecoderLayer, TransformerEncoder, TransformerDecoder, Transformer + +import unittest + + +def generate_basic_params(mode="attn", self_attention=True): + batch_size, query_length = [np.random.randint(2, 10) for _ in range(2)] + d_head, num_heads = [np.random.randint(3, 10) for _ in range(2)] + attn_dropout = 0.0 + embed_dim = d_head * num_heads + if mode == "attn": + if self_attention: + kdim, vdim = embed_dim, embed_dim + key_length, value_length = query_length, query_length + else: + kdim, vdim = [np.random.randint(5, 20) for _ in range(2)] + key_length = np.random.randint(2, 10) + value_length = key_length + return batch_size, query_length, key_length, value_length, embed_dim, kdim, vdim, num_heads, attn_dropout + + else: + dropout, act_dropout = 0.0, 0.0 + dim_feedforward = np.random.randint(128, 1024) + sequence_length = np.random.randint(2, 10) + if mode == "encoder_layer": + return batch_size, embed_dim, num_heads, dim_feedforward, dropout, attn_dropout, act_dropout, sequence_length + elif mode == "decoder_layer": + target_length = np.random.randint(2, 10) + return batch_size, embed_dim, num_heads, dim_feedforward, dropout, attn_dropout, act_dropout, sequence_length, target_length + + +def generate_query_key_value_cache(self_attention, + batch_size, + num_heads, + query_length, + embed_dim, + key_length=None, + value_length=None, + kdim=None, + vdim=None, + cache=None): + query = np.random.rand(batch_size, query_length, + embed_dim).astype("float32") + attn_mask = np.zeros((batch_size, num_heads, query_length, key_length)) + attn_mask[0][0][0][0] = -1e9 + + head_dim = embed_dim // num_heads + if self_attention: + key, value = query, query + else: + key = np.random.rand(batch_size, key_length, kdim).astype("float32") + value = np.random.rand(batch_size, value_length, vdim).astype("float32") + cache_dict = {} + if cache: + if not self_attention: + cache_dict["static_k"] = np.random.rand( + batch_size, num_heads, key_length, head_dim).astype("float32") + cache_dict["static_v"] = np.random.rand( + batch_size, num_heads, value_length, head_dim).astype("float32") + else: + cache_dict["k"] = np.random.rand(batch_size, num_heads, key_length, + head_dim).astype("float32") + cache_dict["v"] = np.random.rand( + batch_size, num_heads, value_length, head_dim).astype("float32") + else: + cache_dict = None + return query, key, value, 
attn_mask, cache_dict + + +def fc(x, weight): + return np.matmul(x, weight) + + +def softmax(x): + np.seterr(invalid='ignore') + output = np.zeros(x.shape, dtype=np.float64) + for i in range(x.shape[0]): + for j in range(x.shape[1]): + for k in range(x.shape[2]): + x_curr = x[i, j, k, :] + e_x = np.exp(x_curr - np.amax(x_curr)) + output[i, j, k, :] = e_x / np.sum(e_x) + return output + + +def batch_matmul(x, y): + assert x.shape[0] == y.shape[0] + assert x.shape[1] == y.shape[1] + retval = np.zeros( + (x.shape[0], x.shape[1], x.shape[2], y.shape[3]), dtype=np.float64) + for i in range(x.shape[0]): + for j in range(x.shape[1]): + retval[i, j, :, :] = np.matmul(x[i, j, :, :], y[i, j, :, :]) + return retval + + +def scaled_dot_product_attention(q, k, v, d_key, attn_mask, multi_head_attn): + k = k.transpose([0, 1, 3, 2]) + qkt = batch_matmul(q, k / np.sqrt(d_key, dtype=np.float64)) + if attn_mask is not None: + qkt += attn_mask + weight = softmax(qkt) + attn_heads = batch_matmul(weight, v) + attn_heads = attn_heads.transpose((0, 2, 1, 3)) + attn_heads = attn_heads.reshape((attn_heads.shape[0], attn_heads.shape[1], + attn_heads.shape[2] * attn_heads.shape[3])) + return attn_heads + + +def cal_qkv(key, value, num_heads, embed_dim, multi_head_attn): + with fluid.dygraph.guard(): + head_dim = embed_dim // num_heads + k_weight = multi_head_attn.k_proj.weight.numpy() + v_weight = multi_head_attn.v_proj.weight.numpy() + k = fc(key, k_weight) + v = fc(value, v_weight) + k = k.reshape((k.shape[0], k.shape[1], num_heads, head_dim)) + k = k.transpose((0, 2, 1, 3)) + v = v.reshape((v.shape[0], v.shape[1], num_heads, head_dim)) + v = v.transpose((0, 2, 1, 3)) + return k, v + + +def prepare_qkv(query, key, value, num_heads, embed_dim, self_attention, + multi_head_attn, cache_dict): + q_weight = multi_head_attn.q_proj.weight.numpy() + q = fc(query, q_weight) + q = q.reshape((q.shape[0], q.shape[1], num_heads, embed_dim // num_heads)) + q = q.transpose((0, 2, 1, 3)) + + if not self_attention and cache_dict: + k, v = cache_dict["static_k"], cache_dict["static_v"] + else: + k, v = cal_qkv(key, value, num_heads, embed_dim, multi_head_attn) + if cache_dict is not None: + k = np.concatenate((cache_dict["k"], k), axis=2) + v = np.concatenate((cache_dict["v"], v), axis=2) + return (q, k, v, cache_dict) + + +def add(x, y=None): + fluid.enable_dygraph() + with fluid.dygraph.guard(): + x = x.numpy() if not isinstance(x, np.ndarray) else x + if y is not None: + x += y + return x + return x + + +def relu(x): + compare = x > 0 + return x * compare + + +def layer_norm(x, normalized_shape, norm, epsilon=1e-05, act=None): + fluid.enable_dygraph() + with fluid.dygraph.guard(): + # scale: + weight = norm.weight.numpy() + # shift: + bias = norm.bias.numpy() + + batch_size, src_len, d_model = x.shape + x = x.reshape((batch_size * src_len, d_model)) + mu = np.mean(x, axis=1, keepdims=True) + sigma_squar = np.sum(np.square(x - mu), axis=1) / d_model + x1_up = (x - mu) + x1_down_1 = sigma_squar + epsilon + x1_down = np.sqrt(x1_down_1) + x1_down = x1_down.reshape((x1_down.shape[0], 1)) + x1 = x1_up / x1_down + x_scaled = weight * x1 + x_scaled_bias = x_scaled + bias + x_scaled_bias = x_scaled_bias.reshape((batch_size, src_len, d_model)) + return x_scaled_bias + + +def ffn(src, encoder_layer, ffn_fc1_act="relu"): + assert ffn_fc1_act == "relu", "only relu is supported" + fluid.enable_dygraph() + with fluid.dygraph.guard(): + src = src.numpy() if not isinstance(src, np.ndarray) else src + w1 = encoder_layer.linear1.weight.numpy() + w2 
= encoder_layer.linear2.weight.numpy() + # fc1 + x1 = fc(src, w1) + x1 = relu(x1) + # fc2 + x2 = fc(x1, w2) + return x2 + + +class TestTransformer(unittest.TestCase): + def test_multi_head_attention(self): + def multihead_attention_test_helper(self_attention, cache): + paddle.framework.manual_seed(2020) + # self_attention|cross_attention, cache|No cache + with fluid.dygraph.guard(fluid.CPUPlace()): + + # generate params for multi_head_attention + batch_size, query_length, key_length, value_length, embed_dim, kdim, vdim, num_heads, attn_dropout = generate_basic_params( + "attn", self_attention) + query, key, value, attn_mask, cache_dict = generate_query_key_value_cache( + self_attention, batch_size, num_heads, query_length, + embed_dim, key_length, value_length, kdim, vdim, cache) + if cache and self_attention: + attn_mask = np.concatenate((attn_mask, attn_mask), axis=3) + need_weight, param_attr, bias_attr = False, None, None + # call paddle's function + multi_head_attn = MultiHeadAttention( + embed_dim, num_heads, attn_dropout, kdim, vdim, need_weight, + param_attr, bias_attr) + # construct cache object + cache_obj = None + if cache_dict: + if 'k' and 'v' in cache_dict: + cache_obj = multi_head_attn.Cache( + paddle.to_variable(cache_dict['k']), + paddle.to_variable(cache_dict['v'])) + elif 'static_k' and 'static_v' in cache_dict: + cache_obj = multi_head_attn.StaticCache( + paddle.to_variable(cache_dict['static_k']), + paddle.to_variable(cache_dict['static_v'])) + if attn_mask is not None: + attn_output = multi_head_attn( + paddle.to_variable(query), + paddle.to_variable(key), + paddle.to_variable(value), + paddle.to_variable(attn_mask), cache_obj) + else: + attn_output = multi_head_attn( + paddle.to_variable(query), + paddle.to_variable(key), + paddle.to_variable(value), attn_mask, cache_obj) + attn_output = attn_output[0] if cache_dict else attn_output + + # implementation by numpy + # compute q, k, v + q, k, v, _ = prepare_qkv(query, key, value, num_heads, + embed_dim, self_attention, + multi_head_attn, cache_dict) + # scale dot product attention + attn_heads = scaled_dot_product_attention( + q, k, v, embed_dim // num_heads, attn_mask, multi_head_attn) + out_proj_weight = multi_head_attn.out_proj.weight.numpy() + reference = fc(attn_heads, out_proj_weight) + + np.testing.assert_allclose( + attn_output.numpy(), reference, atol=1e-6) + + multihead_attention_test_helper(True, True) + multihead_attention_test_helper(True, False) + multihead_attention_test_helper(False, True) + multihead_attention_test_helper(False, False) + + def test_transformer_encoder_layer(self): + + with fluid.dygraph.guard(fluid.CPUPlace()): + paddle.framework.manual_seed(2020) + + ffn_fc1_act = "relu" + # 1.generate basic params + batch_size, d_model, n_head, dim_feedforward, dropout, attn_dropout, act_dropout, sequence_length = generate_basic_params( + mode="encoder_layer") + # 2.generate input for encoder + src = np.random.rand(batch_size, sequence_length, + d_model).astype("float32") + residual = src + src_mask = np.zeros((batch_size, n_head, sequence_length, + sequence_length)).astype("float32") + src_mask[0][0][0][0] = -np.inf + + # paddle + encoder_layer = TransformerEncoderLayer( + d_model, n_head, dim_feedforward, dropout, ffn_fc1_act, + attn_dropout, act_dropout) + + encoder_output = encoder_layer( + paddle.to_variable(src), + paddle.to_variable(src_mask)) # paddle.to_variable(src_mask)) + # 4.numpy: + # paddle self attention + self_attn = MultiHeadAttention( + d_model, n_head, dropout=attn_dropout) + 
attn_output = self_attn( + paddle.to_variable(src), + paddle.to_variable(src), + paddle.to_variable(src), paddle.to_variable(src_mask)).numpy() + + src = attn_output + residual + src_norm = layer_norm(src, d_model, encoder_layer.norm1) + residual = src_norm + + ffn_output = ffn(src_norm, encoder_layer, ffn_fc1_act) + src = residual + ffn_output + src = layer_norm(src, d_model, encoder_layer.norm2) + + np.testing.assert_allclose( + encoder_output.numpy(), src, rtol=1e-5, atol=1e-6) + + def test_transformer_decoder_layer(self): + with fluid.dygraph.guard(fluid.CPUPlace()): + paddle.framework.manual_seed(2020) + activation = "relu" + normalize_before = False + batch_size, d_model, n_head, dim_feedforward, dropout, attn_dropout, act_dropout, source_length, target_length = generate_basic_params( + mode="decoder_layer") + tgt = np.random.rand(batch_size, target_length, + d_model).astype("float32") + memory = np.random.rand(batch_size, source_length, + d_model).astype("float32") + tgt_mask = np.zeros((batch_size, n_head, target_length, + target_length)).astype("float32") + tgt_mask[0][0][0][0] = -1e9 + memory_mask = np.zeros((batch_size, n_head, target_length, + source_length)).astype("float32") + memory_mask[0][0][0][0] = -1e9 + for cache in [True, False]: + self_attn = MultiHeadAttention( + d_model, n_head, dropout=attn_dropout) + cross_attn = MultiHeadAttention( + d_model, n_head, dropout=attn_dropout) + + # paddle decoderlayer: + decoder_layer = TransformerDecoderLayer( + d_model, n_head, dim_feedforward, dropout, activation, + attn_dropout, act_dropout, normalize_before) + cache_objs = None + if cache: + cache_objs = decoder_layer.gen_cache( + paddle.to_variable(memory)) + + decoder_output = decoder_layer( + paddle.to_variable(tgt), + paddle.to_variable(memory), + paddle.to_variable(tgt_mask), + paddle.to_variable(memory_mask), cache_objs) + + decoder_output = decoder_output[0].numpy( + ) if cache else decoder_output.numpy() + + # numpy: + residual = tgt + # self-attn + self_attn_cache = cache_objs[ + 0] if cache_objs is not None else None + tgt = self_attn( + paddle.to_variable(tgt), + paddle.to_variable(tgt), + paddle.to_variable(tgt), + paddle.to_variable(tgt_mask), self_attn_cache) + + tgt = tgt[0].numpy() if cache else tgt.numpy() + + tgt = residual + tgt + # postprocess + tgt_norm = layer_norm(tgt, d_model, decoder_layer.norm1) + residual = tgt_norm + # cross-attn + cross_attn_cache = cache_objs[ + 1] if cache_objs is not None else None + tgt = cross_attn( + paddle.to_variable(tgt_norm), + paddle.to_variable(memory), + paddle.to_variable(memory), + paddle.to_variable(memory_mask), cross_attn_cache) + tgt = tgt[0].numpy() if cache else tgt.numpy() + + # postprocess + tgt = tgt + residual + tgt_norm = layer_norm(tgt, d_model, decoder_layer.norm2) + residual = tgt_norm + # FFN + ffn_output = ffn(tgt_norm, decoder_layer, activation) + # post process + tgt = residual + ffn_output + tgt_norm = layer_norm(tgt, d_model, decoder_layer.norm3) + + np.testing.assert_allclose( + decoder_output, tgt_norm, rtol=1e-5, atol=1e-6) + + def test_encoder(self): + batch_size, d_model, n_head, dim_feedforward, dropout, attn_dropout, act_dropout, sequence_length = generate_basic_params( + mode="encoder_layer") + + src = np.random.rand(batch_size, sequence_length, + d_model).astype("float32") + + src_mask = np.zeros((batch_size, n_head, sequence_length, + sequence_length)).astype("float32") + src_mask[0][0][0][0] = -np.inf + with fluid.dygraph.guard(fluid.CPUPlace()): + encoder_layer = 
TransformerEncoderLayer(d_model, n_head, + dim_feedforward, dropout) + num_layers = 6 + encoder = TransformerEncoder(encoder_layer, num_layers) + # src, src_mask + enc_output = encoder( + paddle.to_variable(src), paddle.to_variable(src_mask)) + + def test_decoder(self): + batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( + mode="decoder_layer") + tgt = np.random.rand(batch_size, target_length, + d_model).astype("float32") + memory = np.random.rand(batch_size, source_length, + d_model).astype("float32") + tgt_mask = np.zeros((batch_size, n_head, target_length, + target_length)).astype("float32") + tgt_mask[0][0][0][0] = -1e9 + memory_mask = np.zeros((batch_size, n_head, target_length, + source_length)).astype("float32") + memory_mask[0][0][0][0] = -1e9 + with fluid.dygraph.guard(fluid.CPUPlace()): + decoder_layer = TransformerDecoderLayer(d_model, n_head, + dim_feedforward, dropout) + num_layers = 6 + decoder = TransformerDecoder(decoder_layer, num_layers) + + output = decoder( + paddle.to_variable(tgt), + paddle.to_variable(memory), + paddle.to_variable(tgt_mask), paddle.to_variable(memory_mask)) + + def test_transformer(self): + batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( + mode="decoder_layer") + + # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8 + with fluid.dygraph.guard(fluid.CPUPlace()): + transformer = Transformer( + d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout) + src = paddle.to_variable( + np.random.rand(batch_size, source_length, d_model).astype( + "float32")) + tgt = paddle.to_variable( + np.random.rand(batch_size, target_length, d_model).astype( + "float32")) + src_mask = np.zeros((batch_size, n_head, source_length, + source_length)).astype("float32") + src_mask[0][0][0][0] = -np.inf + src_mask = paddle.to_variable(src_mask) + tgt_mask = np.zeros((batch_size, n_head, target_length, + target_length)).astype("float32") + tgt_mask[0][0][0][0] = -1e9 + memory_mask = np.zeros((batch_size, n_head, target_length, + source_length)).astype("float32") + memory_mask[0][0][0][0] = -1e9 + tgt_mask, memory_mask = paddle.to_variable( + tgt_mask), paddle.to_variable(memory_mask) + trans_output = transformer(src, tgt, src_mask, tgt_mask, + memory_mask) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py index 9a64dd1deea93f473d73d485ec5a9d707aaa54f9..158462a1e6e1012b7473a2410f2c003d04ea2e40 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py @@ -14,9 +14,12 @@ from __future__ import print_function +import sys +import subprocess import unittest import numpy as np from op_test import OpTest +import paddle import paddle.fluid.core as core from paddle.fluid.op import Operator import paddle.fluid as fluid @@ -472,5 +475,61 @@ class TestUniformRandomBatchSizeLikeOpError(unittest.TestCase): self.assertRaises(TypeError, test_dtype) +class TestUniformAlias(unittest.TestCase): + def test_alias(self): + paddle.uniform([2, 3], min=-5.0, max=5.0) + paddle.tensor.uniform([2, 3], min=-5.0, max=5.0) + paddle.tensor.random.uniform([2, 3], min=-5.0, max=5.0) + + def test_uniform_random(): + paddle.tensor.random.uniform_random([2, 3], min=-5.0, max=5.0) + + self.assertRaises(AttributeError, 
test_uniform_random) + + +class TestUniformOpError(unittest.TestCase): + def test_errors(self): + main_prog = Program() + start_prog = Program() + with program_guard(main_prog, start_prog): + + def test_Variable(): + x1 = fluid.create_lod_tensor( + np.zeros((4, 784)), [[1, 1, 1, 1]], fluid.CPUPlace()) + paddle.tensor.random.uniform(x1) + + self.assertRaises(TypeError, test_Variable) + + def test_Variable2(): + x1 = np.zeros((4, 784)) + paddle.tensor.random.uniform(x1) + + self.assertRaises(TypeError, test_Variable2) + + def test_dtype(): + x2 = fluid.layers.data( + name='x2', shape=[4, 784], dtype='float32') + paddle.tensor.random.uniform(x2, 'int32') + + self.assertRaises(TypeError, test_dtype) + + def test_out_dtype(): + out = paddle.tensor.random.uniform( + shape=[3, 4], dtype='float64') + self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64) + + test_out_dtype() + + +class TestUniformDygraphMode(unittest.TestCase): + def test_check_output(self): + with fluid.dygraph.guard(): + x = paddle.tensor.random.uniform( + [10], dtype="float32", min=0.0, max=1.0) + x_np = x.numpy() + for i in range(10): + self.assertTrue((x_np[i] > 0 and x_np[i] < 1.0)) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 7e565ca31b219366b7ab83267b46f32e5812d983..80b94704c388824901312b5d577cb5cfd0d0c75b 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -16,6 +16,7 @@ from __future__ import print_function import unittest from paddle.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_, in_dygraph_mode +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle.fluid.core as core @@ -28,6 +29,74 @@ class TestVarBase(unittest.TestCase): self.dtype = np.float32 self.array = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) + def test_to_tensor(self): + def _test_place(place): + with fluid.dygraph.guard(): + x = paddle.to_tensor( + 1, dtype='float32', place=place, stop_gradient=False) + self.assertTrue(np.array_equal(x.numpy(), [1.])) + self.assertEqual(x.dtype, core.VarDesc.VarType.FP32) + self.assertEqual(x.shape, [1]) + self.assertEqual(x.stop_gradient, False) + self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR) + + x = paddle.to_tensor( + (1, 2), dtype='float32', place=place, stop_gradient=False) + x = paddle.to_tensor( + [1, 2], dtype='float32', place=place, stop_gradient=False) + self.assertTrue(np.array_equal(x.numpy(), [1., 2.])) + self.assertEqual(x.dtype, core.VarDesc.VarType.FP32) + self.assertEqual(x.grad, None) + self.assertEqual(x.shape, [2]) + self.assertEqual(x.stop_gradient, False) + self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR) + + x = paddle.to_tensor( + self.array, + dtype='float32', + place=place, + stop_gradient=False) + self.assertTrue(np.array_equal(x.numpy(), self.array)) + self.assertEqual(x.dtype, core.VarDesc.VarType.FP32) + self.assertEqual(x.shape, self.shape) + self.assertEqual(x.stop_gradient, False) + self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR) + + y = paddle.to_tensor(x) + y = paddle.to_tensor(y, dtype='float64', place=place) + self.assertTrue(np.array_equal(y.numpy(), self.array)) + self.assertEqual(y.dtype, core.VarDesc.VarType.FP64) + self.assertEqual(y.shape, self.shape) + self.assertEqual(y.stop_gradient, True) + self.assertEqual(y.type, core.VarDesc.VarType.LOD_TENSOR) + z = 
x + y + self.assertTrue(np.array_equal(z.numpy(), 2 * self.array)) + + x = paddle.to_tensor( + [1 + 2j, 1 - 2j], dtype='complex64', place=place) + y = paddle.to_tensor(x) + self.assertTrue(np.array_equal(x.numpy(), [1 + 2j, 1 - 2j])) + self.assertEqual(y.dtype, 'complex64') + self.assertEqual(y.shape, [2]) + self.assertEqual(y.real.stop_gradient, True) + self.assertEqual(y.real.type, core.VarDesc.VarType.LOD_TENSOR) + + with self.assertRaises(TypeError): + paddle.to_tensor('test') + with self.assertRaises(TypeError): + paddle.to_tensor(1, dtype='test') + with self.assertRaises(ValueError): + paddle.to_tensor([[1], [2, 3]]) + with self.assertRaises(ValueError): + paddle.to_tensor([[1], [2, 3]], place='test') + with self.assertRaises(ValueError): + paddle.to_tensor([[1], [2, 3]], place=1) + + _test_place(core.CPUPlace()) + if core.is_compiled_with_cuda(): + _test_place(core.CUDAPinnedPlace()) + _test_place(core.CUDAPlace(0)) + def test_to_variable(self): with fluid.dygraph.guard(): var = fluid.dygraph.to_variable(self.array, name="abc") @@ -76,7 +145,7 @@ class TestVarBase(unittest.TestCase): with fluid.dygraph.guard(): var = fluid.dygraph.to_variable(self.array) - self.assertEqual(var.name, 'generated_var_0') + self.assertEqual(var.name, 'generated_tensor_0') var.name = 'test' self.assertEqual(var.name, 'test') diff --git a/python/paddle/fluid/tests/unittests/test_variance_layer.py b/python/paddle/fluid/tests/unittests/test_variance_layer.py index 569f064db8549b5f28bc751a36cbe4b379636379..b5bb3cc978a558bb52f5f56c58f107b653956a75 100644 --- a/python/paddle/fluid/tests/unittests/test_variance_layer.py +++ b/python/paddle/fluid/tests/unittests/test_variance_layer.py @@ -15,65 +15,104 @@ import unittest import numpy as np import paddle -import paddle.fluid as fluid -class TestVarianceLayer(unittest.TestCase): +def ref_var(x, axis=None, unbiased=True, keepdim=False): + ddof = 1 if unbiased else 0 + if isinstance(axis, int): + axis = (axis, ) + if axis is not None: + axis = tuple(axis) + return np.var(x, axis=axis, ddof=ddof, keepdims=keepdim) + + +class TestVarAPI(unittest.TestCase): def setUp(self): - self._dtype = "float64" - self._input = np.random.random([2, 3, 4, 5]).astype(self._dtype) - - def static(self, axis=None, keepdim=False, unbiased=True): - prog = fluid.Program() - with fluid.program_guard(prog): - data = fluid.data( - name="data", dtype=self._dtype, shape=[None, 3, 4, 5]) - out = prog.current_block().create_var( - dtype=self._dtype, shape=[2, 3, 4, 5]) - paddle.var(input=data, - axis=axis, - keepdim=keepdim, - unbiased=unbiased, - out=out) - - exe = fluid.Executor(self._place) - return exe.run(feed={"data": self._input}, - program=prog, - fetch_list=[out])[0] - - def dynamic(self, axis=None, keepdim=False, unbiased=True): - with fluid.dygraph.guard(self._place): - data = fluid.dygraph.to_variable(self._input) - out = paddle.var(input=data, - axis=axis, - keepdim=keepdim, - unbiased=unbiased) - return out.numpy() - - def numpy(self, axis=None, keepdim=False, unbiased=True): - ddof = 1 if unbiased else 0 - axis = tuple(axis) if isinstance(axis, list) else axis - return np.var(self._input, axis=axis, keepdims=keepdim, ddof=ddof) - - def test_equal(self): - places = [fluid.CPUPlace()] - if fluid.core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - self._place = place - self.assertTrue(np.allclose(self.numpy(), self.static())) - self.assertTrue( - np.allclose( - self.numpy(axis=[0, 2]), self.dynamic(axis=[0, 2]))) - self.assertTrue( - np.allclose( 
- self.numpy( - axis=[1, 3], keepdim=True), - self.dynamic( - axis=[1, 3], keepdim=True))) - self.assertTrue( - np.allclose( - self.numpy(unbiased=False), self.dynamic(unbiased=False))) + self.dtype = 'float64' + self.shape = [1, 3, 4, 10] + self.axis = [1, 3] + self.keepdim = False + self.unbiased = True + self.set_attrs() + self.x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) + self.place=paddle.CUDAPlace(0) \ + if paddle.fluid.core.is_compiled_with_cuda() \ + else paddle.CPUPlace() + + def set_attrs(self): + pass + + def static(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', self.shape, self.dtype) + out = paddle.var(x, self.axis, self.unbiased, self.keepdim) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x}, fetch_list=[out]) + return res[0] + + def dygraph(self): + paddle.disable_static() + x = paddle.to_tensor(self.x) + out = paddle.var(x, self.axis, self.unbiased, self.keepdim) + paddle.enable_static() + return out.numpy() + + def test_api(self): + out_ref = ref_var(self.x, self.axis, self.unbiased, self.keepdim) + out_dygraph = self.dygraph() + out_static = self.static() + for out in [out_dygraph, out_static]: + self.assertTrue(np.allclose(out_ref, out)) + self.assertTrue(np.equal(out_ref.shape, out.shape).all()) + + +class TestVarAPI_dtype(TestVarAPI): + def set_attrs(self): + self.dtype = 'float32' + + +class TestVarAPI_axis_int(TestVarAPI): + def set_attrs(self): + self.axis = 2 + + +class TestVarAPI_axis_list(TestVarAPI): + def set_attrs(self): + self.axis = [1, 2] + + +class TestVarAPI_axis_tuple(TestVarAPI): + def set_attrs(self): + self.axis = (1, 3) + + +class TestVarAPI_keepdim(TestVarAPI): + def set_attrs(self): + self.keepdim = False + + +class TestVarAPI_unbiased(TestVarAPI): + def set_attrs(self): + self.unbiased = False + + +class TestVarAPI_alias(unittest.TestCase): + def test_alias(self): + paddle.disable_static() + x = paddle.to_tensor(np.array([10, 12], 'float32')) + out1 = paddle.var(x).numpy() + out2 = paddle.tensor.var(x).numpy() + out3 = paddle.tensor.stat.var(x).numpy() + self.assertTrue(np.allclose(out1, out2)) + self.assertTrue(np.allclose(out1, out3)) + paddle.enable_static() + + +class TestVarError(unittest.TestCase): + def test_error(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.data('X', [2, 3, 4], 'int32') + self.assertRaises(TypeError, paddle.var, x) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py index 449ac959188949559056654418ace3e227c368da..6bc42f0712a1a8c9f9a0640e06042c42e7cc948f 100644 --- a/python/paddle/fluid/tests/unittests/test_warpctc_op.py +++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py @@ -21,25 +21,25 @@ from op_test import OpTest from test_softmax_op import stable_softmax import paddle.fluid as fluid from paddle.fluid import Program, program_guard +import paddle +import paddle.nn.functional as F CUDA_BLOCK_SIZE = 512 class CTCForward(object): - def __init__(self, softmax, softmax_lod, labels, labels_lod, blank, - norm_by_times): + def __init__(self, softmax, softmax_lod, labels, labels_lod, num_classes, + batch_size, blank, norm_by_times): self.softmax = softmax self.softmax_lod = softmax_lod - assert labels.shape[1] == 1 self.labels = labels self.labels_lod = labels_lod self.blank = blank self.norm_by_times = norm_by_times self.level = 0 - self.num_classes = softmax.shape[1] - self.batch_size = 
len(softmax_lod[self.level]) - assert self.batch_size == len(labels_lod[self.level]) + self.num_classes = num_classes + self.batch_size = batch_size self.loss = np.zeros([self.batch_size, 1], dtype="float32") self.gradient = np.zeros(self.softmax.shape, dtype="float32") @@ -163,17 +163,25 @@ class CTCForward(object): softmax_offset = 0 labels_offset = 0 for i in range(self.batch_size): - softmax_start_i = softmax_offset - softmax_end_i = softmax_offset + self.softmax_lod[self.level][i] - labels_start_i = labels_offset - labels_end_i = labels_offset + self.labels_lod[self.level][i] - - softmax_a_sequence = self.softmax[softmax_start_i:softmax_end_i, :] - labels_a_sequence = self.labels[labels_start_i:labels_end_i, :] - self.loss[i] = self.forward_a_sequence(softmax_a_sequence, - labels_a_sequence) - softmax_offset += self.softmax_lod[self.level][i] - labels_offset += self.labels_lod[self.level][i] + if self.labels.shape[1] == 1: + softmax_start_i = softmax_offset + softmax_end_i = softmax_offset + self.softmax_lod[self.level][i] + labels_start_i = labels_offset + labels_end_i = labels_offset + self.labels_lod[self.level][i] + + softmax_a_sequence = self.softmax[softmax_start_i: + softmax_end_i, :] + labels_a_sequence = self.labels[labels_start_i:labels_end_i, :] + self.loss[i] = self.forward_a_sequence(softmax_a_sequence, + labels_a_sequence) + softmax_offset += self.softmax_lod[self.level][i] + labels_offset += self.labels_lod[self.level][i] + else: + softmax_a_sequence = self.softmax[:self.softmax_lod[i], i, :] + labels_a_sequence = self.labels[:self.labels_lod[i], :] + self.loss[i] = self.forward_a_sequence(softmax_a_sequence, + labels_a_sequence) + return self.loss @@ -201,7 +209,8 @@ class TestWarpCTCOp(OpTest): dtype="int32") ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod, - self.blank, self.norm_by_times) + self.num_classes, self.batch_size, self.blank, + self.norm_by_times) loss = ctc.forward() max_sequence_length = 0 @@ -223,7 +232,7 @@ class TestWarpCTCOp(OpTest): } def test_check_output(self): - self.check_output(check_dygraph=False) + self.check_output() def test_check_grad(self): self.outputs['WarpCTCGrad'] = self.gradient @@ -237,7 +246,7 @@ class TestWarpCTCOpCase1(TestWarpCTCOp): self.num_classes = CUDA_BLOCK_SIZE + 2 self.logits_lod = [[4, 1, 3, 3]] self.labels_lod = [[3, 1, 4, 4]] - self.blank = 0 + self.blank = self.num_classes - 1 self.norm_by_times = False @@ -267,7 +276,8 @@ class TestWarpCTCOpWithPadding(OpTest): dtype="int32") ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod, - self.blank, self.norm_by_times) + self.num_classes, self.batch_size, self.blank, + self.norm_by_times) loss = ctc.forward() max_sequence_length = 0 @@ -317,7 +327,7 @@ class TestWarpCTCOpWithPadding(OpTest): } def test_check_output(self): - self.check_output(check_dygraph=False) + self.check_output() def test_check_grad(self): self.outputs['WarpCTCGrad'] = self.gradient @@ -333,7 +343,7 @@ class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding): self.labels_lod = [[3, 1, 4, 4]] self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64) self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64) - self.blank = 0 + self.blank = self.num_classes - 1 self.norm_by_times = False @@ -389,5 +399,97 @@ class TestWarpCTCOpError(unittest.TestCase): self.assertRaises(TypeError, test_label_len_Variable) +class TestCTCLossAPICase(unittest.TestCase): + def test_functinal_api(self): + self.batch_size = 4 + self.num_classes = CUDA_BLOCK_SIZE + 2 + self.logits_length 
= np.array([4, 1, 3, 3], dtype=np.int64) + self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64) + self.blank = self.num_classes - 1 + self.norm_by_times = False + + logits = np.random.uniform(0.1, 1.0, [ + max(self.logits_length), self.batch_size, self.num_classes + ]).astype("float32") + softmax = np.apply_along_axis(stable_softmax, -1, logits) + # labels should not be blank + labels = np.random.randint( + 0, + self.num_classes - 1, [self.batch_size, max(self.labels_length)], + dtype="int32") + + ctc = CTCForward(softmax, self.logits_length, labels, + self.labels_length, self.num_classes, self.batch_size, + self.blank, self.norm_by_times) + loss_np = ctc.forward() + + paddle.disable_static() + softmax = paddle.to_variable(logits) + labels = paddle.to_variable(labels) + logits_length = paddle.to_variable(self.logits_length) + labels_length = paddle.to_variable(self.labels_length) + loss_pd_mean = F.ctc_loss( + softmax, + labels, + logits_length, + labels_length, + blank=self.blank, + reduction='mean') + loss_pd_mean = loss_pd_mean.numpy() + + loss_pd_sum = F.ctc_loss( + softmax, + labels, + logits_length, + labels_length, + blank=self.blank, + reduction='sum') + loss_pd_sum = loss_pd_sum.numpy() + paddle.enable_static() + loss_np = np.squeeze(loss_np, axis=-1) + loss_np_mean = (loss_np / labels_length.numpy()).mean() + loss_np_sum = loss_np.sum() + + self.assertTrue(np.allclose(loss_pd_mean, loss_np_mean, atol=1)) + self.assertTrue(np.allclose(loss_pd_sum, loss_np_sum, atol=1)) + + def test_class_api(self): + self.batch_size = 3 + self.num_classes = 15 + self.logits_length = np.array([3, 3, 3], dtype=np.int64) + self.labels_length = np.array([0, 1, 2], dtype=np.int64) + self.blank = 0 + self.norm_by_times = False + + logits = np.random.uniform(0.1, 1.0, [ + max(self.logits_length), self.batch_size, self.num_classes + ]).astype("float32") + softmax = np.apply_along_axis(stable_softmax, -1, logits) + # labels should not be blank + labels = np.random.randint( + 1, + self.num_classes, [self.batch_size, max(self.labels_length)], + dtype="int32") + + ctc = CTCForward(softmax, self.logits_length, labels, + self.labels_length, self.num_classes, self.batch_size, + self.blank, self.norm_by_times) + loss_np = ctc.forward() + + paddle.disable_static() + softmax = paddle.to_variable(logits) + labels = paddle.to_variable(labels) + logits_length = paddle.to_variable(self.logits_length) + labels_length = paddle.to_variable(self.labels_length) + + loss_pd = paddle.nn.CTCLoss(self.blank, 'none')( + softmax, labels, logits_length, labels_length) + loss_pd = loss_pd.numpy() + paddle.enable_static() + loss_np = np.squeeze(loss_np, axis=-1) + + self.assertTrue(np.allclose(loss_pd, loss_np, atol=1)) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/white_list/no_check_set_white_list.py b/python/paddle/fluid/tests/unittests/white_list/no_check_set_white_list.py index b8258f3153a801dfc78db5f43325c0dce5c4b611..0de0eeb464ad700abb2144e49a822582b8653589 100644 --- a/python/paddle/fluid/tests/unittests/white_list/no_check_set_white_list.py +++ b/python/paddle/fluid/tests/unittests/white_list/no_check_set_white_list.py @@ -26,4 +26,5 @@ no_check_set_white_list = [ 'cross_entropy2', 'seed', 'amp_check_finite_and_scale', + 'cudnn_lstm', ] diff --git a/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py index 
ce6868b5c70ae1218df48f899f936f57f6734582..5300ab935a3405f9f76c08a7f2ece8bad33367ac 100644 --- a/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py +++ b/python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py @@ -41,7 +41,8 @@ NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST = [ 'unpool', \ 'yolov3_loss', \ 'inverse', \ - 'bilateral_slice' + 'bilateral_slice',\ + 'cudnn_lstm' ] NEED_FIX_FP64_CHECK_OUTPUT_THRESHOLD_OP_LIST = ['bilinear_interp'] diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 20f1b453a0cd37aaf0888991a3f20c9e68c438d0..f01dc01973a603a0b6ea08358f73237c68924c78 100644 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -15,7 +15,8 @@ # TODO: import framework api under this directory __all__ = [ 'create_global_var', 'create_parameter', 'ParamAttr', 'Variable', - 'CPUPlace', 'CUDAPlace', 'CUDAPinnedPlace' + 'CPUPlace', 'CUDAPlace', 'CUDAPinnedPlace', 'get_default_dtype', + 'set_default_dtype' ] __all__ += [ @@ -30,14 +31,18 @@ __all__ += [ from . import random from .random import manual_seed +from .framework import get_default_dtype +from .framework import set_default_dtype from ..fluid.framework import Variable #DEFINE_ALIAS +from ..fluid.framework import ComplexVariable #DEFINE_ALIAS from ..fluid.param_attr import ParamAttr #DEFINE_ALIAS from ..fluid.layers.tensor import create_global_var #DEFINE_ALIAS from ..fluid.layers.tensor import create_parameter #DEFINE_ALIAS from ..fluid.core import CPUPlace #DEFINE_ALIAS from ..fluid.core import CUDAPlace #DEFINE_ALIAS from ..fluid.core import CUDAPinnedPlace #DEFINE_ALIAS +from ..fluid.core import VarBase #DEFINE_ALIAS from paddle.fluid import core #DEFINE_ALIAS from ..fluid.dygraph.base import no_grad #DEFINE_ALIAS diff --git a/python/paddle/framework/framework.py b/python/paddle/framework/framework.py index 65654b59c083086967c1ef78f14b740b0779e722..41ec18ce32d3036c3db86aaa98053f59ff61f717 100644 --- a/python/paddle/framework/framework.py +++ b/python/paddle/framework/framework.py @@ -13,5 +13,70 @@ # limitations under the License. # TODO: define framework api -# __all__ = ['set_default_dtype', -# 'get_default_dtype'] +from paddle.fluid.layer_helper_base import LayerHelperBase +from paddle.fluid.data_feeder import convert_dtype +import numpy as np + +__all__ = ['set_default_dtype', 'get_default_dtype'] + + +def set_default_dtype(d): + """ + Set default dtype. The default dtype is initially float32 + + Args: + d(string|np.dtype): the dtype to make the default. It only + supports float16, float32 and float64. + + Returns: + None. + + Examples: + .. code-block:: python + + import paddle + paddle.set_default_dtype("float32") + + """ + if isinstance(d, type): + if d in [np.float16, np.float32, np.float64]: + d = d.__name__ + else: + raise TypeError( + "set_default_dtype only supports [float16, float32, float64] " + ", but received %s" % d.__name__) + else: + if d in [ + 'float16', 'float32', 'float64', u'float16', u'float32', + u'float64' + ]: + # this code is a little bit dangerous, since error could happen + # when casting no-ascii code to str in python2. + # but since the set itself is limited, so currently, it is good. + # however, jointly supporting python2 and python3, (as well as python4 maybe) + # may still be a long-lasting problem. 
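The branch above normalizes every accepted spelling (the numpy dtype classes plus their str and unicode names) to a plain str before delegating to LayerHelperBase, and anything else raises TypeError. A quick round-trip through the two helpers introduced in this hunk, as a sketch rather than part of the patch, assuming the top-level paddle aliases shown in their docstrings:

import numpy as np
import paddle

paddle.set_default_dtype(np.float64)    # dtype class form, normalized to 'float64'
assert paddle.get_default_dtype() == 'float64'

paddle.set_default_dtype(u'float32')    # unicode spelling, cast to a plain str
assert paddle.get_default_dtype() == 'float32'

try:
    paddle.set_default_dtype('int32')   # rejected: only float16/float32/float64 are accepted
except TypeError as e:
    print(e)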
+ d = str(d) + else: + raise TypeError( + "set_default_dtype only supports [float16, float32, float64] " + ", but received %s" % str(d)) + + LayerHelperBase.set_default_dtype(d) + + +def get_default_dtype(): + """ + Get the current default dtype. The default dtype is initially float32. + + Args: + None. + Returns: + The default dtype. + + Examples: + .. code-block:: python + + import paddle + paddle.get_default_dtype() + """ + return LayerHelperBase.get_default_dtype() diff --git a/python/paddle/incubate/complex/tensor/math.py b/python/paddle/incubate/complex/tensor/math.py index 52fdbcbc82be291f356067258789c876fede8f16..465e4887a1f8a8dc1d53afac8869f0b55776f3d2 100644 --- a/python/paddle/incubate/complex/tensor/math.py +++ b/python/paddle/incubate/complex/tensor/math.py @@ -262,7 +262,7 @@ def trace(x, offset=0, axis1=0, axis2=1, name=None): case1 = np.random.randn(3, 10, 10).astype('float64') + 1j * np.random.randn(3, 10, 10).astype('float64') paddle.disable_static() - case1 = paddle.to_variable(case1) + case1 = paddle.to_tensor(case1) data1 = paddle.complex.trace(case1, offset=1, axis1=1, axis2=2) # data1.shape = [3] """ complex_variable_exists([x], "trace") @@ -330,8 +330,8 @@ def sum(input, dim=None, keep_dim=False, name=None): """ complex_variable_exists([input], "sum") - real = math.sum(input.real, dim=dim, keep_dim=keep_dim, name=name) - imag = math.sum(input.imag, dim=dim, keep_dim=keep_dim, name=name) + real = math.sum(input.real, axis=dim, keepdim=keep_dim, name=name) + imag = math.sum(input.imag, axis=dim, keepdim=keep_dim, name=name) return ComplexVariable(real, imag) diff --git a/python/paddle/incubate/hapi/__init__.py b/python/paddle/incubate/hapi/__init__.py index a6b5faef57ca95188f0759f53753177e4f5946f3..c0361fa33246ff3315a107c520972ca6bebc8168 100644 --- a/python/paddle/incubate/hapi/__init__.py +++ b/python/paddle/incubate/hapi/__init__.py @@ -20,7 +20,6 @@ from . import download from . import model from .model import * -from . import metrics from . import datasets from . import distributed from . 
import vision @@ -39,7 +38,6 @@ __all__ = [ 'datasets', 'distributed', 'download', - 'metrics', 'vision', 'text', 'utils', diff --git a/python/paddle/incubate/hapi/callbacks.py b/python/paddle/incubate/hapi/callbacks.py index 741552511f9fdc93d9e370fc7d45f9d84a1d4392..0804708210a9749813e195a8b5579b339986acd6 100644 --- a/python/paddle/incubate/hapi/callbacks.py +++ b/python/paddle/incubate/hapi/callbacks.py @@ -295,8 +295,8 @@ class ProgBarLogger(Callback): import paddle.fluid as fluid import paddle.incubate.hapi as hapi - inputs = [hapi.Input('image', [-1, 1, 28, 28], 'float32')] - labels = [hapi.Input('label', [None, 1], 'int64')] + inputs = [hapi.Input([-1, 1, 28, 28], 'float32', 'image')] + labels = [hapi.Input([None, 1], 'int64', 'label')] train_dataset = hapi.datasets.MNIST(mode='train') @@ -305,8 +305,8 @@ class ProgBarLogger(Callback): optim = fluid.optimizer.Adam(0.001) model.prepare(optimizer=optim, - loss_function=paddle.nn.CrossEntropyLoss(), - metrics=hapi.metrics.Accuracy()) + loss=paddle.nn.CrossEntropyLoss(), + metrics=paddle.metric.Accuracy()) callback = hapi.callbacks.ProgBarLogger(log_freq=10) model.fit(train_dataset, batch_size=64, callbacks=callback) @@ -431,8 +431,8 @@ class ModelCheckpoint(Callback): import paddle.fluid as fluid import paddle.incubate.hapi as hapi - inputs = [hapi.Input('image', [-1, 1, 28, 28], 'float32')] - labels = [hapi.Input('label', [None, 1], 'int64')] + inputs = [hapi.Input([-1, 1, 28, 28], 'float32', 'image')] + labels = [hapi.Input([None, 1], 'int64', 'label')] train_dataset = hapi.datasets.MNIST(mode='train') @@ -441,8 +441,8 @@ class ModelCheckpoint(Callback): optim = fluid.optimizer.Adam(0.001) model.prepare(optimizer=optim, - loss_function=paddle.nn.CrossEntropyLoss(), - metrics=hapi.metrics.Accuracy()) + loss=paddle.nn.CrossEntropyLoss(), + metrics=paddle.metric.Accuracy()) callback = hapi.callbacks.ModelCheckpoint(save_dir='./temp') model.fit(train_dataset, batch_size=64, callbacks=callback) diff --git a/python/paddle/incubate/hapi/datasets/__init__.py b/python/paddle/incubate/hapi/datasets/__init__.py index fc5df6401992def4bc37329794e534a832924da3..a88b0e6bbf1975d97bfeb68025b978ce877c6baf 100644 --- a/python/paddle/incubate/hapi/datasets/__init__.py +++ b/python/paddle/incubate/hapi/datasets/__init__.py @@ -15,11 +15,41 @@ from . import folder from . import mnist from . import flowers +from . import cifar +from . import voc2012 +from . import conll05 +from . import imdb +from . import imikolov +from . import movielens +from . import movie_reviews +from . import uci_housing +from . import wmt14 +from . 
import wmt16 from .folder import * from .mnist import * from .flowers import * +from .cifar import * +from .voc2012 import * +from .conll05 import * +from .imdb import * +from .imikolov import * +from .movielens import * +from .movie_reviews import * +from .uci_housing import * +from .wmt14 import * +from .wmt16 import * __all__ = folder.__all__ \ - + mnist.__all__ \ - + flowers.__all__ + + mnist.__all__ \ + + flowers.__all__ \ + + cifar.__all__ \ + + voc2012.__all__ \ + + conll05.__all__ \ + + imdb.__all__ \ + + imikolov.__all__ \ + + movielens.__all__ \ + + movie_reviews.__all__ \ + + uci_housing.__all__ \ + + wmt14.__all__ \ + + wmt16.__all__ diff --git a/python/paddle/incubate/hapi/datasets/cifar.py b/python/paddle/incubate/hapi/datasets/cifar.py new file mode 100644 index 0000000000000000000000000000000000000000..adfa786e615368ba90dab154924678de79104b55 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/cifar.py @@ -0,0 +1,207 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import tarfile +import numpy as np +import six +from six.moves import cPickle as pickle + +from paddle.io import Dataset +from .utils import _check_exists_and_download + +__all__ = ['Cifar10', 'Cifar100'] + +URL_PREFIX = 'https://dataset.bj.bcebos.com/cifar/' +CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' +CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a' +CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz' +CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85' + +MODE_FLAG_MAP = { + 'train10': 'data_batch', + 'test10': 'test_batch', + 'train100': 'train', + 'test100': 'test' +} + + +class Cifar10(Dataset): + """ + Implementation of `Cifar-10 `_ + dataset, which has 10 categories. + + Args: + data_file(str): path to data file, can be set None if + :attr:`download` is True. Default None + mode(str): 'train', 'test' mode. Default 'train'. + transform(callable): transform to perform on image, None for on transform. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True + + Returns: + Dataset: instance of cifar-10 dataset + + Examples: + + .. 
code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import Cifar10 + from paddle.incubate.hapi.vision.transforms import Normalize + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + self.fc = paddle.nn.Linear(3072, 10, act='softmax') + + def forward(self, image, label): + image = paddle.reshape(image, (3, -1)) + return self.fc(image), label + + paddle.disable_static() + + normalize = Normalize(mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]) + cifar10 = Cifar10(mode='train', transform=normalize) + + for i in range(10): + image, label = cifar10[i] + image = paddle.to_tensor(image) + label = paddle.to_tensor(label) + + model = SimpleNet() + image, label = model(image, label) + print(image.numpy().shape, label.numpy().shape) + + """ + + def __init__(self, + data_file=None, + mode='train', + transform=None, + download=True): + assert mode.lower() in ['train', 'test', 'train', 'test'], \ + "mode should be 'train10', 'test10', 'train100' or 'test100', but got {}".format(mode) + self.mode = mode.lower() + + self._init_url_md5_flag() + + self.data_file = data_file + if self.data_file is None: + assert download, "data_file is not set and downloading automatically is disabled" + self.data_file = _check_exists_and_download( + data_file, self.data_url, self.data_md5, 'cifar', download) + + self.transform = transform + + # read dataset into memory + self._load_data() + + def _init_url_md5_flag(self): + self.data_url = CIFAR10_URL + self.data_md5 = CIFAR10_MD5 + self.flag = MODE_FLAG_MAP[self.mode + '10'] + + def _load_data(self): + self.data = [] + with tarfile.open(self.data_file, mode='r') as f: + names = (each_item.name for each_item in f + if self.flag in each_item.name) + + for name in names: + if six.PY2: + batch = pickle.load(f.extractfile(name)) + else: + batch = pickle.load(f.extractfile(name), encoding='bytes') + + data = batch[six.b('data')] + labels = batch.get( + six.b('labels'), batch.get(six.b('fine_labels'), None)) + assert labels is not None + for sample, label in six.moves.zip(data, labels): + self.data.append((sample, label)) + + def __getitem__(self, idx): + image, label = self.data[idx] + if self.transform is not None: + image = self.transform(image) + return image, label + + def __len__(self): + return len(self.data) + + +class Cifar100(Cifar10): + """ + Implementation of `Cifar-100 `_ + dataset, which has 100 categories. + + Args: + data_file(str): path to data file, can be set None if + :attr:`download` is True. Default None + mode(str): 'train', 'test' mode. Default 'train'. + transform(callable): transform to perform on image, None for on transform. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True + + Returns: + Dataset: instance of cifar-100 dataset + + Examples: + + .. 
code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import Cifar100 + from paddle.incubate.hapi.vision.transforms import Normalize + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + self.fc = paddle.nn.Linear(3072, 100, act='softmax') + + def forward(self, image, label): + image = paddle.reshape(image, (3, -1)) + return self.fc(image), label + + paddle.disable_static() + + normalize = Normalize(mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]) + cifar100 = Cifar100(mode='train', transform=normalize) + + for i in range(10): + image, label = cifar100[i] + image = paddle.to_tensor(image) + label = paddle.to_tensor(label) + + model = SimpleNet() + image, label = model(image, label) + print(image.numpy().shape, label.numpy().shape) + + """ + + def __init__(self, + data_file=None, + mode='train', + transform=None, + download=True): + super(Cifar100, self).__init__(data_file, mode, transform, download) + + def _init_url_md5_flag(self): + self.data_url = CIFAR100_URL + self.data_md5 = CIFAR100_MD5 + self.flag = MODE_FLAG_MAP[self.mode + '100'] diff --git a/python/paddle/incubate/hapi/datasets/conll05.py b/python/paddle/incubate/hapi/datasets/conll05.py new file mode 100644 index 0000000000000000000000000000000000000000..094e3559335363524c4ae893f70294a4afaa7037 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/conll05.py @@ -0,0 +1,297 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import gzip +import tarfile +import numpy as np +import six +from six.moves import cPickle as pickle + +from paddle.io import Dataset +import paddle.compat as cpt +from .utils import _check_exists_and_download + +__all__ = ['Conll05st'] + +DATA_URL = 'http://paddlemodels.bj.bcebos.com/conll05st/conll05st-tests.tar.gz' +DATA_MD5 = '387719152ae52d60422c016e92a742fc' +WORDDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt' +WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa' +VERBDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FverbDict.txt' +VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c' +TRGDICT_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2FtargetDict.txt' +TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751' +EMB_URL = 'http://paddlemodels.bj.bcebos.com/conll05st%2Femb' +EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7' + +UNK_IDX = 0 + + +class Conll05st(Dataset): + """ + Implementation of `Conll05st `_ + test dataset. + + Note: only support download test dataset automatically for that + only test dataset of Conll05st is public. + + Args: + data_file(str): path to data tar file, can be set None if + :attr:`download` is True. Default None + word_dict_file(str): path to word dictionary file, can be set None if + :attr:`download` is True. Default None + verb_dict_file(str): path to verb dictionary file, can be set None if + :attr:`download` is True. 
Default None + target_dict_file(str): path to target dictionary file, can be set None if + :attr:`download` is True. Default None + emb_file(str): path to embedding dictionary file, only used for + :code:`get_embedding` can be set None if :attr:`download` is + True. Default None + download(bool): whether to download dataset automatically if + :attr:`data_file` :attr:`word_dict_file` :attr:`verb_dict_file` + :attr:`target_dict_file` is not set. Default True + + Returns: + Dataset: instance of conll05st dataset + + Examples: + + .. code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import Conll05st + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + + def forward(self, pred_idx, mark, label): + return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label) + + paddle.disable_static() + + conll05st = Conll05st() + + for i in range(10): + pred_idx, mark, label= conll05st[i][-3:] + pred_idx = paddle.to_tensor(pred_idx) + mark = paddle.to_tensor(mark) + label = paddle.to_tensor(label) + + model = SimpleNet() + pred_idx, mark, label= model(pred_idx, mark, label) + print(pred_idx.numpy(), mark.numpy(), label.numpy()) + + """ + + def __init__(self, + data_file=None, + word_dict_file=None, + verb_dict_file=None, + target_dict_file=None, + emb_file=None, + download=True): + self.data_file = data_file + if self.data_file is None: + assert download, "data_file is not set and downloading automatically is disabled" + self.data_file = _check_exists_and_download( + data_file, DATA_URL, DATA_MD5, 'conll05st', download) + + self.word_dict_file = word_dict_file + if self.word_dict_file is None: + assert download, "word_dict_file is not set and downloading automatically is disabled" + self.word_dict_file = _check_exists_and_download( + word_dict_file, WORDDICT_URL, WORDDICT_MD5, 'conll05st', + download) + + self.verb_dict_file = verb_dict_file + if self.verb_dict_file is None: + assert download, "verb_dict_file is not set and downloading automatically is disabled" + self.verb_dict_file = _check_exists_and_download( + verb_dict_file, VERBDICT_URL, VERBDICT_MD5, 'conll05st', + download) + + self.target_dict_file = target_dict_file + if self.target_dict_file is None: + assert download, "target_dict_file is not set and downloading automatically is disabled" + self.target_dict_file = _check_exists_and_download( + target_dict_file, TRGDICT_URL, TRGDICT_MD5, 'conll05st', + download) + + self.word_dict = self._load_dict(self.word_dict_file) + self.predicate_dict = self._load_dict(self.verb_dict_file) + self.label_dict = self._load_label_dict(self.target_dict_file) + + # read dataset into memory + self._load_anno() + + def _load_label_dict(self, filename): + d = dict() + tag_dict = set() + with open(filename, 'r') as f: + for i, line in enumerate(f): + line = line.strip() + if line.startswith("B-"): + tag_dict.add(line[2:]) + elif line.startswith("I-"): + tag_dict.add(line[2:]) + index = 0 + for tag in tag_dict: + d["B-" + tag] = index + index += 1 + d["I-" + tag] = index + index += 1 + d["O"] = index + return d + + def _load_dict(self, filename): + d = dict() + with open(filename, 'r') as f: + for i, line in enumerate(f): + d[line.strip()] = i + return d + + def _load_anno(self): + tf = tarfile.open(self.data_file) + wf = tf.extractfile( + "conll05st-release/test.wsj/words/test.wsj.words.gz") + pf = tf.extractfile( + "conll05st-release/test.wsj/props/test.wsj.props.gz") + self.sentences = [] + self.predicates = [] + self.labels = [] + with 
gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile( + fileobj=pf) as props_file: + sentences = [] + labels = [] + one_seg = [] + for word, label in zip(words_file, props_file): + word = cpt.to_text(word.strip()) + label = cpt.to_text(label.strip().split()) + + if len(label) == 0: # end of sentence + for i in range(len(one_seg[0])): + a_kind_lable = [x[i] for x in one_seg] + labels.append(a_kind_lable) + + if len(labels) >= 1: + verb_list = [] + for x in labels[0]: + if x != '-': + verb_list.append(x) + + for i, lbl in enumerate(labels[1:]): + cur_tag = 'O' + is_in_bracket = False + lbl_seq = [] + verb_word = '' + for l in lbl: + if l == '*' and is_in_bracket == False: + lbl_seq.append('O') + elif l == '*' and is_in_bracket == True: + lbl_seq.append('I-' + cur_tag) + elif l == '*)': + lbl_seq.append('I-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') != -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') == -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = True + else: + raise RuntimeError('Unexpected label: %s' % + l) + + self.sentences.append(sentences) + self.predicates.append(verb_list[i]) + self.labels.append(lbl_seq) + + sentences = [] + labels = [] + one_seg = [] + else: + sentences.append(word) + one_seg.append(label) + + pf.close() + wf.close() + tf.close() + + def __getitem__(self, idx): + sentence = self.sentences[idx] + predicate = self.predicates[idx] + labels = self.labels[idx] + + sen_len = len(sentence) + + verb_index = labels.index('B-V') + mark = [0] * len(labels) + if verb_index > 0: + mark[verb_index - 1] = 1 + ctx_n1 = sentence[verb_index - 1] + else: + ctx_n1 = 'bos' + + if verb_index > 1: + mark[verb_index - 2] = 1 + ctx_n2 = sentence[verb_index - 2] + else: + ctx_n2 = 'bos' + + mark[verb_index] = 1 + ctx_0 = sentence[verb_index] + + if verb_index < len(labels) - 1: + mark[verb_index + 1] = 1 + ctx_p1 = sentence[verb_index + 1] + else: + ctx_p1 = 'eos' + + if verb_index < len(labels) - 2: + mark[verb_index + 2] = 1 + ctx_p2 = sentence[verb_index + 2] + else: + ctx_p2 = 'eos' + + word_idx = [self.word_dict.get(w, UNK_IDX) for w in sentence] + + ctx_n2_idx = [self.word_dict.get(ctx_n2, UNK_IDX)] * sen_len + ctx_n1_idx = [self.word_dict.get(ctx_n1, UNK_IDX)] * sen_len + ctx_0_idx = [self.word_dict.get(ctx_0, UNK_IDX)] * sen_len + ctx_p1_idx = [self.word_dict.get(ctx_p1, UNK_IDX)] * sen_len + ctx_p2_idx = [self.word_dict.get(ctx_p2, UNK_IDX)] * sen_len + + pred_idx = [self.predicate_dict.get(predicate)] * sen_len + label_idx = [self.label_dict.get(w) for w in labels] + + return (np.array(word_idx), np.array(ctx_n2_idx), np.array(ctx_n1_idx), + np.array(ctx_0_idx), np.array(ctx_p1_idx), np.array(ctx_p2_idx), + np.array(pred_idx), np.array(mark), np.array(label_idx)) + + def __len__(self): + return len(self.sentences) + + def get_dict(self): + """ + Get the word, verb and label dictionary of Wikipedia corpus. 
+ """ + return self.word_dict, self.predicate_dict, self.label_dict + + def get_embedding(self): + return self.emb_file diff --git a/python/paddle/incubate/hapi/datasets/flowers.py b/python/paddle/incubate/hapi/datasets/flowers.py index 6f56cc82c1cba800002d82cc8a2bd5ddae619f9e..141d2a53b577b8c9be9ac153a36c5b2fa51ded77 100644 --- a/python/paddle/incubate/hapi/datasets/flowers.py +++ b/python/paddle/incubate/hapi/datasets/flowers.py @@ -36,12 +36,13 @@ SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c' # In official 'readme', tstid is the flag of test data # and trnid is the flag of train data. But test data is more than train data. # So we exchange the train data and test data. -MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': "valid"} +MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': 'valid'} class Flowers(Dataset): """ - Implement of flowers dataset + Implementation of `Flowers `_ + dataset Args: data_file(str): path to data file, can be set None if @@ -51,9 +52,9 @@ class Flowers(Dataset): setid_file(str): path to subset index file, can be set None if :attr:`download` is True. Default None mode(str): 'train', 'valid' or 'test' mode. Default 'train'. - download(bool): whether auto download mnist dataset if - :attr:`image_path`/:attr:`label_path` unset. Default - True + transform(callable): transform to perform on image, None for on transform. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True Examples: @@ -82,19 +83,19 @@ class Flowers(Dataset): self.data_file = data_file if self.data_file is None: - assert download, "data_file not set and auto download disabled" + assert download, "data_file is not set and downloading automatically is disabled" self.data_file = _check_exists_and_download( data_file, DATA_URL, DATA_MD5, 'flowers', download) self.label_file = label_file if self.label_file is None: - assert download, "label_file not set and auto download disabled" + assert download, "label_file is not set and downloading automatically is disabled" self.label_file = _check_exists_and_download( label_file, LABEL_URL, LABEL_MD5, 'flowers', download) self.setid_file = setid_file if self.setid_file is None: - assert download, "setid_file not set and auto download disabled" + assert download, "setid_file is not set and downloading automatically is disabled" self.setid_file = _check_exists_and_download( setid_file, SETID_URL, SETID_MD5, 'flowers', download) diff --git a/python/paddle/incubate/hapi/datasets/imdb.py b/python/paddle/incubate/hapi/datasets/imdb.py new file mode 100644 index 0000000000000000000000000000000000000000..12d166bc784a382ac5ae70491d3e8061ad1d1e9f --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/imdb.py @@ -0,0 +1,144 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
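The `__getitem__` of `Conll05st` above derives a predicate context window (`ctx_n2` … `ctx_p2`) and a `mark` vector from the position of the `B-V` label. The following standalone sketch reproduces that windowing on an invented toy sentence (not taken from the dataset) so the boundary handling with the `'bos'`/`'eos'` padding is easier to follow.

.. code-block:: python

    # Standalone sketch of the context-window logic in Conll05st.__getitem__;
    # the sentence and labels below are invented for illustration only.
    import numpy as np

    sentence = ['the', 'cat', 'chased', 'the', 'mouse']
    labels = ['O', 'O', 'B-V', 'O', 'O']          # 'B-V' marks the predicate

    verb_index = labels.index('B-V')
    mark = [0] * len(labels)
    for offset in (-2, -1, 0, 1, 2):              # mark tokens within two positions
        pos = verb_index + offset
        if 0 <= pos < len(labels):
            mark[pos] = 1

    # out-of-range neighbours fall back to the 'bos'/'eos' sentinels
    ctx_n2 = sentence[verb_index - 2] if verb_index > 1 else 'bos'
    ctx_n1 = sentence[verb_index - 1] if verb_index > 0 else 'bos'
    ctx_0 = sentence[verb_index]
    ctx_p1 = sentence[verb_index + 1] if verb_index < len(labels) - 1 else 'eos'
    ctx_p2 = sentence[verb_index + 2] if verb_index < len(labels) - 2 else 'eos'

    print(ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2)  # the cat chased the mouse
    print(np.array(mark))                         # [1 1 1 1 1]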
+ +from __future__ import print_function + +import re +import six +import string +import tarfile +import numpy as np +import collections + +from paddle.io import Dataset +from .utils import _check_exists_and_download + +__all__ = ['Imdb'] + +URL = 'https://dataset.bj.bcebos.com/imdb%2FaclImdb_v1.tar.gz' +MD5 = '7c2ac02c03563afcf9b574c7e56c153a' + + +class Imdb(Dataset): + """ + Implementation of `IMDB `_ dataset. + + Args: + data_file(str): path to data tar file, can be set None if + :attr:`download` is True. Default None + mode(str): 'train' 'test' mode. Default 'train'. + cutoff(int): cutoff number for building word dictionary. Default 150. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True + + Returns: + Dataset: instance of IMDB dataset + + Examples: + + .. code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import Imdb + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + + def forward(self, doc, label): + return paddle.sum(doc), label + + paddle.disable_static() + + imdb = Imdb(mode='train') + + for i in range(10): + doc, label = imdb[i] + doc = paddle.to_tensor(doc) + label = paddle.to_tensor(label) + + model = SimpleNet() + image, label = model(doc, label) + print(doc.numpy().shape, label.numpy().shape) + + """ + + def __init__(self, data_file=None, mode='train', cutoff=150, download=True): + assert mode.lower() in ['train', 'test'], \ + "mode should be 'train', 'test', but got {}".format(mode) + self.mode = mode.lower() + + self.data_file = data_file + if self.data_file is None: + assert download, "data_file is not set and downloading automatically is disabled" + self.data_file = _check_exists_and_download(data_file, URL, MD5, + 'imdb', download) + + # Build a word dictionary from the corpus + self.word_idx = self._build_work_dict(cutoff) + + # read dataset into memory + self._load_anno() + + def _build_work_dict(self, cutoff): + word_freq = collections.defaultdict(int) + pattern = re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$") + for doc in self._tokenize(pattern): + for word in doc: + word_freq[word] += 1 + + # Not sure if we should prune less-frequent words here. + word_freq = [x for x in six.iteritems(word_freq) if x[1] > cutoff] + + dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) + words, _ = list(zip(*dictionary)) + word_idx = dict(list(zip(words, six.moves.range(len(words))))) + word_idx[''] = len(words) + return word_idx + + def _tokenize(self, pattern): + data = [] + with tarfile.open(self.data_file) as tarf: + tf = tarf.next() + while tf != None: + if bool(pattern.match(tf.name)): + # newline and punctuations removal and ad-hoc tokenization. 
+ data.append( + tarf.extractfile(tf).read().rstrip(six.b("\n\r")) + .translate(None, six.b(string.punctuation)).lower( + ).split()) + tf = tarf.next() + + return data + + def _load_anno(self): + pos_pattern = re.compile("aclImdb/{}/pos/.*\.txt$".format(self.mode)) + neg_pattern = re.compile("aclImdb/{}/neg/.*\.txt$".format(self.mode)) + + UNK = self.word_idx[''] + + self.docs = [] + self.labels = [] + for doc in self._tokenize(pos_pattern): + self.docs.append([self.word_idx.get(w, UNK) for w in doc]) + self.labels.append(0) + for doc in self._tokenize(neg_pattern): + self.docs.append([self.word_idx.get(w, UNK) for w in doc]) + self.labels.append(1) + + def __getitem__(self, idx): + return (np.array(self.docs[idx]), np.array([self.labels[idx]])) + + def __len__(self): + return len(self.docs) diff --git a/python/paddle/incubate/hapi/datasets/imikolov.py b/python/paddle/incubate/hapi/datasets/imikolov.py new file mode 100644 index 0000000000000000000000000000000000000000..2e6ad43b506265ee8c9c8617a87eba5a041632bd --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/imikolov.py @@ -0,0 +1,171 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import six +import tarfile +import numpy as np +import collections + +from paddle.io import Dataset +from .utils import _check_exists_and_download + +__all__ = ['Imikolov'] + +URL = 'https://dataset.bj.bcebos.com/imikolov%2Fsimple-examples.tgz' +MD5 = '30177ea32e27c525793142b6bf2c8e2d' + + +class Imikolov(Dataset): + """ + Implementation of imikolov dataset. + + Args: + data_file(str): path to data tar file, can be set None if + :attr:`download` is True. Default None + data_type(str): 'NGRAM' or 'SEQ'. Default 'NGRAM'. + window_size(int): sliding window size for 'NGRAM' data. Default -1. + mode(str): 'train' 'test' mode. Default 'train'. + min_word_freq(int): minimal word frequence for building word dictionary. Default 50. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True + + Returns: + Dataset: instance of imikolov dataset + + Examples: + + .. 
code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import Imikolov + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + + def forward(self, src, trg): + return paddle.sum(src), paddle.sum(trg) + + paddle.disable_static() + + imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2) + + for i in range(10): + src, trg = imikolov[i] + src = paddle.to_tensor(src) + trg = paddle.to_tensor(trg) + + model = SimpleNet() + src, trg = model(src, trg) + print(src.numpy().shape, trg.numpy().shape) + + """ + + def __init__(self, + data_file=None, + data_type='NGRAM', + window_size=-1, + mode='train', + min_word_freq=50, + download=True): + assert data_type.upper() in ['NGRAM', 'SEQ'], \ + "data type should be 'NGRAM', 'SEQ', but got {}".format(data_type) + self.data_type = data_type.upper() + + assert mode.lower() in ['train', 'test'], \ + "mode should be 'train', 'test', but got {}".format(mode) + self.mode = mode.lower() + + self.window_size = window_size + self.min_word_freq = min_word_freq + + self.data_file = data_file + if self.data_file is None: + assert download, "data_file is not set and downloading automatically disabled" + self.data_file = _check_exists_and_download(data_file, URL, MD5, + 'imikolov', download) + + # Build a word dictionary from the corpus + self.word_idx = self._build_work_dict(min_word_freq) + + # read dataset into memory + self._load_anno() + + def word_count(self, f, word_freq=None): + if word_freq is None: + word_freq = collections.defaultdict(int) + + for l in f: + for w in l.strip().split(): + word_freq[w] += 1 + word_freq[''] += 1 + word_freq[''] += 1 + + return word_freq + + def _build_work_dict(self, cutoff): + train_filename = './simple-examples/data/ptb.train.txt' + test_filename = './simple-examples/data/ptb.valid.txt' + with tarfile.open(self.data_file) as tf: + trainf = tf.extractfile(train_filename) + testf = tf.extractfile(test_filename) + word_freq = self.word_count(testf, self.word_count(trainf)) + if '' in word_freq: + # remove for now, since we will set it as last index + del word_freq[''] + + word_freq = [ + x for x in six.iteritems(word_freq) if x[1] > self.min_word_freq + ] + + word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) + words, _ = list(zip(*word_freq_sorted)) + word_idx = dict(list(zip(words, six.moves.range(len(words))))) + word_idx[''] = len(words) + + return word_idx + + def _load_anno(self): + self.data = [] + with tarfile.open(self.data_file) as tf: + filename = './simple-examples/data/ptb.{}.txt'.format(self.mode) + f = tf.extractfile(filename) + + UNK = self.word_idx[''] + for l in f: + if self.data_type == 'NGRAM': + assert self.window_size > -1, 'Invalid gram length' + l = [''] + l.strip().split() + [''] + if len(l) >= self.window_size: + l = [self.word_idx.get(w, UNK) for w in l] + for i in six.moves.range(self.window_size, len(l) + 1): + self.data.append(tuple(l[i - self.window_size:i])) + elif self.data_type == 'SEQ': + l = l.strip().split() + l = [self.word_idx.get(w, UNK) for w in l] + src_seq = [self.word_idx['']] + l + trg_seq = l + [self.word_idx['']] + if self.window_size > 0 and len(src_seq) > self.window_size: + continue + self.data.append((src_seq, trg_seq)) + else: + assert False, 'Unknow data type' + + def __getitem__(self, idx): + return tuple([np.array(d) for d in self.data[idx]]) + + def __len__(self): + return len(self.data) diff --git a/python/paddle/incubate/hapi/datasets/mnist.py 
b/python/paddle/incubate/hapi/datasets/mnist.py index bd48ca1c9668b40ac0379bfeda11a5c056f9fd44..ed046e5a1d9bbcc33f3148c6ecde8a349e478cb0 100644 --- a/python/paddle/incubate/hapi/datasets/mnist.py +++ b/python/paddle/incubate/hapi/datasets/mnist.py @@ -38,7 +38,7 @@ TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432' class MNIST(Dataset): """ - Implement of MNIST dataset + Implementation of `MNIST `_ dataset Args: image_path(str): path to image file, can be set None if @@ -48,9 +48,8 @@ class MNIST(Dataset): chw_format(bool): If set True, the output shape is [1, 28, 28], otherwise, output shape is [1, 784]. Default True. mode(str): 'train' or 'test' mode. Default 'train'. - download(bool): whether auto download mnist dataset if - :attr:`image_path`/:attr:`label_path` unset. Default - True + download(bool): whether to download dataset automatically if + :attr:`image_path` :attr:`label_path` is not set. Default True Returns: Dataset: MNIST Dataset. @@ -82,7 +81,7 @@ class MNIST(Dataset): self.chw_format = chw_format self.image_path = image_path if self.image_path is None: - assert download, "image_path not set and auto download disabled" + assert download, "image_path is not set and downloading automatically is disabled" image_url = TRAIN_IMAGE_URL if mode == 'train' else TEST_IMAGE_URL image_md5 = TRAIN_IMAGE_MD5 if mode == 'train' else TEST_IMAGE_MD5 self.image_path = _check_exists_and_download( @@ -90,9 +89,9 @@ class MNIST(Dataset): self.label_path = label_path if self.label_path is None: - assert download, "label_path not set and auto download disabled" - label_url = TRAIN_LABEL_URL if mode == 'train' else TEST_LABEL_URL - label_md5 = TRAIN_LABEL_MD5 if mode == 'train' else TEST_LABEL_MD5 + assert download, "label_path is not set and downloading automatically is disabled" + label_url = TRAIN_LABEL_URL if self.mode == 'train' else TEST_LABEL_URL + label_md5 = TRAIN_LABEL_MD5 if self.mode == 'train' else TEST_LABEL_MD5 self.label_path = _check_exists_and_download( label_path, label_url, label_md5, 'mnist', download) diff --git a/python/paddle/incubate/hapi/datasets/movie_reviews.py b/python/paddle/incubate/hapi/datasets/movie_reviews.py new file mode 100644 index 0000000000000000000000000000000000000000..7bf0684ebcd315807b9dc736c5481383073e5ba8 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/movie_reviews.py @@ -0,0 +1,173 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
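`Imikolov._load_anno` above turns each tokenized line into fixed-size n-gram tuples when `data_type='NGRAM'`. A minimal sketch of that sliding window follows; the vocabulary and sentence are invented, and the generic `'<bos>'`/`'<eos>'`/`'<unk>'` placeholders stand in for the dataset's sentinel tokens (which do not render in the hunk above).

.. code-block:: python

    # Standalone sketch of the NGRAM branch of Imikolov._load_anno; the vocabulary,
    # sentence and sentinel names below are placeholders, not the dataset's own.
    word_idx = {'<bos>': 0, '<eos>': 1, 'a': 2, 'b': 3, 'c': 4, '<unk>': 5}
    UNK = word_idx['<unk>']
    window_size = 3

    line = 'a b c d'                                  # 'd' is out-of-vocabulary
    tokens = ['<bos>'] + line.strip().split() + ['<eos>']
    ids = [word_idx.get(w, UNK) for w in tokens]

    ngrams = []
    if len(ids) >= window_size:                       # skip lines shorter than the window
        for i in range(window_size, len(ids) + 1):
            ngrams.append(tuple(ids[i - window_size:i]))

    print(ngrams)   # [(0, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 1)]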
+ +from __future__ import print_function + +import os +import six +import numpy as np +import collections +import nltk +from nltk.corpus import movie_reviews +import zipfile +from functools import cmp_to_key +from itertools import chain + +import paddle +from paddle.io import Dataset + +__all__ = ['MovieReviews'] + +URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip" +MD5 = '155de2b77c6834dd8eea7cbe88e93acb' + +NUM_TRAINING_INSTANCES = 1600 +NUM_TOTAL_INSTANCES = 2000 + + +class MovieReviews(Dataset): + """ + Implementation of `NLTK movie reviews `_ dataset. + + Args: + data_file(str): path to data tar file, can be set None if + :attr:`download` is True. Default None + mode(str): 'train' 'test' mode. Default 'train'. + download(bool): whether auto download cifar dataset if + :attr:`data_file` unset. Default True. + + Returns: + Dataset: instance of movie reviews dataset + + Examples: + + .. code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import MovieReviews + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + + def forward(self, word, category): + return paddle.sum(word), category + + paddle.disable_static() + + movie_reviews = MovieReviews(mode='train') + + for i in range(10): + word_list, category = movie_reviews[i] + word_list = paddle.to_tensor(word_list) + category = paddle.to_tensor(category) + + model = SimpleNet() + word_list, category = model(word_list, category) + print(word_list.numpy().shape, category.numpy()) + + """ + + def __init__(self, mode='train'): + assert mode.lower() in ['train', 'test'], \ + "mode should be 'train', 'test', but got {}".format(mode) + self.mode = mode.lower() + + self._download_data_if_not_yet() + + # read dataset into memory + self._load_sentiment_data() + + def _get_word_dict(self): + """ + Sorted the words by the frequency of words which occur in sample + :return: + words_freq_sorted + """ + words_freq_sorted = list() + word_freq_dict = collections.defaultdict(int) + + for category in movie_reviews.categories(): + for field in movie_reviews.fileids(category): + for words in movie_reviews.words(field): + word_freq_dict[words] += 1 + words_sort_list = list(six.iteritems(word_freq_dict)) + words_sort_list.sort(key=cmp_to_key(lambda a, b: b[1] - a[1])) + for index, word in enumerate(words_sort_list): + words_freq_sorted.append((word[0], index)) + return words_freq_sorted + + def _sort_files(self): + """ + Sorted the sample for cross reading the sample + :return: + files_list + """ + files_list = list() + neg_file_list = movie_reviews.fileids('neg') + pos_file_list = movie_reviews.fileids('pos') + files_list = list( + chain.from_iterable(list(zip(neg_file_list, pos_file_list)))) + return files_list + + def _load_sentiment_data(self): + """ + Load the data set + :return: + data_set + """ + self.data = [] + words_ids = dict(self._get_word_dict()) + for sample_file in self._sort_files(): + words_list = list() + category = 0 if 'neg' in sample_file else 1 + for word in movie_reviews.words(sample_file): + words_list.append(words_ids[word.lower()]) + self.data.append((words_list, category)) + + def _download_data_if_not_yet(self): + """ + Download the data set, if the data set is not download. 
+ """ + try: + # download and extract movie_reviews.zip + paddle.dataset.common.download( + URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip') + path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora') + filename = os.path.join(path, 'movie_reviews.zip') + zip_file = zipfile.ZipFile(filename) + zip_file.extractall(path) + zip_file.close() + # make sure that nltk can find the data + if paddle.dataset.common.DATA_HOME not in nltk.data.path: + nltk.data.path.append(paddle.dataset.common.DATA_HOME) + movie_reviews.categories() + except LookupError: + print("Downloading movie_reviews data set, please wait.....") + nltk.download( + 'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) + print("Download data set success.....") + print("Path is " + nltk.data.find('corpora/movie_reviews').path) + + def __getitem__(self, idx): + if self.mode == 'test': + idx += NUM_TRAINING_INSTANCES + data = self.data[idx] + return np.array(data[0]), np.array(data[1]) + + def __len__(self): + if self.mode == 'train': + return NUM_TRAINING_INSTANCES + else: + return NUM_TOTAL_INSTANCES - NUM_TRAINING_INSTANCES diff --git a/python/paddle/incubate/hapi/datasets/movielens.py b/python/paddle/incubate/hapi/datasets/movielens.py new file mode 100644 index 0000000000000000000000000000000000000000..228e9dc6d477cf539683963dc6ddaa3c02c8fe95 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/movielens.py @@ -0,0 +1,219 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import zipfile +import re +import random +import functools +import six + +import paddle +from paddle.io import Dataset +import paddle.compat as cpt +from .utils import _check_exists_and_download + +__all__ = ['Movielens'] + +age_table = [1, 18, 25, 35, 45, 50, 56] + +URL = 'https://dataset.bj.bcebos.com/movielens%2Fml-1m.zip' +MD5 = 'c4d9eecfca2ab87c1945afe126590906' + + +class MovieInfo(object): + """ + Movie id, title and categories information are stored in MovieInfo. + """ + + def __init__(self, index, categories, title): + self.index = int(index) + self.categories = categories + self.title = title + + def value(self, categories_dict, movie_title_dict): + """ + Get information from a movie. + """ + return [[self.index], [categories_dict[c] for c in self.categories], + [movie_title_dict[w.lower()] for w in self.title.split()]] + + def __str__(self): + return "" % ( + self.index, self.title, self.categories) + + def __repr__(self): + return self.__str__() + + +class UserInfo(object): + """ + User id, gender, age, and job information are stored in UserInfo. + """ + + def __init__(self, index, gender, age, job_id): + self.index = int(index) + self.is_male = gender == 'M' + self.age = age_table.index(int(age)) + self.job_id = int(job_id) + + def value(self): + """ + Get information from a user. 
+ """ + return [[self.index], [0 if self.is_male else 1], [self.age], + [self.job_id]] + + def __str__(self): + return "" % ( + self.index, "M" + if self.is_male else "F", age_table[self.age], self.job_id) + + def __repr__(self): + return str(self) + + +class Movielens(Dataset): + """ + Implementation of `Movielens 1-M `_ dataset. + + Args: + data_file(str): path to data tar file, can be set None if + :attr:`download` is True. Default None + mode(str): 'train' or 'test' mode. Default 'train'. + test_ratio(float): split ratio for test sample. Default 0.1. + rand_seed(int): random seed. Default 0. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True + + Returns: + Dataset: instance of Movielens 1-M dataset + + Examples: + + .. code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import Movielens + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + + def forward(self, category, title, rating): + return paddle.sum(category), paddle.sum(title), paddle.sum(rating) + + paddle.disable_static() + + movielens = Movielens(mode='train') + + for i in range(10): + category, title, rating = movielens[i][-3:] + category = paddle.to_tensor(category) + title = paddle.to_tensor(title) + rating = paddle.to_tensor(rating) + + model = SimpleNet() + category, title, rating = model(category, title, rating) + print(category.numpy().shape, title.numpy().shape, rating.numpy().shape) + + """ + + def __init__(self, + data_file=None, + mode='train', + test_ratio=0.1, + rand_seed=0, + download=True): + assert mode.lower() in ['train', 'test'], \ + "mode should be 'train', 'test', but got {}".format(mode) + self.mode = mode.lower() + + self.data_file = data_file + if self.data_file is None: + assert download, "data_file is not set and downloading automatically is disabled" + self.data_file = _check_exists_and_download(data_file, URL, MD5, + 'sentiment', download) + + self.test_ratio = test_ratio + self.rand_seed = rand_seed + + np.random.seed(rand_seed) + self._load_meta_info() + self._load_data() + + def _load_meta_info(self): + pattern = re.compile(r'^(.*)\((\d+)\)$') + self.movie_info = dict() + self.movie_title_dict = dict() + self.categories_dict = dict() + self.user_info = dict() + with zipfile.ZipFile(self.data_file) as package: + for info in package.infolist(): + assert isinstance(info, zipfile.ZipInfo) + title_word_set = set() + categories_set = set() + with package.open('ml-1m/movies.dat') as movie_file: + for i, line in enumerate(movie_file): + line = cpt.to_text(line, encoding='latin') + movie_id, title, categories = line.strip().split('::') + categories = categories.split('|') + for c in categories: + categories_set.add(c) + title = pattern.match(title).group(1) + self.movie_info[int(movie_id)] = MovieInfo( + index=movie_id, categories=categories, title=title) + for w in title.split(): + title_word_set.add(w.lower()) + + for i, w in enumerate(title_word_set): + self.movie_title_dict[w] = i + + for i, c in enumerate(categories_set): + self.categories_dict[c] = i + + with package.open('ml-1m/users.dat') as user_file: + for line in user_file: + line = cpt.to_text(line, encoding='latin') + uid, gender, age, job, _ = line.strip().split("::") + self.user_info[int(uid)] = UserInfo( + index=uid, gender=gender, age=age, job_id=job) + + def _load_data(self): + self.data = [] + is_test = self.mode == 'test' + with zipfile.ZipFile(self.data_file) as package: + with package.open('ml-1m/ratings.dat') 
as rating: + for line in rating: + line = cpt.to_text(line, encoding='latin') + if (np.random.random() < self.test_ratio) == is_test: + uid, mov_id, rating, _ = line.strip().split("::") + uid = int(uid) + mov_id = int(mov_id) + rating = float(rating) * 2 - 5.0 + + mov = self.movie_info[mov_id] + usr = self.user_info[uid] + self.data.append(usr.value() + \ + mov.value(self.categories_dict, self.movie_title_dict) + \ + [[rating]]) + + def __getitem__(self, idx): + data = self.data[idx] + return tuple([np.array(d) for d in data]) + + def __len__(self): + return len(self.data) diff --git a/python/paddle/incubate/hapi/datasets/uci_housing.py b/python/paddle/incubate/hapi/datasets/uci_housing.py new file mode 100644 index 0000000000000000000000000000000000000000..c1f2c4a5bb5d9d60ba1316e3e2a5f174df94fe99 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/uci_housing.py @@ -0,0 +1,110 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import six +import numpy as np + +import paddle.dataset.common +from paddle.io import Dataset +from .utils import _check_exists_and_download + +__all__ = ["UCIHousing"] + +URL = 'http://paddlemodels.bj.bcebos.com/uci_housing/housing.data' +MD5 = 'd4accdce7a25600298819f8e28e8d593' +feature_names = [ + 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', + 'PTRATIO', 'B', 'LSTAT' +] + + +class UCIHousing(Dataset): + """ + Implementation of `UCI housing `_ + dataset + + Args: + data_file(str): path to data file, can be set None if + :attr:`download` is True. Default None + mode(str): 'train' or 'test' mode. Default 'train'. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True + + Returns: + Dataset: instance of UCI housing dataset. + + Examples: + + .. 
code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import UCIHousing + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + + def forward(self, feature, target): + return paddle.sum(feature), target + + paddle.disable_static() + + uci_housing = UCIHousing(mode='train') + + for i in range(10): + feature, target = uci_housing[i] + feature = paddle.to_tensor(feature) + target = paddle.to_tensor(target) + + model = SimpleNet() + feature, target = model(feature, target) + print(feature.numpy().shape, target.numpy()) + + """ + + def __init__(self, data_file=None, mode='train', download=True): + assert mode.lower() in ['train', 'test'], \ + "mode should be 'train' or 'test', but got {}".format(mode) + self.mode = mode.lower() + + self.data_file = data_file + if self.data_file is None: + assert download, "data_file is not set and downloading automatically is disabled" + self.data_file = _check_exists_and_download(data_file, URL, MD5, + 'uci_housing', download) + + # read dataset into memory + self._load_data() + + def _load_data(self, feature_num=14, ratio=0.8): + data = np.fromfile(self.data_file, sep=' ') + data = data.reshape(data.shape[0] // feature_num, feature_num) + maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum( + axis=0) / data.shape[0] + for i in six.moves.range(feature_num - 1): + data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) + offset = int(data.shape[0] * ratio) + if self.mode == 'train': + self.data = data[:offset] + elif self.mode == 'test': + self.data = data[offset:] + + def __getitem__(self, idx): + data = self.data[idx] + return np.array(data[:-1]), np.array(data[-1:]) + + def __len__(self): + return len(self.data) diff --git a/python/paddle/incubate/hapi/datasets/voc2012.py b/python/paddle/incubate/hapi/datasets/voc2012.py new file mode 100644 index 0000000000000000000000000000000000000000..1811c455db530710a0559c077975ab08d6a94ac3 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/voc2012.py @@ -0,0 +1,137 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import io +import tarfile +import numpy as np +from PIL import Image + +from paddle.io import Dataset +from .utils import _check_exists_and_download + +__all__ = ["VOC2012"] + +VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/\ +VOCtrainval_11-May-2012.tar' + +VOC_MD5 = '6cd6e144f989b92b3379bac3b3de84fd' +SET_FILE = 'VOCdevkit/VOC2012/ImageSets/Segmentation/{}.txt' +DATA_FILE = 'VOCdevkit/VOC2012/JPEGImages/{}.jpg' +LABEL_FILE = 'VOCdevkit/VOC2012/SegmentationClass/{}.png' + +CACHE_DIR = 'voc2012' + +MODE_FLAG_MAP = {'train': 'trainval', 'test': 'train', 'valid': "val"} + + +class VOC2012(Dataset): + """ + Implementation of `VOC2012 `_ dataset + + Args: + data_file(str): path to data file, can be set None if + :attr:`download` is True. Default None + mode(str): 'train', 'valid' or 'test' mode. 
Default 'train'. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True + + Examples: + + .. code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import VOC2012 + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + + def forward(self, image, label): + return paddle.sum(image), label + + paddle.disable_static() + + voc2012 = VOC2012(mode='train') + + for i in range(10): + image, label= voc2012[i] + image = paddle.cast(paddle.to_tensor(image), 'float32') + label = paddle.to_tensor(label) + + model = SimpleNet() + image, label= model(image, label) + print(image.numpy().shape, label.numpy().shape) + + """ + + def __init__(self, + data_file=None, + mode='train', + transform=None, + download=True): + assert mode.lower() in ['train', 'valid', 'test'], \ + "mode should be 'train', 'valid' or 'test', but got {}".format(mode) + self.flag = MODE_FLAG_MAP[mode.lower()] + + self.data_file = data_file + if self.data_file is None: + assert download, "data_file is not set and downloading automatically is disabled" + self.data_file = _check_exists_and_download( + data_file, VOC_URL, VOC_MD5, CACHE_DIR, download) + self.transform = transform + + # read dataset into memory + self._load_anno() + + def _load_anno(self): + self.name2mem = {} + self.data_tar = tarfile.open(self.data_file) + for ele in self.data_tar.getmembers(): + self.name2mem[ele.name] = ele + + set_file = SET_FILE.format(self.flag) + sets = self.data_tar.extractfile(self.name2mem[set_file]) + + self.data = [] + self.labels = [] + + for line in sets: + line = line.strip() + data = DATA_FILE.format(line.decode('utf-8')) + label = LABEL_FILE.format(line.decode('utf-8')) + self.data.append(data) + self.labels.append(label) + + def __getitem__(self, idx): + data_file = self.data[idx] + label_file = self.labels[idx] + + data = self.data_tar.extractfile(self.name2mem[data_file]).read() + label = self.data_tar.extractfile(self.name2mem[label_file]).read() + data = Image.open(io.BytesIO(data)) + label = Image.open(io.BytesIO(label)) + data = np.array(data) + label = np.array(label) + if self.transform is not None: + data = self.transform(data) + return data, label + + def __len__(self): + return len(self.data) + + def __del__(self): + if self.data_tar: + self.data_tar.close() diff --git a/python/paddle/incubate/hapi/datasets/wmt14.py b/python/paddle/incubate/hapi/datasets/wmt14.py new file mode 100644 index 0000000000000000000000000000000000000000..b495ea931a80425b8e24b81cdf8fdfd2c0920a3e --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/wmt14.py @@ -0,0 +1,179 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
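`UCIHousing._load_data` above rescales every feature column by its mean and range, then splits the rows 80/20 into train and test. A standalone sketch of that preprocessing on synthetic data (the array below is random, not the real housing file):

.. code-block:: python

    # Sketch of the scaling and split in UCIHousing._load_data, on made-up data.
    import numpy as np

    feature_num, ratio = 4, 0.8
    data = np.random.rand(10, feature_num)          # stand-in for the parsed rows

    maximums = data.max(axis=0)
    minimums = data.min(axis=0)
    avgs = data.sum(axis=0) / data.shape[0]
    # every column except the last (the regression target) is centered on its
    # mean and scaled by its range
    for i in range(feature_num - 1):
        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])

    offset = int(data.shape[0] * ratio)
    train, test = data[:offset], data[offset:]      # first 80% train, rest test
    print(train.shape, test.shape)                  # (8, 4) (2, 4)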
+ +from __future__ import print_function + +import tarfile +import numpy as np +import gzip + +from paddle.io import Dataset +import paddle.compat as cpt +from .utils import _check_exists_and_download + +__all__ = ['WMT14'] + +URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/' + 'cslm_joint_paper/data/dev+test.tgz') +MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' +# this is a small set of data for test. The original data is too large and +# will be add later. +URL_TRAIN = ('http://paddlemodels.bj.bcebos.com/wmt/wmt14.tgz') +MD5_TRAIN = '0791583d57d5beb693b9414c5b36798c' + +START = "" +END = "" +UNK = "" +UNK_IDX = 2 + + +class WMT14(Dataset): + """ + Implementation of `WMT14 `_ test dataset. + The original WMT14 dataset is too large and a small set of data for set is + provided. This module will download dataset from + http://paddlepaddle.bj.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz + + Args: + data_file(str): path to data tar file, can be set None if + :attr:`download` is True. Default None + mode(str): 'train', 'test' or 'gen'. Default 'train' + dict_size(int): word dictionary size. Default -1. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True + + Returns: + Dataset: instance of WMT14 dataset + + Examples: + + .. code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import WMT14 + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + + def forward(self, src_ids, trg_ids, trg_ids_next): + return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next) + + paddle.disable_static() + + wmt14 = WMT14(mode='train', dict_size=50) + + for i in range(10): + src_ids, trg_ids, trg_ids_next = wmt14[i] + src_ids = paddle.to_tensor(src_ids) + trg_ids = paddle.to_tensor(trg_ids) + trg_ids_next = paddle.to_tensor(trg_ids_next) + + model = SimpleNet() + src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next) + print(src_ids.numpy(), trg_ids.numpy(), trg_ids_next.numpy()) + + """ + + def __init__(self, + data_file=None, + mode='train', + dict_size=-1, + download=True): + assert mode.lower() in ['train', 'test', 'gen'], \ + "mode should be 'train', 'test' or 'gen', but got {}".format(mode) + self.mode = mode.lower() + + self.data_file = data_file + if self.data_file is None: + assert download, "data_file is not set and downloading automatically is disabled" + self.data_file = _check_exists_and_download( + data_file, URL_TRAIN, MD5_TRAIN, 'wmt14', download) + + # read dataset into memory + assert dict_size > 0, "dict_size should be set as positive number" + self.dict_size = dict_size + self._load_data() + + def _load_data(self): + def __to_dict(fd, size): + out_dict = dict() + for line_count, line in enumerate(fd): + if line_count < size: + out_dict[cpt.to_text(line.strip())] = line_count + else: + break + return out_dict + + self.src_ids = [] + self.trg_ids = [] + self.trg_ids_next = [] + with tarfile.open(self.data_file, mode='r') as f: + names = [ + each_item.name for each_item in f + if each_item.name.endswith("src.dict") + ] + assert len(names) == 1 + self.src_dict = __to_dict(f.extractfile(names[0]), self.dict_size) + names = [ + each_item.name for each_item in f + if each_item.name.endswith("trg.dict") + ] + assert len(names) == 1 + self.trg_dict = __to_dict(f.extractfile(names[0]), self.dict_size) + + file_name = "{}/{}".format(self.mode, self.mode) + names = [ + each_item.name for each_item in f + if each_item.name.endswith(file_name) + ] + 
for name in names: + for line in f.extractfile(name): + line = cpt.to_text(line) + line_split = line.strip().split('\t') + if len(line_split) != 2: + continue + src_seq = line_split[0] # one source sequence + src_words = src_seq.split() + src_ids = [ + self.src_dict.get(w, UNK_IDX) + for w in [START] + src_words + [END] + ] + + trg_seq = line_split[1] # one target sequence + trg_words = trg_seq.split() + trg_ids = [self.trg_dict.get(w, UNK_IDX) for w in trg_words] + + # remove sequence whose length > 80 in training mode + if len(src_ids) > 80 or len(trg_ids) > 80: + continue + trg_ids_next = trg_ids + [self.trg_dict[END]] + trg_ids = [self.trg_dict[START]] + trg_ids + + self.src_ids.append(src_ids) + self.trg_ids.append(trg_ids) + self.trg_ids_next.append(trg_ids_next) + + def __getitem__(self, idx): + return (np.array(self.src_ids[idx]), np.array(self.trg_ids[idx]), + np.array(self.trg_ids_next[idx])) + + def __len__(self): + return len(self.src_ids) + + def get_dict(self, reverse=False): + if reverse: + src_dict = {v: k for k, v in six.iteritems(src_dict)} + trg_dict = {v: k for k, v in six.iteritems(trg_dict)} + return src_dict, trg_dict diff --git a/python/paddle/incubate/hapi/datasets/wmt16.py b/python/paddle/incubate/hapi/datasets/wmt16.py new file mode 100644 index 0000000000000000000000000000000000000000..6d3cb8bfacadd15f6c0f973a09dbf544bbc396c0 --- /dev/null +++ b/python/paddle/incubate/hapi/datasets/wmt16.py @@ -0,0 +1,247 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +""" + +from __future__ import print_function + +import os +import six +import tarfile +import numpy as np +from collections import defaultdict + +import paddle +from paddle.io import Dataset +import paddle.compat as cpt +from .utils import _check_exists_and_download + +__all__ = ['WMT16'] + +DATA_URL = ("http://paddlemodels.bj.bcebos.com/wmt/wmt16.tar.gz") +DATA_MD5 = "0c38be43600334966403524a40dcd81e" + +TOTAL_EN_WORDS = 11250 +TOTAL_DE_WORDS = 19220 + +START_MARK = "" +END_MARK = "" +UNK_MARK = "" + + +class WMT16(Dataset): + """ + Implementation of `WMT16 `_ test dataset. + ACL2016 Multimodal Machine Translation. Please see this website for more + details: http://www.statmt.org/wmt16/multimodal-task.html#task1 + + If you use the dataset created for your task, please cite the following paper: + Multi30K: Multilingual English-German Image Descriptions. + + .. code-block:: text + + @article{elliott-EtAl:2016:VL16, + author = {{Elliott}, D. and {Frank}, S. and {Sima"an}, K. and {Specia}, L.}, + title = {Multi30K: Multilingual English-German Image Descriptions}, + booktitle = {Proceedings of the 6th Workshop on Vision and Language}, + year = {2016}, + pages = {70--74}, + year = 2016 + } + + Args: + data_file(str): path to data tar file, can be set None if + :attr:`download` is True. Default None + mode(str): 'train', 'test' or 'val'. Default 'train' + src_dict_size(int): word dictionary size for source language word. Default -1. 
+ trg_dict_size(int): word dictionary size for target language word. Default -1. + lang(str): source language, 'en' or 'de'. Default 'en'. + download(bool): whether to download dataset automatically if + :attr:`data_file` is not set. Default True + + Returns: + Dataset: instance of WMT16 dataset + + Examples: + + .. code-block:: python + + import paddle + from paddle.incubate.hapi.datasets import WMT16 + + class SimpleNet(paddle.nn.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + + def forward(self, src_ids, trg_ids, trg_ids_next): + return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next) + + paddle.disable_static() + + wmt16 = WMT16(mode='train', src_dict_size=50, trg_dict_size=50) + + for i in range(10): + src_ids, trg_ids, trg_ids_next = wmt16[i] + src_ids = paddle.to_tensor(src_ids) + trg_ids = paddle.to_tensor(trg_ids) + trg_ids_next = paddle.to_tensor(trg_ids_next) + + model = SimpleNet() + src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next) + print(src_ids.numpy(), trg_ids.numpy(), trg_ids_next.numpy()) + + """ + + def __init__(self, + data_file=None, + mode='train', + src_dict_size=-1, + trg_dict_size=-1, + lang='en', + download=True): + assert mode.lower() in ['train', 'test', 'val'], \ + "mode should be 'train', 'test' or 'val', but got {}".format(mode) + self.mode = mode.lower() + + self.data_file = data_file + if self.data_file is None: + assert download, "data_file is not set and downloading automatically is disabled" + self.data_file = _check_exists_and_download( + data_file, DATA_URL, DATA_MD5, 'wmt16', download) + + self.lang = lang + assert src_dict_size > 0, "dict_size should be set as positive number" + assert trg_dict_size > 0, "dict_size should be set as positive number" + self.src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if lang == "en" + else TOTAL_DE_WORDS)) + self.trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if lang == "en" + else TOTAL_EN_WORDS)) + + # load source and target word dict + self.src_dict = self._load_dict(lang, src_dict_size) + self.trg_dict = self._load_dict("de" if lang == "en" else "en", + trg_dict_size) + + # load data + self.data = self._load_data() + + def _load_dict(self, lang, dict_size, reverse=False): + dict_path = os.path.join(paddle.dataset.common.DATA_HOME, + "wmt16/%s_%d.dict" % (lang, dict_size)) + dict_found = False + if os.path.exists(dict_path): + with open(dict_path, "rb") as d: + dict_found = len(d.readlines()) == dict_size + if not dict_found: + self._build_dict(dict_path, dict_size, lang) + + word_dict = {} + with open(dict_path, "rb") as fdict: + for idx, line in enumerate(fdict): + if reverse: + word_dict[idx] = cpt.to_text(line.strip()) + else: + word_dict[cpt.to_text(line.strip())] = idx + return word_dict + + def _build_dict(self, dict_path, dict_size, lang): + word_dict = defaultdict(int) + with tarfile.open(self.data_file, mode="r") as f: + for line in f.extractfile("wmt16/train"): + line = cpt.to_text(line) + line_split = line.strip().split("\t") + if len(line_split) != 2: continue + sen = line_split[0] if self.lang == "en" else line_split[1] + for w in sen.split(): + word_dict[w] += 1 + + with open(dict_path, "wb") as fout: + fout.write( + cpt.to_bytes("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))) + for idx, word in enumerate( + sorted( + six.iteritems(word_dict), + key=lambda x: x[1], + reverse=True)): + if idx + 3 == dict_size: break + fout.write(cpt.to_bytes(word[0])) + fout.write(cpt.to_bytes('\n')) + + def _load_data(self): + # the index for 
start mark, end mark, and unk are the same in source + # language and target language. Here uses the source language + # dictionary to determine their indices. + start_id = self.src_dict[START_MARK] + end_id = self.src_dict[END_MARK] + unk_id = self.src_dict[UNK_MARK] + + src_col = 0 if self.lang == "en" else 1 + trg_col = 1 - src_col + + self.src_ids = [] + self.trg_ids = [] + self.trg_ids_next = [] + with tarfile.open(self.data_file, mode="r") as f: + for line in f.extractfile("wmt16/{}".format(self.mode)): + line = cpt.to_text(line) + line_split = line.strip().split("\t") + if len(line_split) != 2: + continue + src_words = line_split[src_col].split() + src_ids = [start_id] + [ + self.src_dict.get(w, unk_id) for w in src_words + ] + [end_id] + + trg_words = line_split[trg_col].split() + trg_ids = [self.trg_dict.get(w, unk_id) for w in trg_words] + + trg_ids_next = trg_ids + [end_id] + trg_ids = [start_id] + trg_ids + + self.src_ids.append(src_ids) + self.trg_ids.append(trg_ids) + self.trg_ids_next.append(trg_ids_next) + + def __getitem__(self, idx): + return (np.array(self.src_ids[idx]), np.array(self.trg_ids[idx]), + np.array(self.trg_ids_next[idx])) + + def __len__(self): + return len(self.src_ids) + + def get_dict(self, lang, reverse=False): + """ + return the word dictionary for the specified language. + + Args: + lang(string): A string indicating which language is the source + language. Available options are: "en" for English + and "de" for Germany. + reverse(bool): If reverse is set to False, the returned python + dictionary will use word as key and use index as value. + If reverse is set to True, the returned python + dictionary will use index as key and word as value. + + Returns: + dict: The word dictionary for the specific language. + """ + + dict_size = self.src_dict_size if lang == self.lang else self.trg_dict_size + + dict_path = os.path.join(paddle.dataset.common.DATA_HOME, + "wmt16/%s_%d.dict" % (lang, dict_size)) + assert os.path.exists(dict_path), "Word dictionary does not exist. " + "Please invoke paddle.dataset.wmt16.train/test/validation first " + "to build the dictionary." + return _load_dict(lang, dict_size) diff --git a/python/paddle/incubate/hapi/distributed.py b/python/paddle/incubate/hapi/distributed.py index 585f466ea6a1ef5a3d888b7c46fe2908ffd2c769..0e38dc8edc758e9c1b8a96add1df242fb0aecef1 100644 --- a/python/paddle/incubate/hapi/distributed.py +++ b/python/paddle/incubate/hapi/distributed.py @@ -49,6 +49,13 @@ class DistributedBatchSampler(BatchSampler): `__len__` for BatchSampler to get sample number of data source. batch_size(int): sample indice number in a mini-batch indices. + num_replicas(int, optional): porcess number in distributed training. + If :attr:`num_replicas` is None, :attr:`num_replicas` will be + retrieved from :code:`paddle.fluid.dygraph.parallel.ParallenEnv`. + Default None. + rank(int, optional): the rank of the current process among :attr:`num_replicas` + processes. If :attr:`rank` is None, :attr:`rank` is retrieved from + :code:`paddle.fluid.dygraph.parallel.ParallenEnv`. Default None. shuffle(bool): whther to shuffle indices order before genrating batch indices. Default False. 
drop_last(bool): whether drop the last incomplete batch dataset size @@ -84,7 +91,13 @@ class DistributedBatchSampler(BatchSampler): break """ - def __init__(self, dataset, batch_size, shuffle=False, drop_last=False): + def __init__(self, + dataset, + batch_size, + num_replicas=None, + rank=None, + shuffle=False, + drop_last=False): self.dataset = dataset assert isinstance(batch_size, int) and batch_size > 0, \ @@ -96,9 +109,21 @@ class DistributedBatchSampler(BatchSampler): assert isinstance(drop_last, bool), \ "drop_last should be a boolean number" + if num_replicas is not None: + assert isinstance(num_replicas, int) and num_replicas > 0, \ + "num_replicas should be a positive integer" + self.nranks = num_replicas + else: + self.nranks = ParallelEnv().nranks + + if rank is not None: + assert isinstance(rank, int) and rank >= 0, \ + "rank should be a non-negative integer" + self.local_rank = rank + else: + self.local_rank = ParallelEnv().local_rank + self.drop_last = drop_last - self.nranks = ParallelEnv().nranks - self.local_rank = ParallelEnv().local_rank self.epoch = 0 self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks)) self.total_size = self.num_samples * self.nranks diff --git a/python/paddle/incubate/hapi/metrics.py b/python/paddle/incubate/hapi/metrics.py deleted file mode 100644 index 9e9a2e78524022d7de8ca80a7fb8e3c478dacd36..0000000000000000000000000000000000000000 --- a/python/paddle/incubate/hapi/metrics.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import abc -import numpy as np -import paddle.fluid as fluid - -import logging - -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - -__all__ = ['Metric', 'Accuracy'] - - -@six.add_metaclass(abc.ABCMeta) -class Metric(object): - """ - Base class for metric, encapsulates metric logic and APIs - Usage: - - m = SomeMetric() - for prediction, label in ...: - m.update(prediction, label) - m.accumulate() - - Advanced usage for :code:`add_metric_op` - Metric calculation can be accelerated by calculating metric states - from model outputs and labels by Paddle OPs in :code:`add_metric_op`, - metric states will be fetch as numpy array and call :code:`update` - with states in numpy format. 
- Metric calculated as follows (operations in Model and Metric are - indicated with curly brackets, while data nodes not): - inputs & labels || ------------------ - | || - {model} || - | || - outputs & labels || - | || tensor data - {Metric.add_metric_op} || - | || - metric states(tensor) || - | || - {fetch as numpy} || ------------------ - | || - metric states(numpy) || numpy data - | || - {Metric.update} \/ ------------------ - Examples: - - For :code:`Accuracy` metric, which takes :code:`pred` and :code:`label` - as inputs, we can calculate the correct prediction matrix between - :code:`pred` and :code:`label` in :code:`add_metric_op`. - For examples, prediction results contains 10 classes, while :code:`pred` - shape is [N, 10], :code:`label` shape is [N, 1], N is mini-batch size, - and we only need to calculate accurary of top-1 and top-5, we could - calculated the correct prediction matrix of the top-5 scores of the - prediction of each sample like follows, while the correct prediction - matrix shape is [N, 5]. - .. code-block:: python - def add_metric_op(pred, label): - # sort prediction and slice the top-5 scores - pred = fluid.layers.argsort(pred, descending=True)[1][:, :5] - # calculate whether the predictions are correct - correct = pred == label - return fluid.layers.cast(correct, dtype='float32') - With the :code:`add_metric_op`, we split some calculations to OPs(which - may run on GPU devices, will be faster), and only fetch 1 tensor with - shape as [N, 5] instead of 2 tensors with shapes as [N, 10] and [N, 1]. - :code:`update` can be define as follows: - .. code-block:: python - def update(self, correct): - accs = [] - for i, k in enumerate(self.topk): - num_corrects = correct[:, :k].sum() - num_samples = len(correct) - accs.append(float(num_corrects) / num_samples) - self.total[i] += num_corrects - self.count[i] += num_samples - return accs - """ - - def __init__(self): - pass - - @abc.abstractmethod - def reset(self): - """ - Reset states and result - """ - raise NotImplementedError("function 'reset' not implemented in {}.". - format(self.__class__.__name__)) - - @abc.abstractmethod - def update(self, *args): - """ - Update states for metric - - Inputs of :code:`update` is the outputs of :code:`Metric.add_metric_op`, - if :code:`add_metric_op` is not defined, the inputs of :code:`update` - will be flatten arguments of **output** of mode and **label** from data: - :code:`update(output1, output2, ..., label1, label2,...)` - - see :code:`Metric.add_metric_op` - """ - raise NotImplementedError("function 'update' not implemented in {}.". - format(self.__class__.__name__)) - - @abc.abstractmethod - def accumulate(self): - """ - Accumulates statistics, computes and returns the metric value - """ - raise NotImplementedError( - "function 'accumulate' not implemented in {}.".format( - self.__class__.__name__)) - - @abc.abstractmethod - def name(self): - """ - Returns metric name - """ - raise NotImplementedError("function 'name' not implemented in {}.". - format(self.__class__.__name__)) - - def add_metric_op(self, *args): - """ - This API is advanced usage to accelerate metric calculating, calulations - from outputs of model to the states which should be updated by Metric can - be defined here, where Paddle OPs is also supported. Outputs of this API - will be the inputs of "Metric.update". 
- - If :code:`add_metric_op` is defined, it will be called with **outputs** - of model and **labels** from data as arguments, all outputs and labels - will be concatenated and flatten and each filed as a separate argument - as follows: - :code:`add_metric_op(output1, output2, ..., label1, label2,...)` - - If :code:`add_metric_op` is not defined, default behaviour is to pass - input to output, so output format will be: - :code:`return output1, output2, ..., label1, label2,...` - - see :code:`Metric.update` - """ - return args - - -class Accuracy(Metric): - """ - Encapsulates accuracy metric logic - - Examples: - - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.incubate.hapi as hapi - - fluid.enable_dygraph() - - train_dataset = hapi.datasets.MNIST(mode='train') - - model = hapi.Model(hapi.vision.LeNet(classifier_activation=None)) - optim = fluid.optimizer.Adam( - learning_rate=0.001, parameter_list=model.parameters()) - model.prepare( - optim, - loss_function=paddle.nn.CrossEntropyLoss(), - metrics=hapi.metrics.Accuracy()) - - model.fit(train_dataset, batch_size=64) - - """ - - def __init__(self, topk=(1, ), name=None, *args, **kwargs): - super(Accuracy, self).__init__(*args, **kwargs) - self.topk = topk - self.maxk = max(topk) - self._init_name(name) - self.reset() - - def add_metric_op(self, pred, label, *args): - pred = fluid.layers.argsort(pred, descending=True)[1][:, :self.maxk] - correct = pred == label - return fluid.layers.cast(correct, dtype='float32') - - def update(self, correct, *args): - accs = [] - for i, k in enumerate(self.topk): - num_corrects = correct[:, :k].sum() - num_samples = len(correct) - accs.append(float(num_corrects) / num_samples) - self.total[i] += num_corrects - self.count[i] += num_samples - return accs - - def reset(self): - self.total = [0.] * len(self.topk) - self.count = [0] * len(self.topk) - - def accumulate(self): - res = [] - for t, c in zip(self.total, self.count): - res.append(float(t) / c) - return res - - def _init_name(self, name): - name = name or 'acc' - if self.maxk != 1: - self._name = ['{}_top{}'.format(name, k) for k in self.topk] - else: - self._name = [name] - - def name(self): - return self._name diff --git a/python/paddle/incubate/hapi/model.py b/python/paddle/incubate/hapi/model.py index 0b12987b10a0510e1035e2b64439de9abe3fcf31..c12df569790f7cd914da6fdaa4a6a84ac6bfd4d5 100644 --- a/python/paddle/incubate/hapi/model.py +++ b/python/paddle/incubate/hapi/model.py @@ -24,7 +24,10 @@ import six import warnings from collections import Iterable +import paddle from paddle import fluid +# Note: Use alias `Input` temporarily before releasing hapi feature. 
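As the import note above indicates, hapi.Input is now an alias of paddle.static.InputSpec, so its argument order changes from the removed layer's (name, shape, dtype) to (shape, dtype, name). A minimal sketch matching the updated examples in this patch:

    import paddle.incubate.hapi as hapi

    # removed layer form was:  hapi.Input('image', [None, 1, 28, 28], 'float32')
    inputs = [hapi.Input([None, 1, 28, 28], 'float32', 'image')]
    labels = [hapi.Input([None, 1], 'int64', 'label')]

Note that the _verify_spec helper added further below requires every spec passed to Model to carry a non-None name.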
+from paddle.static import InputSpec as Input from paddle.fluid.framework import in_dygraph_mode, Variable from paddle.fluid.executor import global_scope from paddle.fluid.io import is_belong_to_optimizer @@ -34,9 +37,9 @@ from paddle.fluid.layers.utils import flatten from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy from paddle.fluid.incubate.fleet.base import role_maker from paddle.io import DataLoader, Dataset +from paddle.metric import Metric from .distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized -from .metrics import Metric from .callbacks import config_callbacks from .utils import to_list, to_numpy, flatten_list, restore_flatten_list, extract_args from .device import _get_device @@ -47,40 +50,6 @@ __all__ = [ ] -class Input(fluid.dygraph.Layer): - """ - Define inputs the model. - - Args: - name (str): The name/alias of the variable, see :ref:`api_guide_Name` - for more details. - shape (tuple(integers)|list[integers]): List|Tuple of integers - declaring the shape. You can set "None" or -1 at a dimension - to indicate the dimension can be of any size. For example, - it is useful to set changeable batch size as "None" or -1. - dtype (np.dtype|VarType|str, optional): The type of the data. Supported - dtype: bool, float16, float32, float64, int8, int16, int32, int64, - uint8. Default: float32. - - Examples: - .. code-block:: python - - import paddle.incubate.hapi as hapi - - input = hapi.Input('x', [None, 784], 'float32') - label = hapi.Input('label', [None, 1], 'int64') - """ - - def __init__(self, name, shape=None, dtype='float32'): - super(Input, self).__init__() - self.shape = shape - self.dtype = dtype - self.name = name - - def forward(self): - return fluid.data(self.name, shape=self.shape, dtype=self.dtype) - - class StaticGraphAdapter(object): """ Model traning/inference with a static graph. 
@@ -388,13 +357,13 @@ class StaticGraphAdapter(object): with fluid.program_guard(prog, self._startup_prog): inputs = self.model._inputs labels = self.model._labels if self.model._labels else [] - inputs = [k.forward() for k in to_list(inputs)] - labels = [k.forward() for k in to_list(labels)] + inputs = [k._create_feed_layer() for k in to_list(inputs)] + labels = [k._create_feed_layer() for k in to_list(labels)] self._label_vars[mode] = labels outputs = to_list(self.model.network.forward(*inputs)) - if mode != 'test' and self.model._loss_function: - losses = self.model._loss_function(*(outputs + labels)) + if mode != 'test' and self.model._loss: + losses = self.model._loss(*(outputs + labels)) if self._nranks > 1 and mode != 'train': outputs = [_all_gather(o, self._nranks) for o in outputs] @@ -403,8 +372,7 @@ class StaticGraphAdapter(object): if mode != 'test': for metric in self.model._metrics: - metrics.append( - to_list(metric.add_metric_op(*(outputs + labels)))) + metrics.append(to_list(metric.compute(*(outputs + labels)))) if mode == 'train' and self.model._optimizer: self._loss_endpoint = fluid.layers.sum(losses) @@ -509,7 +477,7 @@ class DynamicGraphAdapter(object): if self._nranks > 1: outputs = self.ddp_model.forward(* [to_variable(x) for x in inputs]) - losses = self.model._loss_function(*(to_list(outputs) + labels)) + losses = self.model._loss(*(to_list(outputs) + labels)) losses = to_list(losses) final_loss = fluid.layers.sum(losses) final_loss = self.ddp_model.scale_loss(final_loss) @@ -518,7 +486,7 @@ class DynamicGraphAdapter(object): else: outputs = self.model.network.forward( * [to_variable(x) for x in inputs]) - losses = self.model._loss_function(*(to_list(outputs) + labels)) + losses = self.model._loss(*(to_list(outputs) + labels)) losses = to_list(losses) final_loss = fluid.layers.sum(losses) final_loss.backward() @@ -527,7 +495,7 @@ class DynamicGraphAdapter(object): self.model.network.clear_gradients() metrics = [] for metric in self.model._metrics: - metric_outs = metric.add_metric_op(*(to_list(outputs) + labels)) + metric_outs = metric.compute(*(to_list(outputs) + labels)) m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)]) metrics.append(m) @@ -542,8 +510,8 @@ class DynamicGraphAdapter(object): labels = [to_variable(l) for l in to_list(labels)] outputs = self.model.network.forward(* [to_variable(x) for x in inputs]) - if self.model._loss_function: - losses = self.model._loss_function(*(to_list(outputs) + labels)) + if self.model._loss: + losses = self.model._loss(*(to_list(outputs) + labels)) losses = to_list(losses) if self._nranks > 1: @@ -571,13 +539,13 @@ class DynamicGraphAdapter(object): self._merge_count[self.mode + '_total'] += samples self._merge_count[self.mode + '_batch'] = samples - metric_outs = metric.add_metric_op(*(to_list(outputs) + labels)) + metric_outs = metric.compute(*(to_list(outputs) + labels)) m = metric.update(* [to_numpy(m) for m in to_list(metric_outs)]) metrics.append(m) - if self.model._loss_function and len(metrics): + if self.model._loss and len(metrics): return [to_numpy(l) for l in losses], metrics - elif self.model._loss_function: + elif self.model._loss: return [to_numpy(l) for l in losses] else: return metrics @@ -665,21 +633,21 @@ class Model(object): """ An Model object is network with training and inference features. Dynamic graph and static graph are supported at the same time, - switched by `fluid.enable_dygraph()`. The usage is as follows. + switched by `paddle.disable_static()`. The usage is as follows. 
But note, the switching between dynamic and static should be before instantiating a Model. The input description, i.e, hapi.Input, must be required for static graph. Args: - network (fluid.dygraph.Layer): The network is an instance of - fluid.dygraph.Layer. + network (paddle.nn.Layer): The network is an instance of + paddle.nn.Layer. inputs (Input|list|dict|None): `inputs`, entry points of network, could be a Input layer, or lits of Input layers, or dict (name: Input), or None. For static graph, inputs must be set. For dynamic graph, it could be None. labels (Input|list|None): `labels`, entry points of network, could be a Input layer or lits of Input layers, or None. - For static graph, if labels is required in loss_function, + For static graph, if labels is required in loss, labels must be set. Otherwise, it could be None. @@ -687,13 +655,12 @@ class Model(object): .. code-block:: python import paddle - import paddle.fluid as fluid import paddle.incubate.hapi as hapi - class MyNet(fluid.dygraph.Layer): + class MyNet(paddle.nn.Layer): def __init__(self, classifier_act=None): super(MyNet, self).__init__() - self._fc1 = fluid.dygraph.Linear(784, 200, act=classifier_act) + self._fc1 = paddle.nn.Linear(784, 200, act=classifier_act) def forward(self, x): y = self._fc1(x) @@ -701,18 +668,18 @@ class Model(object): device = hapi.set_device('gpu') # if use static graph, do not set - fluid.enable_dygraph(device) + paddle.disable_static(device) # inputs and labels are not required for dynamic graph. - input = hapi.Input('x', [None, 784], 'float32') - label = hapi.Input('label', [None, 1], 'int64') + input = hapi.Input([None, 784], 'float32', 'x') + label = hapi.Input([None, 1], 'int64', 'label') model = hapi.Model(MyNet(), input, label) - optim = fluid.optimizer.SGD(learning_rate=1e-3, + optim = paddle.optimizer.SGD(learning_rate=1e-3, parameter_list=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss(), - hapi.metrics.Accuracy()) + paddle.metric.Accuracy()) mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False) model.fit(mnist_data, epochs=2, batch_size=32, verbose=1) @@ -724,7 +691,7 @@ class Model(object): self.network = network self._inputs = None self._labels = None - self._loss_function = None + self._loss = None self._loss_weights = None self._optimizer = None self._optimizer = None @@ -734,16 +701,8 @@ class Model(object): if not isinstance(inputs, (list, dict, Input)): raise TypeError( "'inputs' must be list or dict in static graph mode") - if inputs is None: - self._inputs = [Input(name=n) \ - for n in extract_args(self.network.forward) if n != 'self'] - elif isinstance(input, dict): - self._inputs = [inputs[n] \ - for n in extract_args(self.network.forward) if n != 'self'] - else: - self._inputs = to_list(inputs) - - self._labels = to_list(labels) + self._inputs = self._verify_spec(inputs, True) + self._labels = self._verify_spec(labels) # init backend if fluid.in_dygraph_mode(): @@ -772,25 +731,24 @@ class Model(object): import numpy as np import paddle - import paddle.fluid as fluid import paddle.incubate.hapi as hapi - class MyNet(fluid.dygraph.Layer): + class MyNet(paddle.nn.Layer): def __init__(self, classifier_act=None): super(MyNet, self).__init__() - self._fc = fluid.dygraph.Linear(784, 10, act=classifier_act) + self._fc = paddle.nn.Linear(784, 10, act=classifier_act) def forward(self, x): y = self._fc(x) return y device = hapi.set_device('gpu') - fluid.enable_dygraph(device) + paddle.disable_static(device) - input = hapi.Input('x', [None, 784], 
'float32') - label = hapi.Input('label', [None, 1], 'int64') + input = hapi.Input([None, 784], 'float32', 'x') + label = hapi.Input([None, 1], 'int64', 'label') model = hapi.Model(MyNet(), input, label) - optim = fluid.optimizer.SGD(learning_rate=1e-3, + optim = paddle.optimizer.SGD(learning_rate=1e-3, parameter_list=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss()) data = np.random.random(size=(4,784)).astype(np.float32) @@ -821,25 +779,24 @@ class Model(object): import numpy as np import paddle - import paddle.fluid as fluid import paddle.incubate.hapi as hapi - class MyNet(fluid.dygraph.Layer): + class MyNet(paddle.nn.Layer): def __init__(self, classifier_act=None): super(MyNet, self).__init__() - self._fc = fluid.dygraph.Linear(784, 10, act=classifier_act) + self._fc = paddle.nn.Linear(784, 10, act=classifier_act) def forward(self, x): y = self._fc(x) return y device = hapi.set_device('gpu') - fluid.enable_dygraph(device) + paddle.disable_static(device) - input = hapi.Input('x', [None, 784], 'float32') - label = hapi.Input('label', [None, 1], 'int64') + input = hapi.Input([None, 784], 'float32', 'x') + label = hapi.Input([None, 1], 'int64', 'label') model = hapi.Model(MyNet(), input, label) - optim = fluid.optimizer.SGD(learning_rate=1e-3, + optim = paddle.optimizer.SGD(learning_rate=1e-3, parameter_list=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss()) @@ -867,24 +824,24 @@ class Model(object): .. code-block:: python import numpy as np - import paddle.fluid as fluid + import paddle import paddle.incubate.hapi as hapi - class MyNet(fluid.dygraph.Layer): + class MyNet(paddle.nn.Layer): def __init__(self): super(MyNet, self).__init__() - self._fc = fluid.dygraph.Linear(784, 1, act='softmax') + self._fc = paddle.nn.Linear(784, 1, act='softmax') def forward(self, x): y = self._fc(x) return y device = hapi.set_device('gpu') - fluid.enable_dygraph(device) + paddle.disable_static(device) model = hapi.Model(MyNet()) model.prepare() data = np.random.random(size=(4,784)).astype(np.float32) - out = model.eval_batch([data]) + out = model.test_batch([data]) print(out) """ return self._adapter.test_batch(inputs) @@ -915,19 +872,19 @@ class Model(object): .. code-block:: python - import paddle.fluid as fluid + import paddle import paddle.incubate.hapi as hapi - class MyNet(fluid.dygraph.Layer): + class MyNet(paddle.nn.Layer): def __init__(self): super(MyNet, self).__init__() - self._fc = fluid.dygraph.Linear(784, 1, act='softmax') + self._fc = paddle.nn.Linear(784, 1, act='softmax') def forward(self, x): y = self._fc(x) return y device = hapi.set_device('cpu') - fluid.enable_dygraph(device) + paddle.disable_static(device) model = hapi.Model(MyNet()) model.save('checkpoint/test') """ @@ -967,19 +924,19 @@ class Model(object): .. code-block:: python - import paddle.fluid as fluid + import paddle import paddle.incubate.hapi as hapi - class MyNet(fluid.dygraph.Layer): + class MyNet(paddle.nn.Layer): def __init__(self): super(MyNet, self).__init__() - self._fc = fluid.dygraph.Linear(784, 1, act='softmax') + self._fc = paddle.nn.Linear(784, 1, act='softmax') def forward(self, x): y = self._fc(x) return y device = hapi.set_device('cpu') - fluid.enable_dygraph(device) + paddle.disable_static(device) model = hapi.Model(MyNet()) model.load('checkpoint/test') """ @@ -1042,24 +999,24 @@ class Model(object): .. 
code-block:: python - import paddle.fluid as fluid + import paddle from paddle.incubate.hapi import Model - class MyNet(fluid.dygraph.Layer): + class MyNet(paddle.nn.Layer): def __init__(self): super(MyNet, self).__init__() - self._fc = fluid.dygraph.Linear(20, 10, act='softmax') + self._fc = paddle.nn.Linear(20, 10, act='softmax') def forward(self, x): y = self._fc(x) return y - fluid.enable_dygraph() + paddle.disable_static() model = Model(MyNet()) params = model.parameters() """ return self._adapter.parameters() - def prepare(self, optimizer=None, loss_function=None, metrics=None): + def prepare(self, optimizer=None, loss=None, metrics=None): """ Configures the model before runing. @@ -1067,8 +1024,8 @@ class Model(object): optimizer (Optimizer|None): Optimizer must be set in training and should be a Optimizer instance. It can be None in eval and test mode. - loss_function (Loss|callable function|None): Loss function can - be a `fluid.dygraph.Layer` instance or any callable function + loss (Loss|callable function|None): Loss function can + be a `paddle.nn.Layer` instance or any callable function taken the predicted values and ground truth values as input. It can be None when there is no loss. metrics (Metric|list of Metric|None): If metrics is set, all @@ -1087,7 +1044,7 @@ class Model(object): startup_prog_seed = fluid.default_startup_program( ).random_seed fluid.disable_dygraph() - fluid.enable_dygraph(self._place) + paddle.disable_static(self._place) # enable_dygraph would create and switch to a new program, # thus also copy seed to the new program fluid.default_main_program().random_seed = main_prog_seed @@ -1099,12 +1056,11 @@ class Model(object): _parallel_context_initialized = True self._optimizer = optimizer - if loss_function: - if not isinstance(loss_function, fluid.dygraph.Layer) or \ - not callable(loss_function): - raise TypeError("'loss_function' must be sub classes of \ - `fluid.dygraph.Layer` or any callable function.") - self._loss_function = loss_function + if loss is not None: + if not isinstance(loss, paddle.nn.Layer) and not callable(loss): + raise TypeError("'loss' must be sub classes of " \ + "`paddle.nn.Layer` or any callable function.") + self._loss = loss metrics = metrics or [] for metric in to_list(metrics): @@ -1184,27 +1140,26 @@ class Model(object): .. code-block:: python import paddle - import paddle.fluid as fluid import paddle.incubate.hapi as hapi dynamic = True device = hapi.set_device('gpu') - fluid.enable_dygraph(device) if dynamic else None + paddle.disable_static(device) if dynamic else None train_dataset = hapi.datasets.MNIST(mode='train') val_dataset = hapi.datasets.MNIST(mode='test') - input = hapi.Input('image', [None, 1, 28, 28], 'float32') - label = hapi.Input('label', [None, 1], 'int64') + input = hapi.Input([None, 1, 28, 28], 'float32', 'image') + label = hapi.Input([None, 1], 'int64', 'label') model = hapi.Model(hapi.vision.LeNet(classifier_activation=None), input, label) - optim = fluid.optimizer.Adam( - learning_rate=0.001, parameter_list=model.parameters()) + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=model.parameters()) model.prepare( optim, paddle.nn.CrossEntropyLoss(), - hapi.metrics.Accuracy(topk=(1, 2))) + paddle.metric.Accuracy(topk=(1, 2))) model.fit(train_dataset, val_dataset, epochs=2, @@ -1217,31 +1172,30 @@ class Model(object): .. 
code-block:: python import paddle - import paddle.fluid as fluid import paddle.incubate.hapi as hapi dynamic = True device = hapi.set_device('gpu') - fluid.enable_dygraph(device) if dynamic else None + paddle.disable_static(device) if dynamic else None train_dataset = hapi.datasets.MNIST(mode='train') - train_loader = fluid.io.DataLoader(train_dataset, + train_loader = paddle.io.DataLoader(train_dataset, places=device, batch_size=64) val_dataset = hapi.datasets.MNIST(mode='test') - val_loader = fluid.io.DataLoader(val_dataset, + val_loader = paddle.io.DataLoader(val_dataset, places=device, batch_size=64) - input = hapi.Input('image', [None, 1, 28, 28], 'float32') - label = hapi.Input('label', [None, 1], 'int64') + input = hapi.Input([None, 1, 28, 28], 'float32', 'image') + label = hapi.Input([None, 1], 'int64', 'label') model = hapi.Model(hapi.vision.LeNet(classifier_activation=None), input, label) - optim = fluid.optimizer.Adam( - learning_rate=0.001, parameter_list=model.parameters()) + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=model.parameters()) model.prepare( optim, paddle.nn.CrossEntropyLoss(), - hapi.metrics.Accuracy(topk=(1, 2))) + paddle.metric.Accuracy(topk=(1, 2))) model.fit(train_loader, val_loader, epochs=2, @@ -1353,24 +1307,24 @@ class Model(object): Examples: .. code-block:: python - import paddle.fluid as fluid + import paddle import paddle.incubate.hapi as hapi # declarative mode val_dataset = hapi.datasets.MNIST(mode='test') - input = hapi.Input('image', [-1, 1, 28, 28], 'float32') - label = hapi.Input('label', [None, 1], 'int64') + input = hapi.Input([-1, 1, 28, 28], 'float32', 'image') + label = hapi.Input([None, 1], 'int64', 'label') model = hapi.Model(hapi.vision.LeNet(), input, label) - model.prepare(metrics=hapi.metrics.Accuracy()) + model.prepare(metrics=paddle.metric.Accuracy()) result = model.evaluate(val_dataset, batch_size=64) print(result) # imperative mode - fluid.enable_dygraph() + paddle.disable_static() model = hapi.Model(hapi.vision.LeNet()) - model.prepare(metrics=hapi.metrics.Accuracy()) + model.prepare(metrics=paddle.metric.Accuracy()) result = model.evaluate(val_dataset, batch_size=64) print(result) @@ -1433,12 +1387,13 @@ class Model(object): num_workers (int): The number of subprocess to load data, 0 for no subprocess used and loading data in main process. When train_data and eval_data are both the instance of Dataloader, this argument will be ignored. Default: 0. - stack_output (bool): Whether stack output field like a batch, as for an output + stack_outputs (bool): Whether stack output field like a batch, as for an output filed of a sample is in shape [X, Y], test_data contains N samples, predict output field will be in shape [N, X, Y] if stack_output is True, and will be a length N list in shape [[X, Y], [X, Y], ....[X, Y]] if stack_outputs is False. stack_outputs as False is used for LoDTensor output situation, it is recommended set as True if outputs contains no LoDTensor. Default: False. + callbacks(Callback): A Callback instance, default None. Returns: list: output of models. @@ -1446,7 +1401,7 @@ class Model(object): .. 
code-block:: python import numpy as np - import paddle.fluid as fluid + import paddle import paddle.incubate.hapi as hapi class MnistDataset(hapi.datasets.MNIST): @@ -1466,7 +1421,7 @@ class Model(object): test_dataset = MnistDataset(mode='test', return_label=False) # declarative mode - input = hapi.Input('image', [-1, 1, 28, 28], 'float32') + input = hapi.Input([-1, 1, 28, 28], 'float32', 'image') model = hapi.Model(hapi.vision.LeNet(), input) model.prepare() @@ -1475,7 +1430,7 @@ class Model(object): # imperative mode device = hapi.set_device('cpu') - fluid.enable_dygraph(device) + paddle.disable_static(device) model = hapi.Model(hapi.vision.LeNet()) model.prepare() result = model.predict(test_dataset, batch_size=64) @@ -1545,10 +1500,9 @@ class Model(object): Examples: .. code-block:: python - import paddle.fluid as fluid import paddle.incubate.hapi as hapi - input = hapi.Input('image', [-1, 1, 28, 28], 'float32') + input = hapi.Input([-1, 1, 28, 28], 'float32', 'image') model = hapi.Model(hapi.vision.LeNet(), input) model.prepare() @@ -1601,9 +1555,9 @@ class Model(object): if mode != 'test': outs = getattr(self, mode + '_batch')(data[:len(self._inputs)], data[len(self._inputs):]) - if self._metrics and self._loss_function: + if self._metrics and self._loss: metrics = [[l[0] for l in outs[0]]] - elif self._loss_function: + elif self._loss: metrics = [[l[0] for l in outs]] else: metrics = [] @@ -1639,12 +1593,42 @@ class Model(object): return logs, outputs return logs + def _verify_spec(self, specs, is_input=False): + out_specs = [] + + if specs is None: + # If not specific specs of `Input`, using argument names of `forward` function + # to generate `Input`. + if is_input: + out_specs = [ + Input(name=n) for n in extract_args(self.network.forward) + if n != 'self' + ] + else: + out_specs = to_list(specs) + elif isinstance(specs, dict): + assert is_input == False + out_specs = [specs[n] \ + for n in extract_args(self.network.forward) if n != 'self'] + else: + out_specs = to_list(specs) + # Note: checks each element has specificed `name`. + if out_specs is not None: + for i, spec in enumerate(out_specs): + assert isinstance(spec, Input) + if spec.name is None: + raise ValueError( + "Requires Input[{}].name != None, but receive `None` with {}.". 
+ format(i, spec)) + + return out_specs + def _reset_metrics(self): for metric in self._metrics: metric.reset() def _metrics_name(self): - metrics_name = ['loss'] if self._loss_function else [] + metrics_name = ['loss'] if self._loss else [] for m in self._metrics: metrics_name.extend(to_list(m.name())) return metrics_name diff --git a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py index b338f3310b4c796e66d88b21f1bb8353dbf5b572..ede99a50c2fa72da3bd1999204a5fe1e5a656be2 100644 --- a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py +++ b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_dynamic.py @@ -25,7 +25,7 @@ from paddle import fluid from paddle.incubate.hapi import Model, Input, set_device from paddle.nn.layer.loss import CrossEntropyLoss from paddle.incubate.hapi.vision.models import LeNet -from paddle.incubate.hapi.metrics import Accuracy +from paddle.metric import Accuracy from paddle.incubate.hapi.callbacks import ProgBarLogger from paddle.incubate.hapi.datasets import MNIST @@ -64,8 +64,8 @@ class TestDistTraning(unittest.TestCase): im_shape = (-1, 1, 28, 28) batch_size = 128 - inputs = [Input('image', im_shape, 'float32')] - labels = [Input('label', [None, 1], 'int64')] + inputs = [Input(im_shape, 'float32', 'image')] + labels = [Input([None, 1], 'int64', 'label')] model = Model(LeNet(classifier_activation=None), inputs, labels) optim = fluid.optimizer.Momentum( diff --git a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py index 1484620a4efdfff0c084153e9edb001833d744ef..28305fc6a6fd08c160f946920e85391cd444caef 100644 --- a/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py +++ b/python/paddle/incubate/hapi/tests/dist_hapi_mnist_static.py @@ -25,7 +25,7 @@ from paddle import fluid from paddle.incubate.hapi import Model, Input, set_device from paddle.nn.layer.loss import CrossEntropyLoss from paddle.incubate.hapi.vision.models import LeNet -from paddle.incubate.hapi.metrics import Accuracy +from paddle.metric import Accuracy from paddle.incubate.hapi.callbacks import ProgBarLogger from paddle.incubate.hapi.datasets import MNIST @@ -63,8 +63,8 @@ class TestDistTraning(unittest.TestCase): im_shape = (-1, 1, 28, 28) batch_size = 128 - inputs = [Input('image', im_shape, 'float32')] - labels = [Input('label', [None, 1], 'int64')] + inputs = [Input(im_shape, 'float32', 'image')] + labels = [Input([None, 1], 'int64', 'label')] model = Model(LeNet(classifier_activation=None), inputs, labels) optim = fluid.optimizer.Momentum( diff --git a/python/paddle/incubate/hapi/tests/test_callbacks.py b/python/paddle/incubate/hapi/tests/test_callbacks.py index 2a8a470736d921628edadb55b7e0cc956e2f37f1..e49bf215c276c8b495b0f991a5821d4c674f48d2 100644 --- a/python/paddle/incubate/hapi/tests/test_callbacks.py +++ b/python/paddle/incubate/hapi/tests/test_callbacks.py @@ -36,7 +36,7 @@ class TestCallbacks(unittest.TestCase): freq = 2 eval_steps = 20 - inputs = [Input('image', [None, 1, 28, 28], 'float32')] + inputs = [Input([None, 1, 28, 28], 'float32', 'image')] lenet = Model(LeNet(), inputs) lenet.prepare() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_cifar.py b/python/paddle/incubate/hapi/tests/test_dataset_cifar.py new file mode 100644 index 0000000000000000000000000000000000000000..08d9f4353c0ed639f5ad907c921bf7b2c88271f5 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dataset_cifar.py @@ -0,0 +1,83 @@ +# 
Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import * +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + + +class TestCifar10Train(unittest.TestCase): + def test_main(self): + cifar = Cifar10(mode='train') + self.assertTrue(len(cifar) == 50000) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 50000) + data, label = cifar[idx] + self.assertTrue(len(data.shape) == 1) + self.assertTrue(data.shape[0] == 3072) + self.assertTrue(0 <= int(label) <= 9) + + +class TestCifar10Test(unittest.TestCase): + def test_main(self): + cifar = Cifar10(mode='test') + self.assertTrue(len(cifar) == 10000) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 10000) + data, label = cifar[idx] + self.assertTrue(len(data.shape) == 1) + self.assertTrue(data.shape[0] == 3072) + self.assertTrue(0 <= int(label) <= 9) + + +class TestCifar100Train(unittest.TestCase): + def test_main(self): + cifar = Cifar100(mode='train') + self.assertTrue(len(cifar) == 50000) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 50000) + data, label = cifar[idx] + self.assertTrue(len(data.shape) == 1) + self.assertTrue(data.shape[0] == 3072) + self.assertTrue(0 <= int(label) <= 99) + + +class TestCifar100Test(unittest.TestCase): + def test_main(self): + cifar = Cifar100(mode='test') + self.assertTrue(len(cifar) == 10000) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 10000) + data, label = cifar[idx] + self.assertTrue(len(data.shape) == 1) + self.assertTrue(data.shape[0] == 3072) + self.assertTrue(0 <= int(label) <= 99) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_conll05.py b/python/paddle/incubate/hapi/tests/test_dataset_conll05.py new file mode 100644 index 0000000000000000000000000000000000000000..0ed2a4180d0cb341f5d57bdf1cb9d8ef145a44fb --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dataset_conll05.py @@ -0,0 +1,41 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import * +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + + +class TestConll05st(unittest.TestCase): + def test_main(self): + conll05st = Conll05st() + self.assertTrue(len(conll05st) == 5267) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 5267) + sample = conll05st[idx] + self.assertTrue(len(sample) == 9) + for s in sample: + self.assertTrue(len(s.shape) == 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_imdb.py b/python/paddle/incubate/hapi/tests/test_dataset_imdb.py new file mode 100644 index 0000000000000000000000000000000000000000..cef73634b6b5fb114fa88b785bb77a87fe129bd5 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dataset_imdb.py @@ -0,0 +1,55 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import * +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + + +class TestImdbTrain(unittest.TestCase): + def test_main(self): + imdb = Imdb(mode='train') + self.assertTrue(len(imdb) == 25000) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 25000) + data, label = imdb[idx] + self.assertTrue(len(data.shape) == 1) + self.assertTrue(label.shape[0] == 1) + self.assertTrue(int(label) in [0, 1]) + + +class TestImdbTest(unittest.TestCase): + def test_main(self): + imdb = Imdb(mode='test') + self.assertTrue(len(imdb) == 25000) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 25000) + data, label = imdb[idx] + self.assertTrue(len(data.shape) == 1) + self.assertTrue(label.shape[0] == 1) + self.assertTrue(int(label) in [0, 1]) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_imikolov.py b/python/paddle/incubate/hapi/tests/test_dataset_imikolov.py new file mode 100644 index 0000000000000000000000000000000000000000..f3d97d314acbf7f55a8482fd386581fef7f16e03 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dataset_imikolov.py @@ -0,0 +1,51 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import * +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + + +class TestImikolovTrain(unittest.TestCase): + def test_main(self): + imikolov = Imikolov(mode='train', data_type='NGRAM', window_size=2) + self.assertTrue(len(imikolov) == 929589) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 929589) + data = imikolov[idx] + self.assertTrue(len(data) == 2) + + +class TestImikolovTest(unittest.TestCase): + def test_main(self): + imikolov = Imikolov(mode='test', data_type='NGRAM', window_size=2) + self.assertTrue(len(imikolov) == 82430) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 82430) + data = imikolov[idx] + self.assertTrue(len(data) == 2) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_movie_reviews.py b/python/paddle/incubate/hapi/tests/test_dataset_movie_reviews.py new file mode 100644 index 0000000000000000000000000000000000000000..ae8a7a3035ee0e86f8ee2fa9e8a23f6036758d2d --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dataset_movie_reviews.py @@ -0,0 +1,55 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import * +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + + +class TestMovieReviewsTrain(unittest.TestCase): + def test_main(self): + movie_reviews = MovieReviews(mode='train') + self.assertTrue(len(movie_reviews) == 1600) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 1600) + data = movie_reviews[idx] + self.assertTrue(len(data) == 2) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(int(data[1]) in [0, 1]) + + +class TestMovieReviewsTest(unittest.TestCase): + def test_main(self): + movie_reviews = MovieReviews(mode='test') + self.assertTrue(len(movie_reviews) == 400) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 400) + data = movie_reviews[idx] + self.assertTrue(len(data) == 2) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(int(data[1]) in [0, 1]) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_movielens.py b/python/paddle/incubate/hapi/tests/test_dataset_movielens.py new file mode 100644 index 0000000000000000000000000000000000000000..f94269f930e05e04b3bdfc4324e5ae1ea15b1fb9 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dataset_movielens.py @@ -0,0 +1,61 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import * +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + + +class TestMovielensTrain(unittest.TestCase): + def test_main(self): + movielens = Movielens(mode='train') + # movielens dataset random split train/test + # not check dataset length here + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 900000) + data = movielens[idx] + self.assertTrue(len(data) == 8) + for i, d in enumerate(data): + self.assertTrue(len(d.shape) == 1) + if i not in [5, 6]: + self.assertTrue(d.shape[0] == 1) + + +class TestMovielensTest(unittest.TestCase): + def test_main(self): + movielens = Movielens(mode='test') + # movielens dataset random split train/test + # not check dataset length here + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 100000) + data = movielens[idx] + self.assertTrue(len(data) == 8) + for i, d in enumerate(data): + self.assertTrue(len(d.shape) == 1) + if i not in [5, 6]: + self.assertTrue(d.shape[0] == 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_uci_housing.py b/python/paddle/incubate/hapi/tests/test_dataset_uci_housing.py new file mode 100644 index 0000000000000000000000000000000000000000..768367bff9911a352ea6b13f279d5b71938bc85b --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dataset_uci_housing.py @@ -0,0 +1,104 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import * +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + + +class TestUCIHousingTrain(unittest.TestCase): + def test_main(self): + uci_housing = UCIHousing(mode='train') + self.assertTrue(len(uci_housing) == 404) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 404) + data = uci_housing[idx] + self.assertTrue(len(data) == 2) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(data[0].shape[0] == 13) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(data[1].shape[0] == 1) + + +class TestUCIHousingTest(unittest.TestCase): + def test_main(self): + uci_housing = UCIHousing(mode='test') + self.assertTrue(len(uci_housing) == 102) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 102) + data = uci_housing[idx] + self.assertTrue(len(data) == 2) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(data[0].shape[0] == 13) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(data[1].shape[0] == 1) + + +class TestWMT14Train(unittest.TestCase): + def test_main(self): + wmt14 = WMT14(mode='train', dict_size=50) + self.assertTrue(len(wmt14) == 191155) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 191155) + data = wmt14[idx] + self.assertTrue(len(data) == 3) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(len(data[2].shape) == 1) + + +class TestWMT14Test(unittest.TestCase): + def test_main(self): + wmt14 = WMT14(mode='test', dict_size=50) + self.assertTrue(len(wmt14) == 5957) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 5957) + data = wmt14[idx] + self.assertTrue(len(data) == 3) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(len(data[2].shape) == 1) + + +class TestWMT14Gen(unittest.TestCase): + def test_main(self): + wmt14 = WMT14(mode='gen', dict_size=50) + self.assertTrue(len(wmt14) == 3001) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 3001) + data = wmt14[idx] + self.assertTrue(len(data) == 3) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(len(data[2].shape) == 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_voc.py b/python/paddle/incubate/hapi/tests/test_dataset_voc.py new file mode 100644 index 0000000000000000000000000000000000000000..85766ab8e30a3a7abd5e2966e6353b116c03e926 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dataset_voc.py @@ -0,0 +1,70 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import voc2012, VOC2012 +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + +# VOC2012 is too large for unittest to download, stub a small dataset here +voc2012.VOC_URL = 'https://paddlemodels.bj.bcebos.com/voc2012_stub/VOCtrainval_11-May-2012.tar' +voc2012.VOC_MD5 = '34cb1fe5bdc139a5454b25b16118fff8' + + +class TestVOC2012Train(unittest.TestCase): + def test_main(self): + voc2012 = VOC2012(mode='train') + self.assertTrue(len(voc2012) == 3) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 3) + image, label = voc2012[idx] + self.assertTrue(len(image.shape) == 3) + self.assertTrue(len(label.shape) == 2) + + +class TestVOC2012Valid(unittest.TestCase): + def test_main(self): + voc2012 = VOC2012(mode='valid') + self.assertTrue(len(voc2012) == 1) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 1) + image, label = voc2012[idx] + self.assertTrue(len(image.shape) == 3) + self.assertTrue(len(label.shape) == 2) + + +class TestVOC2012Test(unittest.TestCase): + def test_main(self): + voc2012 = VOC2012(mode='test') + self.assertTrue(len(voc2012) == 2) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 1) + image, label = voc2012[idx] + self.assertTrue(len(image.shape) == 3) + self.assertTrue(len(label.shape) == 2) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_dataset_wmt.py b/python/paddle/incubate/hapi/tests/test_dataset_wmt.py new file mode 100644 index 0000000000000000000000000000000000000000..987e55676aadb77582c58b13e626d7258f3c75b5 --- /dev/null +++ b/python/paddle/incubate/hapi/tests/test_dataset_wmt.py @@ -0,0 +1,119 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import os +import numpy as np +import tempfile +import shutil +import cv2 + +from paddle.incubate.hapi.datasets import * +from paddle.incubate.hapi.datasets.utils import _check_exists_and_download + + +class TestWMT14Train(unittest.TestCase): + def test_main(self): + wmt14 = WMT14(mode='train', dict_size=50) + self.assertTrue(len(wmt14) == 191155) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 191155) + data = wmt14[idx] + self.assertTrue(len(data) == 3) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(len(data[2].shape) == 1) + + +class TestWMT14Test(unittest.TestCase): + def test_main(self): + wmt14 = WMT14(mode='test', dict_size=50) + self.assertTrue(len(wmt14) == 5957) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 5957) + data = wmt14[idx] + self.assertTrue(len(data) == 3) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(len(data[2].shape) == 1) + + +class TestWMT14Gen(unittest.TestCase): + def test_main(self): + wmt14 = WMT14(mode='gen', dict_size=50) + self.assertTrue(len(wmt14) == 3001) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 3001) + data = wmt14[idx] + self.assertTrue(len(data) == 3) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(len(data[2].shape) == 1) + + +class TestWMT16Train(unittest.TestCase): + def test_main(self): + wmt16 = WMT16( + mode='train', src_dict_size=50, trg_dict_size=50, lang='en') + self.assertTrue(len(wmt16) == 29000) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 29000) + data = wmt16[idx] + self.assertTrue(len(data) == 3) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(len(data[2].shape) == 1) + + +class TestWMT16Test(unittest.TestCase): + def test_main(self): + wmt16 = WMT16( + mode='test', src_dict_size=50, trg_dict_size=50, lang='en') + self.assertTrue(len(wmt16) == 1000) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 1000) + data = wmt16[idx] + self.assertTrue(len(data) == 3) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(len(data[2].shape) == 1) + + +class TestWMT16Val(unittest.TestCase): + def test_main(self): + wmt16 = WMT16(mode='val', src_dict_size=50, trg_dict_size=50, lang='en') + self.assertTrue(len(wmt16) == 1014) + + # traversal whole dataset may cost a + # long time, randomly check 1 sample + idx = np.random.randint(0, 1014) + data = wmt16[idx] + self.assertTrue(len(data) == 3) + self.assertTrue(len(data[0].shape) == 1) + self.assertTrue(len(data[1].shape) == 1) + self.assertTrue(len(data[2].shape) == 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_metrics.py b/python/paddle/incubate/hapi/tests/test_metrics.py deleted file mode 100644 index 3d25a275d5f1c539ce959c5231a7af771b229836..0000000000000000000000000000000000000000 --- a/python/paddle/incubate/hapi/tests/test_metrics.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import print_function - -import os -import unittest -import numpy as np - -import paddle.fluid as fluid -from paddle.fluid.dygraph.base import to_variable - -from paddle.incubate.hapi.metrics import * -from paddle.incubate.hapi.utils import to_list - - -def accuracy(pred, label, topk=(1, )): - maxk = max(topk) - pred = np.argsort(pred)[:, ::-1][:, :maxk] - correct = (pred == np.repeat(label, maxk, 1)) - - batch_size = label.shape[0] - res = [] - for k in topk: - correct_k = correct[:, :k].sum() - res.append(correct_k / batch_size) - return res - - -def convert_to_one_hot(y, C): - oh = np.random.random((y.shape[0], C)).astype('float32') * .5 - for i in range(y.shape[0]): - oh[i, int(y[i])] = 1. - return oh - - -class TestAccuracyDynamic(unittest.TestCase): - def setUp(self): - self.topk = (1, ) - self.class_num = 5 - self.sample_num = 1000 - self.name = None - - def random_pred_label(self): - label = np.random.randint(0, self.class_num, - (self.sample_num, 1)).astype('int64') - pred = np.random.randint(0, self.class_num, - (self.sample_num, 1)).astype('int32') - pred_one_hot = convert_to_one_hot(pred, self.class_num) - pred_one_hot = pred_one_hot.astype('float32') - - return label, pred_one_hot - - def test_main(self): - with fluid.dygraph.guard(fluid.CPUPlace()): - acc = Accuracy(topk=self.topk, name=self.name) - for _ in range(10): - label, pred = self.random_pred_label() - label_var = to_variable(label) - pred_var = to_variable(pred) - state = to_list(acc.add_metric_op(pred_var, label_var)) - acc.update(* [s.numpy() for s in state]) - res_m = acc.accumulate() - res_f = accuracy(pred, label, self.topk) - assert np.all(np.isclose(np.array(res_m, dtype='float64'), np.array(res_f, dtype='float64'), rtol=1e-3)), \ - "Accuracy precision error: {} != {}".format(res_m, res_f) - acc.reset() - assert np.sum(acc.total) == 0 - assert np.sum(acc.count) == 0 - - -class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic): - def setUp(self): - self.topk = (1, 5) - self.class_num = 10 - self.sample_num = 1000 - self.name = "accuracy" - - -class TestAccuracyStatic(TestAccuracyDynamic): - def test_main(self): - main_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(main_prog, startup_prog): - pred = fluid.data( - name='pred', shape=[None, self.class_num], dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - acc = Accuracy(topk=self.topk, name=self.name) - state = acc.add_metric_op(pred, label) - - exe = fluid.Executor(fluid.CPUPlace()) - compiled_main_prog = fluid.CompiledProgram(main_prog) - - for _ in range(10): - label, pred = self.random_pred_label() - state_ret = exe.run(compiled_main_prog, - feed={'pred': pred, - 'label': label}, - fetch_list=[s.name for s in to_list(state)], - return_numpy=True) - acc.update(*state_ret) - res_m = acc.accumulate() - res_f = accuracy(pred, label, self.topk) - assert np.all(np.isclose(np.array(res_m, 
dtype='float64'), np.array(res_f, dtype='float64'), rtol=1e-3)), \ - "Accuracy precision error: {} != {}".format(res_m, res_f) - acc.reset() - assert np.sum(acc.total) == 0 - assert np.sum(acc.count) == 0 - - -class TestAccuracyStaticMultiTopk(TestAccuracyStatic): - def setUp(self): - self.topk = (1, 5) - self.class_num = 10 - self.sample_num = 1000 - self.name = "accuracy" - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_model.py b/python/paddle/incubate/hapi/tests/test_model.py index f8be2e242568de10bfbf14fb3b88ef88fb0094da..af17c5636426cb6ed7af00f3b483bd286b9f03b8 100644 --- a/python/paddle/incubate/hapi/tests/test_model.py +++ b/python/paddle/incubate/hapi/tests/test_model.py @@ -23,13 +23,13 @@ import shutil import tempfile from paddle import fluid -from paddle.nn import Conv2D, Pool2D, Linear, ReLU, Sequential +from paddle.nn import Conv2d, Pool2D, Linear, ReLU, Sequential from paddle.fluid.dygraph.base import to_variable import paddle.incubate.hapi as hapi from paddle.incubate.hapi import Model, Input from paddle.nn.layer.loss import CrossEntropyLoss -from paddle.incubate.hapi.metrics import Accuracy +from paddle.metric import Accuracy from paddle.incubate.hapi.datasets import MNIST from paddle.incubate.hapi.vision.models import LeNet from paddle.incubate.hapi.distributed import DistributedBatchSampler, prepare_distributed_context @@ -40,11 +40,11 @@ class LeNetDygraph(fluid.dygraph.Layer): super(LeNetDygraph, self).__init__() self.num_classes = num_classes self.features = Sequential( - Conv2D( + Conv2d( 1, 6, 3, stride=1, padding=1), ReLU(), Pool2D(2, 'max', 2), - Conv2D( + Conv2d( 6, 16, 5, stride=1, padding=0), ReLU(), Pool2D(2, 'max', 2)) @@ -150,8 +150,8 @@ class TestModel(unittest.TestCase): cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader) - cls.inputs = [Input('image', [-1, 1, 28, 28], 'float32')] - cls.labels = [Input('label', [None, 1], 'int64')] + cls.inputs = [Input([-1, 1, 28, 28], 'float32', 'image')] + cls.labels = [Input([None, 1], 'int64', 'label')] cls.save_dir = tempfile.mkdtemp() cls.weight_path = os.path.join(cls.save_dir, 'lenet') @@ -169,6 +169,12 @@ class TestModel(unittest.TestCase): def test_fit_static(self): self.fit(False) + def test_fit_dynamic_with_rank(self): + self.fit(True, 2, 0) + + def test_fit_static_with_rank(self): + self.fit(False, 2, 0) + def test_evaluate_dygraph(self): self.evaluate(True) @@ -184,7 +190,7 @@ class TestModel(unittest.TestCase): def test_prepare_context(self): prepare_distributed_context() - def fit(self, dynamic): + def fit(self, dynamic, num_replicas=None, rank=None): fluid.enable_dygraph(self.device) if dynamic else None seed = 333 fluid.default_startup_program().random_seed = seed @@ -196,7 +202,7 @@ class TestModel(unittest.TestCase): model = Model(net, inputs=self.inputs, labels=self.labels) model.prepare( optim_new, - loss_function=CrossEntropyLoss(reduction="sum"), + loss=CrossEntropyLoss(reduction="sum"), metrics=Accuracy()) model.fit(self.train_dataset, batch_size=64, shuffle=False) @@ -204,9 +210,17 @@ class TestModel(unittest.TestCase): np.testing.assert_allclose(result['acc'], self.acc1) train_sampler = DistributedBatchSampler( - self.train_dataset, batch_size=64, shuffle=False) + self.train_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) val_sampler = DistributedBatchSampler( - self.val_dataset, batch_size=64, shuffle=False) + self.val_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) 
train_loader = fluid.io.DataLoader( self.train_dataset, @@ -316,11 +330,10 @@ class TestModelFunction(unittest.TestCase): optim2 = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) - inputs = [Input('x', [None, dim], 'float32')] - labels = [Input('label', [None, 1], 'int64')] + inputs = [Input([None, dim], 'float32', 'x')] + labels = [Input([None, 1], 'int64', 'label')] model = Model(net, inputs, labels) - model.prepare( - optim2, loss_function=CrossEntropyLoss(reduction="sum")) + model.prepare(optim2, loss=CrossEntropyLoss(reduction="sum")) loss, = model.train_batch([data], [label]) np.testing.assert_allclose(loss.flatten(), ref.flatten()) @@ -345,7 +358,7 @@ class TestModelFunction(unittest.TestCase): fluid.enable_dygraph(device) if dynamic else None self.set_seed() net = MyModel() - inputs = [Input('x', [None, dim], 'float32')] + inputs = [Input([None, dim], 'float32', 'x')] model = Model(net, inputs) model.prepare() out, = model.test_batch([data]) @@ -359,14 +372,13 @@ class TestModelFunction(unittest.TestCase): device = hapi.set_device('cpu') fluid.enable_dygraph(device) if dynamic else None net = MyModel(classifier_activation=None) - inputs = [Input('x', [None, 20], 'float32')] - labels = [Input('label', [None, 1], 'int64')] + inputs = [Input([None, 20], 'float32', 'x')] + labels = [Input([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) model = Model(net, inputs, labels) model.prepare( - optimizer=optim, - loss_function=CrossEntropyLoss(reduction="sum")) + optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) model.save(path + '/test') model.load(path + '/test') shutil.rmtree(path) @@ -380,18 +392,16 @@ class TestModelFunction(unittest.TestCase): model = Model(MyModel(classifier_activation=None)) optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=model.parameters()) - model.prepare( - optimizer=optim, loss_function=CrossEntropyLoss(reduction="sum")) + model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) model.save(path + '/test') fluid.disable_dygraph() - inputs = [Input('x', [None, 20], 'float32')] - labels = [Input('label', [None, 1], 'int64')] + inputs = [Input([None, 20], 'float32', 'x')] + labels = [Input([None, 1], 'int64', 'label')] model = Model(MyModel(classifier_activation=None), inputs, labels) optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=model.parameters()) - model.prepare( - optimizer=optim, loss_function=CrossEntropyLoss(reduction="sum")) + model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) model.load(path + '/test') shutil.rmtree(path) @@ -399,26 +409,24 @@ class TestModelFunction(unittest.TestCase): path = tempfile.mkdtemp() net = MyModel(classifier_activation=None) - inputs = [Input('x', [None, 20], 'float32')] - labels = [Input('label', [None, 1], 'int64')] + inputs = [Input([None, 20], 'float32', 'x')] + labels = [Input([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) model = Model(net, inputs, labels) - model.prepare( - optimizer=optim, loss_function=CrossEntropyLoss(reduction="sum")) + model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) model.save(path + '/test') device = hapi.set_device('cpu') fluid.enable_dygraph(device) #if dynamic else None net = MyModel(classifier_activation=None) - inputs = [Input('x', [None, 20], 'float32')] - labels = [Input('label', [None, 1], 'int64')] + inputs = [Input([None, 20], 'float32', 'x')] 
+ labels = [Input([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) model = Model(net, inputs, labels) - model.prepare( - optimizer=optim, loss_function=CrossEntropyLoss(reduction="sum")) + model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) model.load(path + '/test') shutil.rmtree(path) fluid.disable_dygraph() @@ -428,7 +436,7 @@ class TestModelFunction(unittest.TestCase): device = hapi.set_device('cpu') fluid.enable_dygraph(device) if dynamic else None net = MyModel() - inputs = [Input('x', [None, 20], 'float32')] + inputs = [Input([None, 20], 'float32', 'x')] model = Model(net, inputs) model.prepare() params = model.parameters() @@ -438,7 +446,7 @@ class TestModelFunction(unittest.TestCase): def test_export_deploy_model(self): net = LeNet() - inputs = [Input('image', [-1, 1, 28, 28], 'float32')] + inputs = [Input([-1, 1, 28, 28], 'float32', 'image')] model = Model(net, inputs) model.prepare() save_dir = tempfile.mkdtemp() @@ -466,5 +474,15 @@ class TestModelFunction(unittest.TestCase): shutil.rmtree(save_dir) +class TestRaiseError(unittest.TestCase): + def test_input_without_name(self): + net = MyModel(classifier_activation=None) + + inputs = [Input([None, 10], 'float32')] + labels = [Input([None, 1], 'int64', 'label')] + with self.assertRaises(ValueError): + model = Model(net, inputs, labels) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/incubate/hapi/tests/test_pretrained_model.py b/python/paddle/incubate/hapi/tests/test_pretrained_model.py index 588797322f4ab8e9eef9cc184cc6d82635de7d01..334ebff449d4f34c9a5a9b56ee7998b4dbc5abf0 100644 --- a/python/paddle/incubate/hapi/tests/test_pretrained_model.py +++ b/python/paddle/incubate/hapi/tests/test_pretrained_model.py @@ -28,7 +28,7 @@ class TestPretrainedModel(unittest.TestCase): fluid.enable_dygraph() net = models.__dict__[arch](pretrained=True, classifier_activation=None) - inputs = [Input('image', [None, 3, 224, 224], 'float32')] + inputs = [Input([None, 3, 224, 224], 'float32', 'image')] model = Model(network=net, inputs=inputs) model.prepare() res = model.test_batch(x) diff --git a/python/paddle/incubate/hapi/tests/test_text.py b/python/paddle/incubate/hapi/tests/test_text.py index 78f089b06a38dec4eb189a9744e503f517f220db..bdc637997b0cbd8389fdfab9f71597c62b0e21a3 100644 --- a/python/paddle/incubate/hapi/tests/test_text.py +++ b/python/paddle/incubate/hapi/tests/test_text.py @@ -142,7 +142,7 @@ class TestBasicLSTM(ModuleApiTest): def make_inputs(self): inputs = [ - Input("input", [None, None, self.inputs[-1].shape[-1]], "float32"), + Input([None, None, self.inputs[-1].shape[-1]], "float32", "input"), ] return inputs @@ -168,7 +168,7 @@ class TestBasicGRU(ModuleApiTest): def make_inputs(self): inputs = [ - Input("input", [None, None, self.inputs[-1].shape[-1]], "float32"), + Input([None, None, self.inputs[-1].shape[-1]], "float32", "input"), ] return inputs @@ -219,8 +219,8 @@ class TestBeamSearch(ModuleApiTest): def make_inputs(self): inputs = [ - Input("init_hidden", [None, self.inputs[0].shape[-1]], "float32"), - Input("init_cell", [None, self.inputs[1].shape[-1]], "float32"), + Input([None, self.inputs[0].shape[-1]], "float32", "init_hidden"), + Input([None, self.inputs[1].shape[-1]], "float32", "init_cell"), ] return inputs @@ -272,10 +272,10 @@ class TestTransformerEncoder(ModuleApiTest): def make_inputs(self): inputs = [ - Input("enc_input", [None, None, self.inputs[0].shape[-1]], - "float32"), - Input("attn_bias", [None, 
self.inputs[1].shape[1], None, None], - "float32"), + Input([None, None, self.inputs[0].shape[-1]], "float32", + "enc_input"), + Input([None, self.inputs[1].shape[1], None, None], "float32", + "attn_bias"), ] return inputs @@ -336,14 +336,14 @@ class TestTransformerDecoder(TestTransformerEncoder): def make_inputs(self): inputs = [ - Input("dec_input", [None, None, self.inputs[0].shape[-1]], - "float32"), - Input("enc_output", [None, None, self.inputs[0].shape[-1]], - "float32"), - Input("self_attn_bias", - [None, self.inputs[-1].shape[1], None, None], "float32"), - Input("cross_attn_bias", - [None, self.inputs[-1].shape[1], None, None], "float32"), + Input([None, None, self.inputs[0].shape[-1]], "float32", + "dec_input"), + Input([None, None, self.inputs[0].shape[-1]], "float32", + "enc_output"), + Input([None, self.inputs[-1].shape[1], None, None], "float32", + "self_attn_bias"), + Input([None, self.inputs[-1].shape[1], None, None], "float32", + "cross_attn_bias"), ] return inputs @@ -431,10 +431,10 @@ class TestTransformerBeamSearchDecoder(ModuleApiTest): def make_inputs(self): inputs = [ - Input("enc_output", [None, None, self.inputs[0].shape[-1]], - "float32"), - Input("trg_src_attn_bias", - [None, self.inputs[1].shape[1], None, None], "float32"), + Input([None, None, self.inputs[0].shape[-1]], "float32", + "enc_output"), + Input([None, self.inputs[1].shape[1], None, None], "float32", + "trg_src_attn_bias"), ] return inputs @@ -473,9 +473,9 @@ class TestSequenceTagging(ModuleApiTest): def make_inputs(self): inputs = [ - Input("word", [None, None], "int64"), - Input("lengths", [None], "int64"), - Input("target", [None, None], "int64"), + Input([None, None], "int64", "word"), + Input([None], "int64", "lengths"), + Input([None, None], "int64", "target"), ] return inputs @@ -517,7 +517,7 @@ class TestStackedRNN(ModuleApiTest): def make_inputs(self): inputs = [ - Input("input", [None, None, self.inputs[-1].shape[-1]], "float32"), + Input([None, None, self.inputs[-1].shape[-1]], "float32", "input"), ] return inputs @@ -543,7 +543,7 @@ class TestLSTM(ModuleApiTest): def make_inputs(self): inputs = [ - Input("input", [None, None, self.inputs[-1].shape[-1]], "float32"), + Input([None, None, self.inputs[-1].shape[-1]], "float32", "input"), ] return inputs @@ -579,7 +579,7 @@ class TestBiLSTM(ModuleApiTest): def make_inputs(self): inputs = [ - Input("input", [None, None, self.inputs[-1].shape[-1]], "float32"), + Input([None, None, self.inputs[-1].shape[-1]], "float32", "input"), ] return inputs @@ -609,7 +609,7 @@ class TestGRU(ModuleApiTest): def make_inputs(self): inputs = [ - Input("input", [None, None, self.inputs[-1].shape[-1]], "float32"), + Input([None, None, self.inputs[-1].shape[-1]], "float32", "input"), ] return inputs @@ -645,7 +645,7 @@ class TestBiGRU(ModuleApiTest): def make_inputs(self): inputs = [ - Input("input", [None, None, self.inputs[-1].shape[-1]], "float32"), + Input([None, None, self.inputs[-1].shape[-1]], "float32", "input"), ] return inputs @@ -680,7 +680,7 @@ class TestCNNEncoder(ModuleApiTest): def make_inputs(self): inputs = [ - Input("input", [None, self.inputs[-1].shape[1], None], "float32"), + Input([None, self.inputs[-1].shape[1], None], "float32", "input"), ] return inputs diff --git a/python/paddle/incubate/hapi/tests/test_uncombined_weight2state_dict.py b/python/paddle/incubate/hapi/tests/test_uncombined_weight2state_dict.py index c2035a8b5c5958d54c79d6ee0ff6df654bb35d51..26ec53014b1c3b113a0e1ee82f3b9edfe9f48a3f 100644 --- 
a/python/paddle/incubate/hapi/tests/test_uncombined_weight2state_dict.py +++ b/python/paddle/incubate/hapi/tests/test_uncombined_weight2state_dict.py @@ -22,7 +22,7 @@ import shutil import tempfile from paddle import fluid -from paddle.nn import Conv2D, Pool2D, Linear, ReLU, Sequential +from paddle.nn import Conv2d, Pool2D, Linear, ReLU, Sequential from paddle.incubate.hapi.utils import uncombined_weight_to_state_dict @@ -32,11 +32,11 @@ class LeNetDygraph(fluid.dygraph.Layer): super(LeNetDygraph, self).__init__() self.num_classes = num_classes self.features = Sequential( - Conv2D( + Conv2d( 1, 6, 3, stride=1, padding=1), ReLU(), Pool2D(2, 'max', 2), - Conv2D( + Conv2d( 6, 16, 5, stride=1, padding=0), ReLU(), Pool2D(2, 'max', 2)) diff --git a/python/paddle/incubate/hapi/tests/test_vision_models.py b/python/paddle/incubate/hapi/tests/test_vision_models.py index 16dbe431be801c9cd7ce48c4cd1444b7e0e558a4..2dc9355bcc3005d48b7046123b024fa2a91594c3 100644 --- a/python/paddle/incubate/hapi/tests/test_vision_models.py +++ b/python/paddle/incubate/hapi/tests/test_vision_models.py @@ -28,7 +28,7 @@ class TestVisonModels(unittest.TestCase): else: net = models.__dict__[arch](pretrained=pretrained) - input = hapi.Input('image', [None, 3, 224, 224], 'float32') + input = hapi.Input([None, 3, 224, 224], 'float32', 'image') model = hapi.Model(net, input) model.prepare() @@ -71,7 +71,7 @@ class TestVisonModels(unittest.TestCase): self.models_infer('resnet152') def test_lenet(self): - input = hapi.Input('x', [None, 1, 28, 28], 'float32') + input = hapi.Input([None, 1, 28, 28], 'float32', 'x') lenet = hapi.Model(models.__dict__['LeNet'](), input) lenet.prepare() diff --git a/python/paddle/incubate/hapi/vision/models/lenet.py b/python/paddle/incubate/hapi/vision/models/lenet.py index db1d894b4aa5f2535795c6350faad6ee3aee1164..dc7b094de0f26e04b9f07d011d3ce492950df269 100644 --- a/python/paddle/incubate/hapi/vision/models/lenet.py +++ b/python/paddle/incubate/hapi/vision/models/lenet.py @@ -13,7 +13,7 @@ #limitations under the License. import paddle.fluid as fluid -from paddle.nn import Conv2D, Pool2D, Linear, ReLU, Sequential +from paddle.nn import Conv2d, Pool2D, Linear, ReLU, Sequential __all__ = ['LeNet'] @@ -39,11 +39,11 @@ class LeNet(fluid.dygraph.Layer): super(LeNet, self).__init__() self.num_classes = num_classes self.features = Sequential( - Conv2D( + Conv2d( 1, 6, 3, stride=1, padding=1), ReLU(), Pool2D(2, 'max', 2), - Conv2D( + Conv2d( 6, 16, 5, stride=1, padding=0), ReLU(), Pool2D(2, 'max', 2)) diff --git a/python/paddle/incubate/hapi/vision/models/vgg.py b/python/paddle/incubate/hapi/vision/models/vgg.py index 74e7228e5249fe990d037c9f12e75b6d4839c591..30f6e120b2502113045b3583686360f4ed2c32ac 100644 --- a/python/paddle/incubate/hapi/vision/models/vgg.py +++ b/python/paddle/incubate/hapi/vision/models/vgg.py @@ -13,7 +13,7 @@ # limitations under the License. 
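
For reference, a small sketch (not part of the patch) of the renamed layer API the model definitions above now use: `paddle.nn.Conv2d` with positional in/out channels and kernel size, replacing the old `Conv2D`.

    from paddle.nn import Conv2d, Pool2D, ReLU, Sequential

    # Same feature stack as the LeNet definition above, with the new layer name.
    features = Sequential(
        Conv2d(1, 6, 3, stride=1, padding=1), ReLU(), Pool2D(2, 'max', 2),
        Conv2d(6, 16, 5, stride=1, padding=0), ReLU(), Pool2D(2, 'max', 2))
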
import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear +from paddle.nn import Conv2d, Pool2D, BatchNorm, Linear, ReLU from paddle.fluid.dygraph.container import Sequential from ...download import get_weights_path_from_url @@ -105,12 +105,11 @@ def make_layers(cfg, batch_norm=False): layers += [Pool2D(pool_size=2, pool_stride=2)] else: if batch_norm: - conv2d = Conv2D(in_channels, v, filter_size=3, padding=1) - layers += [conv2d, BatchNorm(v, act='relu')] + conv2d = Conv2d(in_channels, v, kernel_size=3, padding=1) + layers += [conv2d, BatchNorm(v), ReLU()] else: - conv2d = Conv2D( - in_channels, v, filter_size=3, padding=1, act='relu') - layers += [conv2d] + conv2d = Conv2d(in_channels, v, kernel_size=3, padding=1) + layers += [conv2d, ReLU()] in_channels = v return Sequential(*layers) diff --git a/python/paddle/io/__init__.py b/python/paddle/io/__init__.py index 875f3ff2e915513037942ff7a3323be48213fc12..89bbd5916578b6e3169452d85e581c438f2bbb47 100644 --- a/python/paddle/io/__init__.py +++ b/python/paddle/io/__init__.py @@ -20,6 +20,9 @@ __all__ = [ # 'Transform', 'DataLoader', 'get_worker_info', + 'Sampler', + 'SequenceSampler', + 'RandomSampler', 'load', 'save', 'load_program_state', @@ -38,7 +41,8 @@ __all__ = [ ] from ..fluid.io import DataLoader -from ..fluid.dataloader import Dataset, IterableDataset, BatchSampler, get_worker_info +from ..fluid.dataloader import Dataset, IterableDataset, BatchSampler, get_worker_info, \ + Sampler, SequenceSampler, RandomSampler from ..fluid.io import load, save, load_program_state, set_program_state, \ load_inference_model, save_inference_model, batch from ..reader import shuffle, buffered, cache, chain, firstn, compose, map_readers, xmap_readers diff --git a/python/paddle/metric/__init__.py b/python/paddle/metric/__init__.py index e03336f6dbab7b375701e1e694aee0bbbfa4b1cd..6e197881fc0bcbc32f9d9d738237082138f9410b 100644 --- a/python/paddle/metric/__init__.py +++ b/python/paddle/metric/__init__.py @@ -12,17 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define the functions to calculate metric in this directory -__all__ = [ - 'Accuracy', 'Auc', 'ChunkEvaluator', 'CompositeMetric', 'DetectionMAP', - 'EditDistance', 'Precision', 'Recall', 'accuracy', 'auc', 'chunk_eval', - 'cos_sim', 'mean_iou' -] - - - -from ..fluid.metrics import Accuracy, Auc, ChunkEvaluator, CompositeMetric, DetectionMAP, EditDistance, \ - Precision, Recall +from .metrics import * +from . import metrics from ..fluid.layers.metric_op import accuracy, auc from ..fluid.layers.nn import chunk_eval, cos_sim, mean_iou + +__all__ = metrics.__all__ + [ + 'accuracy', + 'auc', + 'chunk_eval', + 'cos_sim', + 'mean_iou', +] diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..110a62c300559b9037cd2ca735aebd1946ba0ce9 --- /dev/null +++ b/python/paddle/metric/metrics.py @@ -0,0 +1,738 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import abc +import numpy as np + +import paddle + +__all__ = ['Metric', 'Accuracy', 'Precision', 'Recall', 'Auc'] + + +def _is_numpy_(var): + return isinstance(var, (np.ndarray, np.generic)) + + +@six.add_metaclass(abc.ABCMeta) +class Metric(object): + """ + Base class for metric, encapsulates metric logic and APIs + Usage: + + m = SomeMetric() + for prediction, label in ...: + m.update(prediction, label) + m.accumulate() + + Advanced usage for :code:`compute`: + + Metric calculation can be accelerated by calculating metric states + from model outputs and labels by build-in operators not by Python/NumPy + in :code:`compute`, metric states will be fetched as NumPy array and + call :code:`update` with states in NumPy format. + Metric calculated as follows (operations in Model and Metric are + indicated with curly brackets, while data nodes not): + inputs & labels || ------------------ + | || + {model} || + | || + outputs & labels || + | || tensor data + {Metric.compute} || + | || + metric states(tensor) || + | || + {fetch as numpy} || ------------------ + | || + metric states(numpy) || numpy data + | || + {Metric.update} \/ ------------------ + Examples: + + For :code:`Accuracy` metric, which takes :code:`pred` and :code:`label` + as inputs, we can calculate the correct prediction matrix between + :code:`pred` and :code:`label` in :code:`compute`. + For examples, prediction results contains 10 classes, while :code:`pred` + shape is [N, 10], :code:`label` shape is [N, 1], N is mini-batch size, + and we only need to calculate accurary of top-1 and top-5, we could + calculate the correct prediction matrix of the top-5 scores of the + prediction of each sample like follows, while the correct prediction + matrix shape is [N, 5]. + + .. code-block:: python + def compute(pred, label): + # sort prediction and slice the top-5 scores + pred = paddle.argsort(pred, descending=True)[:, :5] + # calculate whether the predictions are correct + correct = pred == label + return paddle.cast(correct, dtype='float32') + + With the :code:`compute`, we split some calculations to OPs (which + may run on GPU devices, will be faster), and only fetch 1 tensor with + shape as [N, 5] instead of 2 tensors with shapes as [N, 10] and [N, 1]. + :code:`update` can be define as follows: + + .. code-block:: python + def update(self, correct): + accs = [] + for i, k in enumerate(self.topk): + num_corrects = correct[:, :k].sum() + num_samples = len(correct) + accs.append(float(num_corrects) / num_samples) + self.total[i] += num_corrects + self.count[i] += num_samples + return accs + """ + + def __init__(self): + pass + + @abc.abstractmethod + def reset(self): + """ + Reset states and result + """ + raise NotImplementedError("function 'reset' not implemented in {}.". 
+ format(self.__class__.__name__)) + + @abc.abstractmethod + def update(self, *args): + """ + Update states for metric + + Inputs of :code:`update` is the outputs of :code:`Metric.compute`, + if :code:`compute` is not defined, the inputs of :code:`update` + will be flatten arguments of **output** of mode and **label** from data: + :code:`update(output1, output2, ..., label1, label2,...)` + + see :code:`Metric.compute` + """ + raise NotImplementedError("function 'update' not implemented in {}.". + format(self.__class__.__name__)) + + @abc.abstractmethod + def accumulate(self): + """ + Accumulates statistics, computes and returns the metric value + """ + raise NotImplementedError( + "function 'accumulate' not implemented in {}.".format( + self.__class__.__name__)) + + @abc.abstractmethod + def name(self): + """ + Returns metric name + """ + raise NotImplementedError("function 'name' not implemented in {}.". + format(self.__class__.__name__)) + + def compute(self, *args): + """ + This API is advanced usage to accelerate metric calculating, calulations + from outputs of model to the states which should be updated by Metric can + be defined here, where Paddle OPs is also supported. Outputs of this API + will be the inputs of "Metric.update". + + If :code:`compute` is defined, it will be called with **outputs** + of model and **labels** from data as arguments, all outputs and labels + will be concatenated and flatten and each filed as a separate argument + as follows: + :code:`compute(output1, output2, ..., label1, label2,...)` + + If :code:`compute` is not defined, default behaviour is to pass + input to output, so output format will be: + :code:`return output1, output2, ..., label1, label2,...` + + see :code:`Metric.update` + """ + return args + + +class Accuracy(Metric): + """ + Encapsulates accuracy metric logic. + + Args: + topk (int|tuple(int)): Number of top elements to look at + for computing accuracy. Default is (1,). + name (str, optional): String name of the metric instance. Default + is `acc`. + + Example by standalone: + + .. code-block:: python + + import numpy as np + import paddle + + paddle.disable_static() + x = paddle.to_tensor(np.array([ + [0.1, 0.2, 0.3, 0.4], + [0.1, 0.4, 0.3, 0.2], + [0.1, 0.2, 0.4, 0.3], + [0.1, 0.2, 0.3, 0.4]])) + y = paddle.to_tensor(np.array([[0], [1], [2], [3]])) + + m = paddle.metric.Accuracy() + correct = m.compute(x, y) + m.update(correct) + res = m.accumulate() + print(res) # 0.75 + + + Example with Model API: + + .. code-block:: python + + import paddle + import paddle.incubate.hapi as hapi + + paddle.disable_static() + train_dataset = hapi.datasets.MNIST(mode='train') + + model = hapi.Model(hapi.vision.LeNet(classifier_activation=None)) + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=model.parameters()) + model.prepare( + optim, + loss=paddle.nn.CrossEntropyLoss(), + metrics=paddle.metric.Accuracy()) + + model.fit(train_dataset, batch_size=64) + + """ + + def __init__(self, topk=(1, ), name=None, *args, **kwargs): + super(Accuracy, self).__init__(*args, **kwargs) + self.topk = topk + self.maxk = max(topk) + self._init_name(name) + self.reset() + + def compute(self, pred, label, *args): + """ + Compute the top-k (maxinum value in `topk`) indices. + + Args: + pred (Tensor): The predicted value is a Tensor wit type + float32 or float64. + label (Tensor): The ground truth value is a 2D Tensor, its + shape is [batch_size, 1] and type is int64. + + Return: + Tensor: Correct mask, a tensor with shape [batch_size, topk]. 
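
A short illustrative walk-through (not part of the patch) of how the correct mask returned by `compute` drives `update` and `accumulate` when several `topk` values are tracked:

    import numpy as np
    import paddle

    paddle.disable_static()
    m = paddle.metric.Accuracy(topk=(1, 2))
    pred = paddle.to_tensor(np.array([[0.1, 0.7, 0.2],
                                      [0.6, 0.3, 0.1]], dtype='float32'))
    label = paddle.to_tensor(np.array([[1], [2]], dtype='int64'))
    correct = m.compute(pred, label)   # [2, 2] mask over the top-2 predictions
    m.update(correct)                  # adds to self.total / self.count per k
    print(m.accumulate())              # [0.5, 0.5] -> top-1 and top-2 accuracy
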
+ """ + pred = paddle.argsort(pred, descending=True)[:, :self.maxk] + correct = pred == label + return paddle.cast(correct, dtype='float32') + + def update(self, correct, *args): + """ + Update the metrics states (correct count and total count), in order to + calculate cumulative accuracy of all instances. This function also + returns the accuracy of current step. + + Args: + correct: Correct mask, a tensor with shape [batch_size, topk]. + + Return: + Tensor: the accuracy of current step. + """ + if isinstance(correct, paddle.Tensor): + correct = correct.numpy() + accs = [] + for i, k in enumerate(self.topk): + num_corrects = correct[:, :k].sum() + num_samples = len(correct) + accs.append(float(num_corrects) / num_samples) + self.total[i] += num_corrects + self.count[i] += num_samples + accs = accs[0] if len(self.topk) == 1 else accs + return accs + + def reset(self): + """ + Resets all of the metric state. + """ + self.total = [0.] * len(self.topk) + self.count = [0] * len(self.topk) + + def accumulate(self): + """ + Computes and returns the accumulated metric. + """ + res = [] + for t, c in zip(self.total, self.count): + r = float(t) / c if c > 0 else 0. + res.append(r) + res = res[0] if len(self.topk) == 1 else res + return res + + def _init_name(self, name): + name = name or 'acc' + if self.maxk != 1: + self._name = ['{}_top{}'.format(name, k) for k in self.topk] + else: + self._name = [name] + + def name(self): + """ + Return name of metric instance. + """ + return self._name + + +class Precision(Metric): + """ + Precision (also called positive predictive value) is the fraction of + relevant instances among the retrieved instances. Refer to + https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers + + Noted that this class manages the precision score only for binary + classification task. + + Args: + name (str, optional): String name of the metric instance. + Default is `precision`. + + Example by standalone: + + .. code-block:: python + + import numpy as np + import paddle + + x = np.array([0.1, 0.5, 0.6, 0.7]) + y = np.array([0, 1, 1, 1]) + + m = paddle.metric.Precision() + m.update(x, y) + res = m.accumulate() + print(res) # 1.0 + + + Example with Model API: + + .. code-block:: python + + import numpy as np + + import paddle + import paddle.nn as nn + import paddle.incubate.hapi as hapi + + class Data(paddle.io.Dataset): + def __init__(self): + super(Data, self).__init__() + self.n = 1024 + self.x = np.random.randn(self.n, 10).astype('float32') + self.y = np.random.randint(2, size=(self.n, 1)).astype('float32') + + def __getitem__(self, idx): + return self.x[idx], self.y[idx] + + def __len__(self): + return self.n + + paddle.disable_static() + model = hapi.Model(nn.Sequential( + nn.Linear(10, 1), + nn.Sigmoid() + )) + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=model.parameters()) + model.prepare( + optim, + loss=nn.BCELoss(), + metrics=paddle.metric.Precision()) + + data = Data() + model.fit(data, batch_size=16) + """ + + def __init__(self, name='precision', *args, **kwargs): + super(Precision, self).__init__(*args, **kwargs) + self.tp = 0 # true positive + self.fp = 0 # false positive + self._name = name + + def update(self, preds, labels): + """ + Update the states based on the current mini-batch prediction results. + + Args: + preds (numpy.ndarray): The prediction result, usually the output + of two-class sigmoid function. It should be a vector (column + vector or row vector) with data type: 'float64' or 'float32'. 
+ labels (numpy.ndarray): The ground truth (labels), + the shape should keep the same as preds. + The data type is 'int32' or 'int64'. + """ + if isinstance(preds, paddle.Tensor): + preds = preds.numpy() + elif not _is_numpy_(preds): + raise ValueError("The 'preds' must be a numpy ndarray or Tensor.") + + if isinstance(labels, paddle.Tensor): + labels = labels.numpy() + elif not _is_numpy_(labels): + raise ValueError("The 'labels' must be a numpy ndarray or Tensor.") + + sample_num = labels.shape[0] + preds = np.floor(preds + 0.5).astype("int32") + + for i in range(sample_num): + pred = preds[i] + label = labels[i] + if pred == 1: + if pred == label: + self.tp += 1 + else: + self.fp += 1 + + def reset(self): + """ + Resets all of the metric state. + """ + self.tp = 0 + self.fp = 0 + + def accumulate(self): + """ + Calculate the final precision. + + Returns: + A scaler float: results of the calculated precision. + """ + ap = self.tp + self.fp + return float(self.tp) / ap if ap != 0 else .0 + + def name(self): + """ + Returns metric name + """ + return self._name + + +class Recall(Metric): + """ + Recall (also known as sensitivity) is the fraction of + relevant instances that have been retrieved over the + total amount of relevant instances + + Refer to: + https://en.wikipedia.org/wiki/Precision_and_recall + + Noted that this class manages the recall score only for + binary classification task. + + Args: + name (str, optional): String name of the metric instance. + Default is `recall`. + + Example by standalone: + + .. code-block:: python + + import numpy as np + import paddle + + x = np.array([0.1, 0.5, 0.6, 0.7]) + y = np.array([1, 0, 1, 1]) + + m = paddle.metric.Recall() + m.update(x, y) + res = m.accumulate() + print(res) # 2.0 / 3.0 + + + Example with Model API: + + .. code-block:: python + + import numpy as np + + import paddle + import paddle.nn as nn + import paddle.incubate.hapi as hapi + + class Data(paddle.io.Dataset): + def __init__(self): + super(Data, self).__init__() + self.n = 1024 + self.x = np.random.randn(self.n, 10).astype('float32') + self.y = np.random.randint(2, size=(self.n, 1)).astype('float32') + + def __getitem__(self, idx): + return self.x[idx], self.y[idx] + + def __len__(self): + return self.n + + paddle.disable_static() + model = hapi.Model(nn.Sequential( + nn.Linear(10, 1), + nn.Sigmoid() + )) + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=model.parameters()) + model.prepare( + optim, + loss=nn.BCELoss(), + metrics=[paddle.metric.Precision(), paddle.metric.Recall()]) + + data = Data() + model.fit(data, batch_size=16) + """ + + def __init__(self, name='recall', *args, **kwargs): + super(Recall, self).__init__(*args, **kwargs) + self.tp = 0 # true positive + self.fn = 0 # false negative + self._name = name + + def update(self, preds, labels): + """ + Update the states based on the current mini-batch prediction results. + + Args: + preds(numpy.array): prediction results of current mini-batch, + the output of two-class sigmoid function. + Shape: [batch_size, 1]. Dtype: 'float64' or 'float32'. + labels(numpy.array): ground truth (labels) of current mini-batch, + the shape should keep the same as preds. + Shape: [batch_size, 1], Dtype: 'int32' or 'int64'. 
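
A brief numeric sketch (illustrative only, not part of the patch) contrasting the two counters: Precision accumulates tp/(tp+fp) over thresholded predictions, while Recall accumulates tp/(tp+fn) over positive labels.

    import numpy as np
    import paddle

    preds = np.array([0.1, 0.7, 0.8, 0.4], dtype='float32')  # rounds to 0, 1, 1, 0
    labels = np.array([0, 1, 0, 1], dtype='int64')

    p, r = paddle.metric.Precision(), paddle.metric.Recall()
    p.update(preds, labels)
    r.update(preds, labels)
    print(p.accumulate())  # 0.5: one true positive, one false positive
    print(r.accumulate())  # 0.5: one true positive, one false negative
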
+ """ + if isinstance(preds, paddle.Tensor): + preds = preds.numpy() + elif not _is_numpy_(preds): + raise ValueError("The 'preds' must be a numpy ndarray or Tensor.") + + if isinstance(labels, paddle.Tensor): + labels = labels.numpy() + elif not _is_numpy_(labels): + raise ValueError("The 'labels' must be a numpy ndarray or Tensor.") + + sample_num = labels.shape[0] + preds = np.rint(preds).astype("int32") + + for i in range(sample_num): + pred = preds[i] + label = labels[i] + if label == 1: + if pred == label: + self.tp += 1 + else: + self.fn += 1 + + def accumulate(self): + """ + Calculate the final recall. + + Returns: + A scaler float: results of the calculated Recall. + """ + recall = self.tp + self.fn + return float(self.tp) / recall if recall != 0 else .0 + + def reset(self): + """ + Resets all of the metric state. + """ + self.tp = 0 + self.fn = 0 + + def name(self): + """ + Returns metric name + """ + return self._name + + +class Auc(Metric): + """ + The auc metric is for binary classification. + Refer to https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve. + Please notice that the auc metric is implemented with python, which may be a little bit slow. + + The `auc` function creates four local variables, `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` that are used to + compute the AUC. To discretize the AUC curve, a linearly spaced set of + thresholds is used to compute pairs of recall and precision values. The area + under the ROC-curve is therefore computed using the height of the recall + values by the false positive rate, while the area under the PR-curve is the + computed using the height of the precision values by the recall. + + Args: + curve (str): Specifies the mode of the curve to be computed, + 'ROC' or 'PR' for the Precision-Recall-curve. Default is 'ROC'. + num_thresholds (int): The number of thresholds to use when + discretizing the roc curve. Default is 4095. + 'ROC' or 'PR' for the Precision-Recall-curve. Default is 'ROC'. + name (str, optional): String name of the metric instance. Default + is `auc`. + + "NOTE: only implement the ROC curve type via Python now." + + Example by standalone: + .. code-block:: python + + import numpy as np + import paddle + + m = paddle.metric.Auc() + + n = 8 + class0_preds = np.random.random(size = (n, 1)) + class1_preds = 1 - class0_preds + + preds = np.concatenate((class0_preds, class1_preds), axis=1) + labels = np.random.randint(2, size = (n, 1)) + + m.update(preds=preds, labels=labels) + res = m.accumulate() + + + Example with Model API: + + .. 
code-block:: python + + import numpy as np + import paddle + import paddle.nn as nn + import paddle.incubate.hapi as hapi + + class Data(paddle.io.Dataset): + def __init__(self): + super(Data, self).__init__() + self.n = 1024 + self.x = np.random.randn(self.n, 10).astype('float32') + self.y = np.random.randint(2, size=(self.n, 1)).astype('int64') + + def __getitem__(self, idx): + return self.x[idx], self.y[idx] + + def __len__(self): + return self.n + + paddle.disable_static() + model = hapi.Model(nn.Sequential( + nn.Linear(10, 2, act='softmax'), + )) + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=model.parameters()) + + def loss(x, y): + return nn.functional.nll_loss(paddle.log(x), y) + + model.prepare( + optim, + loss=loss, + metrics=paddle.metric.Auc()) + data = Data() + model.fit(data, batch_size=16) + """ + + def __init__(self, + curve='ROC', + num_thresholds=4095, + name='auc', + *args, + **kwargs): + super(Auc, self).__init__(*args, **kwargs) + self._curve = curve + self._num_thresholds = num_thresholds + + _num_pred_buckets = num_thresholds + 1 + self._stat_pos = np.zeros(_num_pred_buckets) + self._stat_neg = np.zeros(_num_pred_buckets) + self._name = name + + def update(self, preds, labels): + """ + Update the auc curve with the given predictions and labels. + + Args: + preds (numpy.array): An numpy array in the shape of + (batch_size, 2), preds[i][j] denotes the probability of + classifying the instance i into the class j. + labels (numpy.array): an numpy array in the shape of + (batch_size, 1), labels[i] is either o or 1, + representing the label of the instance i. + """ + if isinstance(labels, paddle.Tensor): + labels = labels.numpy() + elif not _is_numpy_(labels): + raise ValueError("The 'labels' must be a numpy ndarray or Tensor.") + + if isinstance(preds, paddle.Tensor): + preds = preds.numpy() + elif not _is_numpy_(preds): + raise ValueError("The 'preds' must be a numpy ndarray or Tensor.") + + for i, lbl in enumerate(labels): + value = preds[i, 1] + bin_idx = int(value * self._num_thresholds) + assert bin_idx <= self._num_thresholds + if lbl: + self._stat_pos[bin_idx] += 1.0 + else: + self._stat_neg[bin_idx] += 1.0 + + @staticmethod + def trapezoid_area(x1, x2, y1, y2): + return abs(x1 - x2) * (y1 + y2) / 2.0 + + def accumulate(self): + """ + Return the area (a float score) under auc curve + + Return: + float: the area under auc curve + """ + tot_pos = 0.0 + tot_neg = 0.0 + auc = 0.0 + + idx = self._num_thresholds + while idx >= 0: + tot_pos_prev = tot_pos + tot_neg_prev = tot_neg + tot_pos += self._stat_pos[idx] + tot_neg += self._stat_neg[idx] + auc += self.trapezoid_area(tot_neg, tot_neg_prev, tot_pos, + tot_pos_prev) + idx -= 1 + + return auc / tot_pos / tot_neg if tot_pos > 0.0 and tot_neg > 0.0 else 0.0 + + def reset(self): + """ + Reset states and result + """ + _num_pred_buckets = self._num_thresholds + 1 + self._stat_pos = np.zeros(_num_pred_buckets) + self._stat_neg = np.zeros(_num_pred_buckets) + + def name(self): + """ + Returns metric name + """ + return self._name diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 9583d9a0a39b362ce4bda2c11cb976fbe705cbe3..dd5d0d269a6cb6377b11b6c98e86eef4ee0f8b57 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -18,6 +18,7 @@ from .layer import norm from .functional import extension from .layer import common +from .utils import weight_norm_hook from . 
import initializer @@ -25,6 +26,7 @@ __all__ = [] __all__ += norm.__all__ __all__ += extension.__all__ __all__ += common.__all__ +__all__ += weight_norm_hook.__all__ # TODO: define alias in nn directory # from .clip import ErrorClipByValue #DEFINE_ALIAS @@ -49,26 +51,56 @@ from .decode import beam_search_decode #DEFINE_ALIAS # from .decode import ctc_greedy_decoder #DEFINE_ALIAS # from .decode import dynamic_decode #DEFINE_ALIAS from .decode import gather_tree #DEFINE_ALIAS -from .input import data #DEFINE_ALIAS # from .input import Input #DEFINE_ALIAS -# from .layer.activation import PReLU #DEFINE_ALIAS -from .layer.activation import ReLU #DEFINE_ALIAS +from .layer.activation import ELU +from .layer.activation import GELU +from .layer.activation import Tanh +from .layer.activation import Hardshrink +from .layer.activation import Hardtanh +from .layer.activation import PReLU +from .layer.activation import ReLU +from .layer.activation import ReLU6 #DEFINE_ALIAS +from .layer.activation import SELU #DEFINE_ALIAS from .layer.activation import LeakyReLU #DEFINE_ALIAS from .layer.activation import Sigmoid #DEFINE_ALIAS -# from .layer.activation import Softmax #DEFINE_ALIAS +from .layer.activation import LogSigmoid +from .layer.activation import Softmax #DEFINE_ALIAS +from .layer.activation import Softplus #DEFINE_ALIAS +from .layer.activation import Softshrink #DEFINE_ALIAS +from .layer.activation import Softsign #DEFINE_ALIAS +from .layer.activation import Tanhshrink #DEFINE_ALIAS from .layer.activation import LogSoftmax #DEFINE_ALIAS from .layer.activation import HSigmoid #DEFINE_ALIAS from .layer.common import BilinearTensorProduct #DEFINE_ALIAS from .layer.common import Pool2D #DEFINE_ALIAS from .layer.common import Pad2D #DEFINE_ALIAS +from .layer.common import ReflectionPad1d #DEFINE_ALIAS +from .layer.common import ReplicationPad1d #DEFINE_ALIAS +from .layer.common import ConstantPad1d #DEFINE_ALIAS +from .layer.common import ReflectionPad2d #DEFINE_ALIAS +from .layer.common import ReplicationPad2d #DEFINE_ALIAS +from .layer.common import ConstantPad2d #DEFINE_ALIAS +from .layer.common import ZeroPad2d #DEFINE_ALIAS +from .layer.common import ReplicationPad3d #DEFINE_ALIAS +from .layer.common import ConstantPad3d #DEFINE_ALIAS +from .layer.common import CosineSimilarity #DEFINE_ALIAS from .layer.common import Embedding #DEFINE_ALIAS from .layer.common import Linear #DEFINE_ALIAS from .layer.common import Flatten #DEFINE_ALIAS from .layer.common import UpSample #DEFINE_ALIAS -from .layer.conv import Conv2D #DEFINE_ALIAS -from .layer.conv import Conv2DTranspose #DEFINE_ALIAS -from .layer.conv import Conv3D #DEFINE_ALIAS -from .layer.conv import Conv3DTranspose #DEFINE_ALIAS +from .layer.common import Bilinear #DEFINE_ALIAS +from .layer.common import Dropout #DEFINE_ALIAS +from .layer.common import Dropout2D #DEFINE_ALIAS +from .layer.common import Dropout3D #DEFINE_ALIAS +from .layer.common import AlphaDropout #DEFINE_ALIAS +from .layer.pooling import AdaptiveAvgPool2d #DEFINE_ALIAS +from .layer.pooling import AdaptiveAvgPool3d #DEFINE_ALIAS +from .layer.conv import Conv1d #DEFINE_ALIAS +from .layer.conv import Conv2d #DEFINE_ALIAS +from .layer.conv import Conv3d #DEFINE_ALIAS +from .layer.conv import ConvTranspose1d #DEFINE_ALIAS +from .layer.conv import ConvTranspose2d #DEFINE_ALIAS +from .layer.conv import ConvTranspose3d #DEFINE_ALIAS # from .layer.conv import TreeConv #DEFINE_ALIAS # from .layer.conv import Conv1D #DEFINE_ALIAS from .layer.extension import RowConv #DEFINE_ALIAS @@ 
-80,13 +112,18 @@ from .layer.extension import RowConv #DEFINE_ALIAS # from .layer.learning_rate import PiecewiseDecay #DEFINE_ALIAS # from .layer.learning_rate import PolynomialDecay #DEFINE_ALIAS # from .layer.loss import NCELoss #DEFINE_ALIAS +from .layer.loss import BCEWithLogitsLoss #DEFINE_ALIAS from .layer.loss import CrossEntropyLoss #DEFINE_ALIAS from .layer.loss import MSELoss #DEFINE_ALIAS from .layer.loss import L1Loss #DEFINE_ALIAS from .layer.loss import NLLLoss #DEFINE_ALIAS from .layer.loss import BCELoss #DEFINE_ALIAS +from .layer.loss import KLDivLoss #DEFINE_ALIAS from .layer.loss import MarginRankingLoss #DEFINE_ALIAS +from .layer.loss import CTCLoss #DEFINE_ALIAS +from .layer.loss import SmoothL1Loss #DEFINE_ALIAS from .layer.norm import BatchNorm #DEFINE_ALIAS +from .layer.norm import SyncBatchNorm #DEFINE_ALIAS from .layer.norm import GroupNorm #DEFINE_ALIAS from .layer.norm import LayerNorm #DEFINE_ALIAS from .layer.norm import SpectralNorm #DEFINE_ALIAS @@ -94,9 +131,18 @@ from .layer.norm import InstanceNorm #DEFINE_ALIAS # from .layer.rnn import RNNCell #DEFINE_ALIAS # from .layer.rnn import GRUCell #DEFINE_ALIAS # from .layer.rnn import LSTMCell #DEFINE_ALIAS +from .layer.transformer import MultiHeadAttention +from .layer.transformer import TransformerEncoderLayer +from .layer.transformer import TransformerEncoder +from .layer.transformer import TransformerDecoderLayer +from .layer.transformer import TransformerDecoder +from .layer.transformer import Transformer from .layer.distance import PairwiseDistance #DEFINE_ALIAS +from .layer.vision import PixelShuffle + from .layer import loss #DEFINE_ALIAS from .layer import conv #DEFINE_ALIAS +from .layer import vision #DEFINE_ALIAS from ..fluid.dygraph.layers import Layer #DEFINE_ALIAS from ..fluid.dygraph.container import LayerList, ParameterList, Sequential #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index e3426b22484e4cea764f92cc44cc641386b7f6e4..afc1614732d06dcef4ca0e1e75cd93e28d6a2d3d 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -25,18 +25,23 @@ from . import extension __all__ += extension.__all__ from . import common __all__ += common.__all__ +from . import pooling +__all__ += pooling.__all__ +from . 
import loss +__all__ += loss.__all__ from .activation import brelu #DEFINE_ALIAS from .activation import elu #DEFINE_ALIAS from .activation import erf #DEFINE_ALIAS from .activation import gelu #DEFINE_ALIAS -from .activation import hard_shrink #DEFINE_ALIAS +from .activation import hardshrink #DEFINE_ALIAS +from .activation import hardtanh #DEFINE_ALIAS from .activation import hard_sigmoid #DEFINE_ALIAS from .activation import hard_swish #DEFINE_ALIAS from .activation import hsigmoid #DEFINE_ALIAS from .activation import leaky_relu #DEFINE_ALIAS from .activation import logsigmoid #DEFINE_ALIAS from .activation import maxout #DEFINE_ALIAS -# from .activation import prelu #DEFINE_ALIAS +from .activation import prelu #DEFINE_ALIAS from .activation import relu #DEFINE_ALIAS from .activation import relu6 #DEFINE_ALIAS from .activation import selu #DEFINE_ALIAS @@ -47,10 +52,14 @@ from .activation import softplus #DEFINE_ALIAS from .activation import softshrink #DEFINE_ALIAS from .activation import softsign #DEFINE_ALIAS from .activation import swish #DEFINE_ALIAS -from .activation import tanh_shrink #DEFINE_ALIAS +from .activation import tanh #DEFINE_ALIAS +from .activation import tanhshrink #DEFINE_ALIAS from .activation import thresholded_relu #DEFINE_ALIAS from .activation import log_softmax #DEFINE_ALIAS from .common import dropout #DEFINE_ALIAS +from .common import dropout2d #DEFINE_ALIAS +from .common import dropout3d #DEFINE_ALIAS +from .common import alpha_dropout #DEFINE_ALIAS # from .common import embedding #DEFINE_ALIAS # from .common import fc #DEFINE_ALIAS from .common import label_smooth #DEFINE_ALIAS @@ -58,14 +67,18 @@ from .common import one_hot #DEFINE_ALIAS from .common import pad #DEFINE_ALIAS from .common import pad_constant_like #DEFINE_ALIAS from .common import pad2d #DEFINE_ALIAS +from .common import cosine_similarity #DEFINE_ALIAS from .common import unfold #DEFINE_ALIAS # from .common import bilinear_tensor_product #DEFINE_ALIAS from .common import assign #DEFINE_ALIAS from .common import interpolate #DEFINE_ALIAS +from .common import bilinear #DEFINE_ALIAS +from .conv import conv1d #DEFINE_ALIAS +from .conv import conv_transpose1d #DEFINE_ALIAS from .conv import conv2d #DEFINE_ALIAS -from .conv import conv2d_transpose #DEFINE_ALIAS +from .conv import conv_transpose2d #DEFINE_ALIAS from .conv import conv3d #DEFINE_ALIAS -from .conv import conv3d_transpose #DEFINE_ALIAS +from .conv import conv_transpose3d #DEFINE_ALIAS from .extension import add_position_encoding #DEFINE_ALIAS # from .extension import autoincreased_step_counter #DEFINE_ALIAS from .extension import continuous_value_model #DEFINE_ALIAS @@ -119,6 +132,8 @@ from .lod import hash #DEFINE_ALIAS # from .lod import dynamic_gru #DEFINE_ALIAS # from .lod import dynamic_lstm #DEFINE_ALIAS # from .lod import dynamic_lstmp #DEFINE_ALIAS +from .loss import binary_cross_entropy #DEFINE_ALIAS +from .loss import binary_cross_entropy_with_logits #DEFINE_ALIAS from .loss import bpr_loss #DEFINE_ALIAS from .loss import center_loss #DEFINE_ALIAS from .loss import cross_entropy #DEFINE_ALIAS @@ -126,7 +141,7 @@ from .loss import dice_loss #DEFINE_ALIAS from .loss import edit_distance #DEFINE_ALIAS from .loss import huber_loss #DEFINE_ALIAS from .loss import iou_similarity #DEFINE_ALIAS -from .loss import kldiv_loss #DEFINE_ALIAS +from .loss import kl_div #DEFINE_ALIAS from .loss import l1_loss #DEFINE_ALIAS from .loss import log_loss #DEFINE_ALIAS from .loss import margin_ranking_loss #DEFINE_ALIAS @@ -139,10 +154,12 @@ 
from .loss import sampled_softmax_with_cross_entropy #DEFINE_ALIAS from .loss import sigmoid_cross_entropy_with_logits #DEFINE_ALIAS from .loss import sigmoid_focal_loss #DEFINE_ALIAS from .loss import smooth_l1 #DEFINE_ALIAS +from .loss import smooth_l1_loss #DEFINE_ALIAS from .loss import softmax_with_cross_entropy #DEFINE_ALIAS from .loss import square_error_cost #DEFINE_ALIAS from .loss import ssd_loss #DEFINE_ALIAS from .loss import teacher_student_sigmoid_loss #DEFINE_ALIAS +from .loss import ctc_loss #DEFINE_ALIAS # from .norm import batch_norm #DEFINE_ALIAS # from .norm import data_norm #DEFINE_ALIAS # from .norm import group_norm #DEFINE_ALIAS @@ -150,11 +167,22 @@ from .loss import teacher_student_sigmoid_loss #DEFINE_ALIAS from .norm import l2_normalize #DEFINE_ALIAS # from .norm import layer_norm #DEFINE_ALIAS from .norm import lrn #DEFINE_ALIAS +from .norm import normalize #DEFINE_ALIAS # from .norm import spectral_norm #DEFINE_ALIAS +from .pooling import max_pool1d #DEFINE_ALIAS +from .pooling import avg_pool1d #DEFINE_ALIAS +from .pooling import adaptive_max_pool1d #DEFINE_ALIAS +from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS from .pooling import pool2d #DEFINE_ALIAS from .pooling import pool3d #DEFINE_ALIAS from .pooling import adaptive_pool2d #DEFINE_ALIAS from .pooling import adaptive_pool3d #DEFINE_ALIAS +from .pooling import avg_pool2d #DEFINE_ALIAS +from .pooling import max_pool2d #DEFINE_ALIAS +from .pooling import avg_pool3d #DEFINE_ALIAS +from .pooling import max_pool3d #DEFINE_ALIAS +from .pooling import adaptive_avg_pool2d #DEFINE_ALIAS +from .pooling import adaptive_avg_pool3d #DEFINE_ALIAS # from .rnn import gru_unit #DEFINE_ALIAS # from .rnn import lstm #DEFINE_ALIAS # from .rnn import lstm_unit #DEFINE_ALIAS @@ -166,7 +194,7 @@ from .vision import box_clip #DEFINE_ALIAS from .vision import box_coder #DEFINE_ALIAS from .vision import box_decoder_and_assign #DEFINE_ALIAS from .vision import collect_fpn_proposals #DEFINE_ALIAS -# from .vision import deformable_conv #DEFINE_ALIAS +# from .vision import deformable_conv #DEFINE_ALIAS from .vision import deformable_roi_pooling #DEFINE_ALIAS from .vision import density_prior_box #DEFINE_ALIAS from .vision import detection_output #DEFINE_ALIAS @@ -175,10 +203,10 @@ from .vision import fsp_matrix #DEFINE_ALIAS from .vision import generate_mask_labels #DEFINE_ALIAS from .vision import generate_proposal_labels #DEFINE_ALIAS from .vision import generate_proposals #DEFINE_ALIAS -from .vision import grid_sampler #DEFINE_ALIAS +from .vision import grid_sample #DEFINE_ALIAS from .vision import image_resize #DEFINE_ALIAS from .vision import image_resize_short #DEFINE_ALIAS -# from .vision import multi_box_head #DEFINE_ALIAS +# from .vision import multi_box_head #DEFINE_ALIAS from .vision import pixel_shuffle #DEFINE_ALIAS from .vision import prior_box #DEFINE_ALIAS from .vision import prroi_pool #DEFINE_ALIAS @@ -195,3 +223,4 @@ from .vision import shuffle_channel #DEFINE_ALIAS from .vision import space_to_depth #DEFINE_ALIAS from .vision import yolo_box #DEFINE_ALIAS from .vision import yolov3_loss #DEFINE_ALIAS +from .input import one_hot #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index f524d74f408c033d6a7b2816aebf42a2525247cf..6acb806403ec782e664b9c173abbc29537fea3eb 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -14,61 +14,257 @@ # TODO: define activation functions of neural network from 
...fluid.layers import brelu #DEFINE_ALIAS -from ...fluid.layers import elu #DEFINE_ALIAS from ...fluid.layers import erf #DEFINE_ALIAS -from ...fluid.layers import gelu #DEFINE_ALIAS -from ...fluid.layers import hard_shrink #DEFINE_ALIAS from ...fluid.layers import hard_sigmoid #DEFINE_ALIAS from ...fluid.layers import hard_swish #DEFINE_ALIAS -from ...fluid.layers import leaky_relu #DEFINE_ALIAS -from ...fluid.layers import logsigmoid #DEFINE_ALIAS from ...fluid.layers import maxout #DEFINE_ALIAS -from ...fluid.layers import relu6 #DEFINE_ALIAS -from ...fluid.layers import selu #DEFINE_ALIAS from ...fluid.layers import soft_relu #DEFINE_ALIAS -from ...fluid.layers import softplus #DEFINE_ALIAS -from ...fluid.layers import softshrink #DEFINE_ALIAS -from ...fluid.layers import softsign #DEFINE_ALIAS from ...fluid.layers import swish #DEFINE_ALIAS -from ...fluid.layers import tanh_shrink #DEFINE_ALIAS +from ...fluid.layers import sigmoid #DEFINE_ALIAS from ...fluid.layers import thresholded_relu #DEFINE_ALIAS +from ...tensor.math import tanh #DEFINE_ALIAS __all__ = [ 'brelu', 'elu', 'erf', 'gelu', - 'hard_shrink', + 'hardshrink', + 'hardtanh', 'hard_sigmoid', 'hard_swish', 'hsigmoid', 'leaky_relu', 'logsigmoid', 'maxout', - # 'prelu', + 'prelu', 'relu', 'relu6', 'selu', - 'sigmoid', 'soft_relu', 'softmax', 'softplus', 'softshrink', 'softsign', + 'sigmoid', 'swish', - 'tanh_shrink', + 'tanh', + 'tanhshrink', 'thresholded_relu', - 'log_softmax' + 'log_softmax', ] import warnings from ...fluid.layer_helper import LayerHelper from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_ from ...fluid import core -from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.data_feeder import check_variable_and_dtype, check_dtype import paddle +def elu(x, alpha=1.0, name=None): + """ + elu activation. + + .. math:: + + elu(x) = max(0, x) + min(0, \\alpha * (e^{x}-1)) + + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + alpha (float, optional): The 'alpha' value of the ELU formulation. Default is 1.0. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([[-1,6],[1,15.6]])) + out = F.elu(x, alpha=0.2) + # [[-0.12642411 6. ] + # [ 1. 15.6 ]] + """ + + if in_dygraph_mode(): + return core.ops.elu(x, 'alpha', alpha) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu') + helper = LayerHelper("elu", **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='elu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': alpha}) + return out + + +def gelu(x, approximate=False, name=None): + """ + gelu activation. + + if approximate is True + + .. math:: + + gelu(x) = 0.5 * x * (1 + tanh(\\sqrt{\\frac{2}{\\pi}} * (x + 0.044715x^{3}))) + + else + + .. math:: + + gelu(x) = 0.5 * x * (1 + erf(\\frac{x}{\\sqrt{2}})) + + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + approximate (bool, optional): Wether to enable approximation. Default is False. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. 
+ + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([[-1, 0.5],[1, 1.5]])) + out1 = F.gelu(x) # [-0.158655 0.345731 0.841345 1.39979] + out2 = F.gelu(x, True) # [-0.158808 0.345714 0.841192 1.39957] + """ + + if in_dygraph_mode(): + return core.ops.gelu(x, 'approximate', approximate) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'gelu') + helper = LayerHelper("gelu", **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='gelu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'approximate': approximate}) + return out + + +def hardshrink(x, threshold=0.5, name=None): + """ + hard shrinkage activation + + .. math:: + + hardshrink(x)= + \left\{ + \begin{aligned} + &x, & & if \ x > threshold \\ + &x, & & if \ x < -threshold \\ + &0, & & if \ others + \end{aligned} + \right. + + Args: + x (Tensor): The input Tensor with data type float32, float64. + threshold (float, optional): The value of threshold for hardthrink. Default is 0.5 + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-1, 0.3, 2.5])) + out = F.hardshrink(x) # [-1., 0., 2.5] + + """ + if in_dygraph_mode(): + return core.ops.hard_shrink(x, 'threshold', threshold) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'hardshrink') + helper = LayerHelper('hardshrink', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='hard_shrink', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) + return out + + +def hardtanh(x, min=-1.0, max=1.0, name=None): + """ + hardtanh activation + + .. math:: + + hardtanh(x)= \\begin{cases} + max, \\text{if } x > max \\\\ + min, \\text{if } x < min \\\\ + x, \\text{otherwise} + \\end{cases} + + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + min (float, optional): The minimum value of the linear region range. Default is -1. + max (float, optional): The maximum value of the linear region range. Default is 1. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-1.5, 0.3, 2.5])) + out = F.hardtanh(x) # [-1., 0.3, 1.] 
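
As the implementation below (built on the `brelu` op) suggests, hardtanh is simply a clip to [min, max]; a quick check, purely illustrative and not part of the patch:

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    x = paddle.to_tensor(np.array([-1.5, 0.3, 2.5], dtype='float32'))
    # With the default range [-1, 1], hardtanh matches an explicit clip.
    assert np.allclose(F.hardtanh(x).numpy(), np.clip(x.numpy(), -1.0, 1.0))
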
+ """ + + if in_dygraph_mode(): + return core.ops.brelu(x, 't_min', min, 't_max', max) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'hardtanh') + + helper = LayerHelper('hardtanh', **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='brelu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'t_min': min, + 't_max': max}) + return out + + def hsigmoid(input, label, weight, @@ -126,7 +322,6 @@ def hsigmoid(input, Variable: A tensor with the cost of hierarchical sigmoid, its shape is [N, 1] and data type is the same as :attr:`input`. Examples: - .. code-block:: python from paddle import fluid, nn @@ -192,120 +387,303 @@ def hsigmoid(input, return out -def relu(input, inplace=False, name=None): +def leaky_relu(x, negative_slope=0.01, name=None): """ - :alias_main: paddle.nn.functional.relu - :alias: paddle.nn.functional.relu,paddle.nn.functional.activation.relu - - ReLU Activation. + leaky_relu activation .. math: + leaky_relu(x)= + \left\{ + \begin{aligned} + &x, & & if \ x >= 0 \\ + &negative\_slope * x, & & otherwise \\ + \end{aligned} + \right. \\ - out = max(x, 0) + Args: + x (Tensor): The input Tensor with data type float32, float64. + negative_slope (float, optional): Slope of the activation function at + :math:`x < 0` . Default is 0.01. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-2, 0, 1], 'float32')) + out = F.leaky_relu(x) # [-0.02, 0., 1.] + + """ + if in_dygraph_mode(): + return core.ops.leaky_relu(x, 'alpha', negative_slope) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'leaky_relu') + helper = LayerHelper('leaky_relu', **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='leaky_relu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': negative_slope}) + return out + + +def prelu(x, weight, name=None): + """ + prelu activation. + + .. math:: + + prelu(x) = max(0, x) + weight * min(0, x) Parameters: - input (Variable): The input variable. A multi-dimension Tensor with type float16, float32, or float64. - inplace (bool, optional): If inplace is True, the input and output of ``ReLU`` are the same variable. - Otherwise, the input and output of ``ReLU`` are different variables. Default: False. Note that if x is - more than one OPs' input, inplace must be False. - name (str, optional): The default value is None. Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name` . + x (Tensor): The input Tensor with data type float32, float64. + weight (Tensor): The learnable parameter with data type same as ``x``. + The weight shape is [1] or [in], where `in` is the input channel of ``x``. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. Returns: - Output of relu operator, a Tensor with shape same as input + A Tensor with the same data type and shape as ``x`` . Examples: .. 
code-block:: python - import paddle.fluid as fluid - import paddle.nn.functional as functional - import numpy as np + import paddle + import paddle.nn.functional as F + import numpy as np - data = np.array([-2, 0, 1]).astype('float32') - with fluid.dygraph.guard(): - data = fluid.dygraph.to_variable(data) - res = functional.relu(data) # [0, 0, 1] + paddle.disable_static() + + data = np.array([[[[-2.0, 3.0, -4.0, 5.0], + [ 3.0, -4.0, 5.0, -6.0], + [-7.0, -8.0, 8.0, 9.0]], + [[ 1.0, -2.0, -3.0, 4.0], + [-5.0, 6.0, 7.0, -8.0], + [ 6.0, 7.0, 8.0, 9.0]]]], 'float32') + x = paddle.to_tensor(data) + w = paddle.to_tensor(np.array([0.25]).astype('float32')) + out = F.prelu(x, w) + # [[[[-0.5 , 3. , -1. , 5. ], + # [ 3. , -1. , 5. , -1.5 ], + # [-1.75, -2. , 8. , 9. ]], + # [[ 1. , -0.5 , -0.75, 4. ], + # [-1.25, 6. , 7. , -2. ], + # [ 6. , 7. , 8. , 9. ]]]] """ + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'prelu') + check_variable_and_dtype(weight, 'weight', + ['float16', 'float32', 'float64'], 'prelu') + + helper = LayerHelper('prelu', **locals()) + assert len(weight.shape + ) == 1, "The dim count of weight shape should be 1 in prelu()." + + # NOTE(): The input of this API should be ``N,C,...`` format, + # which means x.shape[0] is batch_size and x.shape[0] is channel. + mode = 'all' + if weight.shape[0] > 1: + assert len( + x.shape + ) > 1, "The dim count of x should be equal or larger than 2 in prelu() when weight shape is not [1]." + assert weight.shape[0] == x.shape[ + 1], "The weight size should be equal to x input channel in prelu() when weight shape is not [1]." + mode = 'channel' if in_dygraph_mode(): - if inplace: - warnings.warn( - "Inplace on ReLU is not allowed and will be discarded in dygraph mode currently." - ) - return core.ops.relu(input) + return core.ops.prelu(x, weight, 'mode', mode) - check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'], - 'relu') + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type="prelu", + inputs={"X": x, + "Alpha": weight}, + outputs={"Out": out}, + attrs={"mode": mode}) + return out - helper = LayerHelper('relu', **locals()) - outs = input if inplace else helper.create_variable_for_type_inference( - input.dtype) - helper.append_op(type='relu', inputs={'X': [input]}, outputs={'Out': outs}) - return outs +def relu(x, name=None): + """ + relu activation. + + .. math:: + + out = max(x, 0) + + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() -def sigmoid(input, inplace=False, name=None): + x = paddle.to_tensor(np.array([-2, 0, 1]).astype('float32')) + out = F.relu(x) # [0., 0., 1.] """ - :alias_main: paddle.nn.functional.sigmoid - :alias: paddle.nn.functional.sigmoid,paddle.nn.functional.activation.sigmoid - Sigmoid Activation. + if in_dygraph_mode(): + return core.ops.relu(x) - .. math: + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu') + helper = LayerHelper('relu', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op(type='relu', inputs={'X': x}, outputs={'Out': out}) + return out + + +def logsigmoid(x, name=None): + """ + logsigmoid activation. 
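    A NumPy reference for the formula below may help as a cross-check. It uses the
    identity ``log(1 / (1 + e^{-x})) = -logaddexp(0, -x)``, one numerically stable
    way to evaluate it; this is an illustrative sketch only, not a claim about how
    the kernel computes it.

        .. code-block:: python

            import numpy as np

            x = np.array([1.0, 2.0, 3.0, 4.0])
            ref = -np.logaddexp(0.0, -x)
            # [-0.313262, -0.126928, -0.048587, -0.018150], matching F.logsigmoid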
+ + .. math:: - output = \frac{1}{1 + e^{-input}} + logsigmoid(x) = log \\frac{1}{1 + e^{-x}} Parameters: - input (Variable): The input variable. A multi-dimension Tensor with type float16, float32, or float64. - inplace (bool, optional): If inplace is True, the input and output are the same variable. - Otherwise, the input and output of are different variables. Default: False. Note that if x is - more than one OPs' input, inplace must be False. - name (str, optional): The default value is None. Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name` . + x (Tensor): The input Tensor with data type float32, float64. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. Returns: - Output of sigmoid operator, a Tensor with shape same as input + A Tensor with the same data type and shape as ``x`` . Examples: .. code-block:: python - - import paddle.fluid as fluid - import paddle.nn.functional as functional - import numpy as np - # In the static graph mode - input = fluid.data(name="input", shape=[None, 4]) - output = functional.sigmoid(input) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - input_data = np.array([1.0, 2.0, 3.0, 4.0]).astype('float32') - output_data = exe.run(feed={"input": input_data}, - fetch_list=[output]) - print(output_data) # [0.7310586, 0.880797, 0.95257413, 0.98201376] - # In the dynamic graph mode - with fluid.dygraph.guard(): - input = fluid.dygraph.to_variable(input_data) - output = functional.sigmoid(input) - print(output) # [0.7310586, 0.880797, 0.95257413, 0.98201376] + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([1.0, 2.0, 3.0, 4.0])) + out = F.logsigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499] """ if in_dygraph_mode(): - if inplace: - warnings.warn( - "Inplace on sigmoid is not allowed and will be discarded in dygraph mode currently." - ) - return core.ops.sigmoid(input) - - check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'], - 'sigmoid') - helper = LayerHelper("sigmoid", **locals()) - outputs = helper.create_variable_for_type_inference(input.dtype) + return core.ops.logsigmoid(x) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'logsigmoid') + helper = LayerHelper("logsigmoid", **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op(type='logsigmoid', inputs={'X': x}, outputs={'Out': out}) + return out + + +def relu6(x, name=None): + """ + relu6 activation + + .. math:: + + relu6(x) = min(max(0,x), 6) + + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. 
code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-1, 0.3, 6.5])) + out = F.relu6(x) # [0, 0.3, 6] + """ + threshold = 6.0 + if in_dygraph_mode(): + return core.ops.relu6(x, 'threshold', threshold) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu6') + helper = LayerHelper('relu6', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='relu6', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) + return out + + +def selu(x, + scale=1.0507009873554804934193349852946, + alpha=1.6732632423543772848170429916717, + name=None): + """ + selu activation + + .. math:: + + selu(x) = scale * (max(0,x) + min(0, alpha * (e^{x} - 1))) + + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + scale (float, optional): The value of scale for selu. Default is 1.0507009873554804934193349852946 + alpha (float, optional): The value of alpha for selu. Default is 1.6732632423543772848170429916717 + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([[0.0, 1.0],[2.0, 3.0]])) + out = F.selu(x) # [[0, 1.050701],[2.101402, 3.152103]] + """ + if in_dygraph_mode(): + return core.ops.selu(x, 'scale', scale, 'alpha', alpha) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'selu') + helper = LayerHelper('selu', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) helper.append_op( - type='sigmoid', inputs={'X': [input]}, outputs={'Out': outputs}) - return outputs + type='selu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'scale': scale, + 'alpha': alpha}) + return out -def softmax(x, axis=-1, name=None): +def softmax(x, axis=-1, dtype=None, name=None): """ This operator implements the softmax layer. The calculation process is as follows: @@ -332,7 +710,7 @@ def softmax(x, axis=-1, name=None): .. math:: - out[i, j] = \\frac{\exp(x[i, j])}{\sum_j(exp(x[i, j])} + softmax[i, j] = \\frac{\\exp(x[i, j])}{\\sum_j(exp(x[i, j])} Example: @@ -381,12 +759,104 @@ def softmax(x, axis=-1, name=None): [0.26762315, 0.26762315, 0.26762315, 0.26762315], [0.72747516, 0.72747516, 0.72747516, 0.72747516]]] - Args: - x (Tensor): The input multi-dimension Tensor with data type float32, float64. - axis (int, optional): The axis along which to perform softmax calculations. - It should be in range [-D, D), where D is the dimensions of ``x`` . - When ``axis`` < 0, it works the same way as :math:`axis + D` . - Default is -1. + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + axis (int, optional): The axis along which to perform log_softmax + calculations. It should be in range [-D, D), where D is the + dimensions of ``x`` . If ``axis`` < 0, it works the same way as + :math:`axis + D` . Default is -1. + dtype (str|np.dtype|core.VarDesc.VarType, optional): The desired data + type of the output tensor. If dtype is specified, ``x`` is casted + to ``dtype`` before the operation is performed. This is useful for + preventing data type overflows. Supported dtype: float32, float64. 
+ If ``dtype`` is None, the output Tensor has the same dtype as x. + Default is None. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same shape and data type (use ``dtype`` if it is + specified) as x. + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = np.array([[[2.0, 3.0, 4.0, 5.0], + [3.0, 4.0, 5.0, 6.0], + [7.0, 8.0, 8.0, 9.0]], + [[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [6.0, 7.0, 8.0, 9.0]]], 'float32') + x = paddle.to_tensor(x) + out1 = F.softmax(x) + out2 = F.softmax(x, dtype='float64') + # out1's data type is float32; out2's data type is float64 + # out1 and out2's value is as follows: + # [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], + # [0.0320586 , 0.08714432, 0.23688282, 0.64391426], + # [0.07232949, 0.19661193, 0.19661193, 0.53444665]], + # [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], + # [0.0320586 , 0.08714432, 0.23688282, 0.64391426], + # [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] + """ + + if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)): + dtype = convert_np_dtype_to_dtype_(dtype) + use_cudnn = True if axis is -1 else False + + if in_dygraph_mode(): + outs_cast = x if dtype is None \ + else core.ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) + return core.ops.softmax(outs_cast, 'axis', axis, 'use_cudnn', use_cudnn) + + if dtype is None: + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'softmax') + else: + check_dtype(dtype, 'dtype', ['float32', 'float64'], 'softmax', + 'If dtype is not None, it only support float32 or float64.') + + helper = LayerHelper("softmax", **locals()) + outs_cast = x + if dtype is not None: + outs_cast = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='cast', + inputs={'X': x}, + outputs={'Out': outs_cast}, + attrs={'in_dtype': x.dtype, + 'out_dtype': dtype}) + + outs_softmax = helper.create_variable_for_type_inference(outs_cast.dtype) + helper.append_op( + type='softmax', + inputs={'X': outs_cast}, + outputs={'Out': outs_softmax}, + attrs={'axis': axis, + 'use_cudnn': use_cudnn}) + + return outs_softmax + + +def softplus(x, beta=1, threshold=20, name=None): + """ + softplus activation + + .. math:: + + softplus(x) = \\frac{1}{beta} * \\log(1 + e^{beta * x}) \\\\ + \\text{For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.} + + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + beta (float, optional): The value of beta for softplus. Default is 1 + threshold (float, optional): The value of threshold for softplus. Default is 20 name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. @@ -394,118 +864,252 @@ def softmax(x, axis=-1, name=None): A Tensor with the same data type and shape as ``x`` . Examples: + .. 
code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355] + """ + if in_dygraph_mode(): + return core.ops.softplus(x, 'beta', beta, 'threshold', threshold) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'softplus') + helper = LayerHelper('softplus', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='softplus', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'beta': beta, + 'threshold': threshold}) + return out + + +def softshrink(x, threshold=0.5, name=None): + """ + softshrink activation + + .. math:: + + softshrink(x)= \\begin{cases} + x - threshold, \\text{if } x > threshold \\\\ + x + threshold, \\text{if } x < -threshold \\\\ + 0, \\text{otherwise} + \\end{cases} + + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + threshold (float, optional): The value of threshold(must be no less than zero) for softplus. Default is 0.5 + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: .. code-block:: python - import paddle - import paddle.nn.functional as F - import numpy as np + import paddle + import paddle.nn.functional as F + import numpy as np - paddle.disable_static() + paddle.disable_static() - x = np.array([[[2.0, 3.0, 4.0, 5.0], - [3.0, 4.0, 5.0, 6.0], - [7.0, 8.0, 8.0, 9.0]], - [[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [6.0, 7.0, 8.0, 9.0]]], 'float32') - x = paddle.to_variable(x) - out = F.softmax(x) - # [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], - # [0.0320586 , 0.08714432, 0.23688282, 0.64391426], - # [0.07232949, 0.19661193, 0.19661193, 0.53444665]], - # [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], - # [0.0320586 , 0.08714432, 0.23688282, 0.64391426], - # [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] + x = paddle.to_tensor(np.array([-0.9, -0.2, 0.1, 0.8])) + out = F.softshrink(x) # [-0.4, 0, 0, 0.3] """ - return paddle.fluid.layers.softmax(input=x, axis=axis, name=name) + if threshold < 0: + raise ValueError( + "The threshold must be no less than zero. Received: {}.".format( + threshold)) + + if in_dygraph_mode(): + return core.ops.softshrink(x, 'lambda', threshold) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'softshrink') + helper = LayerHelper('softshrink', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='softshrink', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'lambda': threshold}) + return out -def log_softmax(input, axis=None, dtype=None, name=None): +def softsign(x, name=None): """ - :alias_main: paddle.nn.functional.log_softmax - :alias: paddle.nn.functional.log_softmax,paddle.nn.functional.activation.log_softmax + softsign activation + + .. math:: + + softsign(x) = \\frac{x}{1 + |x|} + + Parameters: + x (Tensor): The input Tensor with data type float32, float64. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. 
code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np - This operator implements the log_softmax layer. The calculation process is as follows: + paddle.disable_static() + + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + out = F.softsign(x) # [-0.285714, -0.166667, 0.0909091, 0.230769] + """ + if in_dygraph_mode(): + return core.ops.softsign(x) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'softsign') + helper = LayerHelper('softsign', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op(type='softsign', inputs={'X': x}, outputs={'Out': out}) + return out + + +def tanhshrink(x, name=None): + """ + tanhshrink activation + + .. math:: + + tanhshrink(x) = x - tanh(x) + + Args: + x (Tensor): The input Tensor with data type float32, float64. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor with the same data type and shape as ``x`` . + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + out = F.tanhshrink(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739] + """ + if in_dygraph_mode(): + return core.ops.tanh_shrink(x) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'tanhshrink') + helper = LayerHelper('tanh_shrink', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op(type='tanh_shrink', inputs={'X': x}, outputs={'Out': out}) + return out + + +def log_softmax(x, axis=-1, dtype=None, name=None): + """ + This operator implements the log_softmax layer. The calculation process is + as follows: .. math:: Out[i, j] = log(softmax(x)) - = log(\\frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}) + = log(\frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}) Parameters: - input (Variable): The input variable. A multi-dimension Tensor with type float32, or float64. - axis (int, optional): The index of dimension to perform softmax calculations, it should be in - range :math:`[-1, rank-1]`, while :math:`rank` is the rank of input variable. Default: None. - None and -1 means the last dimension. - dtype (np.dtype|core.VarDesc.VarType|str): The desired data type of returned tensor. If specified, - the input tensor is casted to dtype before the operation is performed. This is useful for - preventing data type overflows. Default: None. Supported dtype: float32 or float64 - name (str, optional): The default value is None. Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name` . + x (Tensor): The input Tensor with data type float32, float64. + axis (int, optional): The axis along which to perform log_softmax + calculations. It should be in range [-D, D), where D is the + dimensions of ``x`` . If ``axis`` < 0, it works the same way as + :math:`axis + D` . Default is -1. + dtype (str|np.dtype|core.VarDesc.VarType, optional): The desired data + type of the output tensor. If dtype is specified, ``x`` is casted + to ``dtype`` before the operation is performed. This is useful for + preventing data type overflows. Supported dtype: float32, float64. + If ``dtype`` is None, the output Tensor has the same dtype as x. + Default is None. + name (str, optional): Name for the operation (optional, default is None). 
+ For more information, please refer to :ref:`api_guide_Name`. Returns: - Variable: ``Tensor`` indicates the output of softmax. The data type and shape are the same as ``input``. + A Tensor with the same shape and data type (use ``dtype`` if it is + specified) as x. Examples: .. code-block:: python - import paddle.fluid as fluid - import paddle.nn.functional as F - import numpy as np + import paddle + import paddle.nn.functional as F + import numpy as np - data = np.array([[[-2.0, 3.0, -4.0, 5.0], + paddle.disable_static() + + x = np.array([[[-2.0, 3.0, -4.0, 5.0], [3.0, -4.0, 5.0, -6.0], [-7.0, -8.0, 8.0, 9.0]], - [[1.0, -2.0, -3.0, 4.0], + [[1.0, -2.0, -3.0, 4.0], [-5.0, 6.0, 7.0, -8.0], - [6.0, 7.0, 8.0, 9.0]]]).astype('float32') - with fluid.dygraph.guard(): - data = fluid.dygraph.to_variable(data) - res = F.log_softmax(data, -1) - # [[[ -7.1278396 -2.1278396 -9.127839 -0.12783948] - # [ -2.1270514 -9.127051 -0.12705144 -11.127051 ] - # [-16.313261 -17.313261 -1.3132617 -0.31326184]] - # [[ -3.0518122 -6.051812 -7.051812 -0.051812 ] - # [-12.313267 -1.3132664 -0.3132665 -15.313267 ] - # [ -3.4401896 -2.4401896 -1.4401896 -0.44018966]]] + [6.0, 7.0, 8.0, 9.0]]], 'float32') + x = paddle.to_tensor(x) + out1 = F.log_softmax(x) + out2 = F.log_softmax(x, dtype='float64') + # out1's data type is float32; out2's data type is float64 + # out1 and out2's value is as follows: + # [[[ -7.1278396 -2.1278396 -9.127839 -0.12783948] + # [ -2.1270514 -9.127051 -0.12705144 -11.127051 ] + # [-16.313261 -17.313261 -1.3132617 -0.31326184]] + # [[ -3.0518122 -6.051812 -7.051812 -0.051812 ] + # [-12.313267 -1.3132664 -0.3132665 -15.313267 ] + # [ -3.4401896 -2.4401896 -1.4401896 -0.44018966]]] """ - axis = -1 if axis is None else axis - dtype = convert_np_dtype_to_dtype_(dtype) if dtype is not None else dtype + if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)): + dtype = convert_np_dtype_to_dtype_(dtype) if in_dygraph_mode(): - outs_cast = input if dtype is None \ - else core.ops.cast(input, 'in_dtype', input.dtype, 'out_dtype', dtype) - outs_softmax = core.ops.softmax(outs_cast, 'axis', axis, 'use_cudnn', - False) - return core.ops.log(outs_softmax) + if dtype is not None: + x = core.ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) + return core.ops.log_softmax(x, 'axis', axis) if dtype is None: - check_variable_and_dtype( - input, 'input', ['float16', 'float32', 'float64'], 'log_softmax') + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'log_softmax') + else: + check_dtype(dtype, 'dtype', ['float32', 'float64'], 'log_softmax', + 'If dtype is not None, it only support float32 or float64.') helper = LayerHelper("log_softmax", **locals()) - outs_cast = input + out_cast = x if dtype is not None: - outs_cast = helper.create_variable_for_type_inference(dtype) + out_cast = helper.create_variable_for_type_inference(dtype) helper.append_op( type='cast', - inputs={'X': input}, - outputs={'Out': outs_cast}, - attrs={'in_dtype': input.dtype, + inputs={'X': x}, + outputs={'Out': out_cast}, + attrs={'in_dtype': x.dtype, 'out_dtype': dtype}) - outs_softmax = helper.create_variable_for_type_inference(outs_cast.dtype) + out = helper.create_variable_for_type_inference(out_cast.dtype) helper.append_op( - type='softmax', - inputs={'X': outs_cast}, - outputs={'Out': outs_softmax}, - attrs={'axis': axis, - 'use_cudnn': False}) + type='log_softmax', + inputs={'X': out_cast}, + outputs={'Out': out}, + attrs={'axis': axis}) - outs_log = 
helper.create_variable_for_type_inference(outs_softmax.dtype) - helper.append_op( - type='log', inputs={'X': outs_softmax}, outputs={'Out': outs_log}) - - return outs_log + return out diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index fe41cb6e64c34f34add3c0652ab5b30efe958161..cff108ec6a9a8666e0aa51ba0414fd885777f1a7 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -13,23 +13,36 @@ # limitations under the License. import warnings +import paddle +from ...fluid.framework import in_dygraph_mode, default_main_program from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.layers.tensor import Variable, fill_constant +from paddle.fluid.layers.tensor import Variable, fill_constant, zeros, concat # TODO: define the common functions to build a neural network -from ...fluid.layers import dropout #DEFINE_ALIAS from ...fluid.layers import label_smooth #DEFINE_ALIAS from ...fluid import one_hot #DEFINE_ALIAS -from ...fluid.layers import pad #DEFINE_ALIAS from ...fluid.layers import pad2d #DEFINE_ALIAS from ...fluid.layers import unfold #DEFINE_ALIAS from ...fluid.layers import assign #DEFINE_ALIAS +from ...fluid.layers import squeeze #DEFINE_ALIAS +from ...fluid.layers import unsqueeze #DEFINE_ALIAS +from ...fluid.layers import elementwise_mul #DEFINE_ALIAS +from ...tensor import clip +from ...tensor import sum +from ...tensor import sqrt #from ...fluid.layers import fc #DEFINE_ALIAS from ...fluid.layers import pad_constant_like #DEFINE_ALIAS +from ...fluid.framework import in_dygraph_mode +from ...fluid import core, dygraph_utils +from ...fluid import core, layers +from ...fluid.data_feeder import check_variable_and_dtype __all__ = [ 'dropout', + 'dropout2d', + 'dropout3d', + 'alpha_dropout', # 'embedding', # 'fc', 'label_smooth', @@ -40,7 +53,9 @@ __all__ = [ 'unfold', # 'bilinear_tensor_product', 'assign', - 'interpolate' + 'interpolate', + 'bilinear', + 'cosine_similarity', ] @@ -446,3 +461,708 @@ def interpolate(input, outputs={"Out": out}, attrs=attrs) return out + + +def bilinear(x1, x2, weight, bias=None, name=None): + """ + + This layer performs bilinear on two inputs. + + .. math:: + out_{i} = x1 * W_{i} * {x2^\mathrm{T}}, i=0,1,...,size-1 + out = out + b + + In this formula: + - :math:`x1`: the first input contains in1_features elements, shape is [batch_size, in1_features]. + - :math:`x2`: the second input contains in2_features elements, shape is [batch_size, in2_features]. + - :math:`W_{i}`: the i-th learned weight, shape is [in1_features, in2_features], and learned weight's shape is [out_features, in1_features, in2_features]. + - :math:`out_{i}`: the i-th element of out, shape is [batch_size, out_features]. + - :math:`b`: the learned bias, shape is [1, out_features]. + - :math:`x2^\mathrm{T}`: the transpose of :math:`x2`. + + Parameters: + x1 (Tensor): the first input tensor, it's data type should be float32, float64. + x2 (Tensor): the second input tensor, it's data type should be float32, float64. + weight (Parameter): The learnable weights of this layer, shape is [out_features, in1_features, in2_features]. + bias (Parameter, optional): The learnable bias(Bias) of this layer, shape is [1, out_features]. If it is set to None, no bias will be added to the output units. The default value is None. + name (str, optional): The default value is None. Normally there is no need for user + to set this property. For more information, please refer to :ref:`api_guide_Name`. Default: None. 
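    As a semantics and shape cross-check, the formula above can be reproduced with a
    small NumPy ``einsum``. This is an editorial reference sketch using the same
    shapes as the example below; the operator itself dispatches to the fused
    ``bilinear_tensor_product`` kernel shown in the implementation.

        .. code-block:: python

            import numpy as np

            batch, in1_features, in2_features, out_features = 5, 5, 4, 1000
            x1 = np.random.rand(batch, in1_features).astype('float32')
            x2 = np.random.rand(batch, in2_features).astype('float32')
            w = np.random.rand(out_features, in1_features, in2_features).astype('float32')
            b = np.random.rand(1, out_features).astype('float32')
            # out[n, i] = x1[n] @ w[i] @ x2[n].T + b[0, i]
            ref = np.einsum('nm,imk,nk->ni', x1, w, x2) + b  # shape (5, 1000)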
+ + Returns: + Variable: A 2-D Tensor of shape [batch_size, out_features]. + + Examples: + .. code-block:: python + + import paddle + import numpy + import paddle.nn.functional as F + + paddle.disable_static() + x1 = numpy.random.random((5, 5)).astype('float32') + x2 = numpy.random.random((5, 4)).astype('float32') + w = numpy.random.random((1000, 5, 4)).astype('float32') + b = numpy.random.random((1, 1000)).astype('float32') + + result = F.bilinear(paddle.to_tensor(x1), paddle.to_tensor(x2), paddle.to_tensor(w), paddle.to_tensor(b)) # result shape [5, 1000] + + """ + + if in_dygraph_mode(): + return core.ops.bilinear_tensor_product(x1, x2, weight, bias) + + check_variable_and_dtype(x1, 'x1', ['float32', 'float64'], 'bilinear') + check_variable_and_dtype(x2, 'x2', ['float32', 'float64'], 'bilinear') + + inputs = {"X": x1, "Y": x2, "Weight": weight} + if bias is not None: + inputs["Bias"] = bias + + helper = LayerHelper("bilinear", **locals()) + out = helper.create_variable_for_type_inference(dtype=x1.dtype) + + helper.append_op( + type="bilinear_tensor_product", inputs=inputs, outputs={"Out": out}) + + return out + + +def dropout(x, + p=0.5, + axis=None, + training=True, + mode="upscale_in_train", + name=None): + """ + Dropout is a regularization technique for reducing overfitting by preventing + neuron co-adaption during training. The dropout operator randomly sets the + outputs of some units to zero, while upscale others according to the given + dropout probability. + + Args: + x (Tensor): The input tensor. The data type is float32 or float64. + p (float | int): Probability of setting units to zero. Default 0.5. + axis (int | list): The axis along which the dropout is performed. Default None. + training (bool): A flag indicating whether it is in train phrase or not. Default True. + mode(str): ['upscale_in_train'(default) | 'downscale_in_infer'] + + 1. upscale_in_train(default), upscale the output at training time + + - train: out = input * mask / ( 1.0 - dropout_prob ) + - inference: out = input + + 2. downscale_in_infer, downscale the output at inference + + - train: out = input * mask + - inference: out = input * (1.0 - dropout_prob) + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor representing the dropout, has same shape and data type as `x` . + + Examples: + We use ``p=0.5`` in the following description for simplicity. + 1. When ``axis=None`` , this is commonly used dropout, which dropout each element of x randomly. + Let's see a simple case when x is a 2d tensor with shape 2*3: + [[1 2 3] + [4 5 6]] + we generate mask with the same shape as x, which is 2*3. The value of mask is + sampled from a Bernoulli distribution randomly. For example, we may get such mask: + [[0 1 0] + [1 0 1]] + So the output is obtained from elementwise multiply of x and mask: + [[0 2 0] + [4 0 6]] + Using default setting, i.e. ``mode='upscale_in_train'`` , + if in training phase, the final upscale output is: + [[0 4 0 ] + [8 0 12]] + if in test phase, the output is the same as input: + [[1 2 3] + [4 5 6]] + we can also set ``mode='downscale_in_infer'`` , then + if in training phase, the final output is: + [[0 2 0] + [4 0 6]] + if in test phase, the scale output is: + [[0.5 1. 1.5] + [2. 2.5 3. ]] + + 2. When ``axis!=None`` , this is useful for dropping whole channels from an image or sequence. 
+ Let's see the simple case when x is a 2d tensor with shape 2*3 again: + [[1 2 3] + [4 5 6]] + (1) If ``axis=0`` , this means the dropout is only performed in axis `0` . + we generate mask with the shape 2*1. Only in axis `0` the value is randomly selected. + For example, we may get such mask: + [[1] + [0]] + The output is obtained from elementwise multiply of x and mask. Doing that the mask will be + broadcast from 2*1 to 2*3: + [[1 1 1] + [0 0 0]] + and the result after elementwise multiply is: + [[1 2 3] + [0 0 0]] + then we can do upscale or downscale according to the setting of other arguments. + (2) If ``axis=1`` , this means the dropout is only performed in axis `1` . + we generate mask with the shape 1*3. Only in axis `1` the value is randomly selected. + For example, we may get such mask: + [[1 0 1]] + Doing elementwise multiply the mask will be broadcast from 1*3 to 2*3: + [[1 0 1] + [1 0 1]] + and the result after elementwise multiply is: + [[1 0 3] + [4 0 6]] + (3) What about ``axis=[0, 1]`` ? This means the dropout is performed in all axes of x, + which is the same case as default setting ``axis=None`` . + (4) You may note that logically `axis=None` means the dropout is performed in none axis of x, + We generate mask with the shape 1*1. Whole input is randomly selected or dropped. + For example, we may get such mask: + [[0]] + Doing elementwise multiply the mask will be broadcast from 1*1 to 2*3: + [[0 0 0] + [0 0 0]] + and the result after elementwise multiply is: + [[0 0 0] + [0 0 0]] + Actually this is not what we want because all elements may set to zero~ + When x is a 4d tensor with shape `NCHW`, we can set ``axis=[0,1]`` and the dropout will be performed + in channel `N` and `C`, `H` and `W` is tied, i.e. + paddle.nn.dropout(x, p, axis=[0,1]) + Please refer to ``paddle.nn.functional.dropout2d`` for more details. + Similarly, when x is a 5d tensor with shape `NCDHW`, we can set ``axis=[0,1]`` to perform + dropout3d. Please refer to ``paddle.nn.functional.dropout3d`` for more details. + + .. 
code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + x = np.array([[1,2,3], [4,5,6]]).astype('float32') + x = paddle.to_tensor(x) + y_train = paddle.nn.functional.dropout(x, 0.5) + y_test = paddle.nn.functional.dropout(x, 0.5, training=False) + y_0 = paddle.nn.functional.dropout(x, axis=0) + y_1 = paddle.nn.functional.dropout(x, axis=1) + y_01 = paddle.nn.functional.dropout(x, axis=[0,1]) + print(x.numpy()) + print(y_train.numpy()) + print(y_test.numpy()) + print(y_0.numpy()) + print(y_1.numpy()) + print(y_01.numpy()) + + """ + if not isinstance(p, (float, int)): + raise TypeError("p argument should be a number") + if p < 0 or p > 1: + raise ValueError("p argument should between 0 and 1") + if mode not in ('downscale_in_infer', 'upscale_in_train'): + raise ValueError( + "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") + if axis and not isinstance(axis, (int, list)): + raise TypeError("datatype of axis argument should be int or list") + + if axis == None: # commonly used dropout + seed = None + mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode #semantic transfer + + def get_attrs(prog, dropout_prob, is_test, seed): + if (seed is None or seed == 0) and prog.random_seed != 0: + seed = prog.random_seed + attrs = { + 'dropout_prob': dropout_prob, + 'is_test': is_test, + 'fix_seed': seed is not None, + 'seed': seed if seed is not None else 0, + 'dropout_implementation': mode, + } + return attrs + + if in_dygraph_mode(): + if default_main_program().random_seed != 0: + seed = default_main_program().random_seed + out, mask = core.ops.dropout( + x, 'dropout_prob', p, 'is_test', not training, 'fix_seed', + seed is not None, 'seed', seed + if seed is not None else 0, 'dropout_implementation', mode) + return out + + helper = LayerHelper('dropout', **locals()) + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'dropout') + + out = helper.create_variable_for_type_inference(dtype=x.dtype) + mask = helper.create_variable_for_type_inference( + dtype=core.VarDesc.VarType.UINT8, stop_gradient=True) + + attrs = get_attrs(helper.main_program, p, not training, seed) + + helper.append_op( + type='dropout', + inputs={'X': [x]}, + outputs={'Out': [out], + 'Mask': [mask]}, + attrs=attrs) + return out + else: #sometimes called dropout_nd #TODO: optimize with c++ + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'dropout') + dtype = x.dtype + keep_prob = 1 - p + if training: + if p == 1.: + return layers.scale(x, scale=0.) + + scale_input = layers.scale( + x, scale=1 / keep_prob) if mode == 'upscale_in_train' else x + + #get mask shape + input_shape = x.shape + drop_axes = [axis] if isinstance(axis, int) else axis + if max(drop_axes) > len(input_shape) - 1: + raise ValueError("axis value should less than dimensions of x:{}, but get drop_axes value:{} " \ + .format(len(input_shape), max(drop_axes))) + if len(drop_axes) > len(input_shape): + raise ValueError( + "length of axis should not greater than dimensions of x:{}, but get length of drop axes: {}". 
+ format(len(input_shape), len(drop_axes))) + mask_shape = [1] * len(input_shape) + for i in drop_axes: + mask_shape[i] = input_shape[i] + + #get mask + random_tensor = layers.uniform_random( + mask_shape, dtype='float32', min=0., max=1.0) + p = layers.fill_constant(shape=[1], dtype='float32', value=p) + keep_mask = layers.greater_equal(random_tensor, p) + + scale_input = layers.cast(scale_input, dtype) + keep_mask = layers.cast(keep_mask, dtype) + ret = paddle.multiply(scale_input, keep_mask, name=name) + return ret + else: # test + ret = layers.scale( + x, scale=keep_prob) if mode == 'downscale_in_infer' else x + return ret + + +def dropout2d(x, p=0.5, training=True, data_format='NCHW', name=None): + """ + Randomly zero out entire channels (in the batched input 4d tensor with the shape `NCHW` , + a channel is a 2D feature map with the shape `HW` ). Each channel will be zeroed out independently + on every forward call with probability `p` using samples from a Bernoulli distribution. + + See ``paddle.nn.functional.dropout`` for more details. + + Args: + x (Tensor): The input is 4-D Tensor with shape [N, C, H, W] or [N, H, W, C]. + The data type is float32 or float64. + p (float): Probability of setting units to zero. Default 0.5. + training (bool): A flag indicating whether it is in train phrase or not. Default True. + data_format (str, optional): Specify the data format of the input, and the data format of the output + will be consistent with that of the input. An optional string from: + `NCHW` , `NHWC` . The default is `NCHW` . When it is `NCHW` , the data is + stored in the order of: [batch_size, input_channels, input_height, input_width]. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor representing the dropout2d, has same shape and data type as `x` . + + Examples: + .. code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + x = np.random.random(size=(2, 3, 4, 5)).astype('float32') + x = paddle.to_tensor(x) + y_train = paddle.nn.functional.dropout2d(x) #train + y_test = paddle.nn.functional.dropout2d(x, training=False) #test + for i in range(2): + for j in range(3): + print(x.numpy()[i,j,:,:]) + print(y_train.numpy()[i,j,:,:]) # may all 0 + print(y_test.numpy()[i,j,:,:]) + """ + input_shape = x.shape + if len(input_shape) != 4: + raise ValueError("dimensions of x should be 4, but received {} != 4"\ + .format(len(input_shape))) + + if data_format not in ["NCHW", "NHWC"]: + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " + "Attr(data_format): %s." % str(data_format)) + + return dropout( + x, + p=p, + axis=[0, 1] if data_format == 'NCHW' else [0, 3], + training=training, + mode="upscale_in_train", + name=name) + + +def dropout3d(x, p=0.5, training=True, data_format='NCDHW', name=None): + """ + Randomly zero out entire channels (in the batched input 5d tensor with the shape `NCDHW` , + a channel is a 3D feature map with the shape `DHW` ). Each channel will be zeroed out independently + on every forward call with probability `p` using samples from a Bernoulli distribution. + + See ``paddle.nn.functional.dropout`` for more details. + + Args: + x (Tensor): The input is 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C]. + The data type is float32 or float64. + p (float): Probability of setting units to zero. Default 0.5. + training (bool): A flag indicating whether it is in train phrase or not. Default True. 
+ data_format (str, optional): Specify the data format of the input, and the data format of the output + will be consistent with that of the input. An optional string from: + ``NCDHW``, ``NDHWC``. The default is ``NCDHW`` . When it is ``NCDHW`` , the data is + stored in the order of: [batch_size, input_channels, input_depth, input_height, input_width]. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor representing the dropout3d, has same shape and data type with `x` . + + Examples: + .. code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + x = np.random.random(size=(2, 3, 4, 5, 6)).astype('float32') + x = paddle.to_tensor(x) + y_train = paddle.nn.functional.dropout3d(x) #train + y_test = paddle.nn.functional.dropout3d(x, training=False) #test + print(x.numpy()[0,0,:,:,:]) + print(y_train.numpy()[0,0,:,:,:]) # may all 0 + print(y_test.numpy()[0,0,:,:,:]) + """ + + input_shape = x.shape + if len(input_shape) != 5: + raise ValueError("dimensions of x should be 5, but received {} != 5" \ + .format(len(input_shape))) + + if data_format not in ["NCDHW", "NDHWC"]: + raise ValueError( + "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " + "Attr(data_format): %s." % str(data_format)) + + return dropout( + x, + p=p, + axis=[0, 1] if data_format == 'NCDHW' else [0, 4], + training=training, + mode="upscale_in_train", + name=name) + + +def alpha_dropout(x, p=0.5, training=True, name=None): + """ + Alpha Dropout is a type of Dropout that maintains the self-normalizing property. + For an input with zero mean and unit standard deviation, the output of Alpha Dropout + maintains the original mean and standard deviation of the input. + Alpha Dropout fits well to SELU activate function by randomly setting activations to the negative saturation value. + + Args: + x (Tensor): The input tensor. The data type is float32 or float64. + p (float | int): Probability of setting units to zero. Default 0.5. + training (bool): A flag indicating whether it is in train phrase or not. Default True. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor representing the dropout, has same shape and data type as `x`. + + Examples: + .. 
code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + x = np.array([[-1, 1], [-1, 1]]).astype('float32') + x = paddle.to_tensor(x) + y_train = paddle.nn.functional.alpha_dropout(x, 0.5) + y_test = paddle.nn.functional.alpha_dropout(x, 0.5, training=False) + print(x.numpy()) + print(y_train.numpy()) + # [[-0.10721093, 1.6655989 ], [-0.7791938, -0.7791938]] (randomly) + print(y_test.numpy()) + """ + if not isinstance(p, (float, int)): + raise TypeError("p argument should be a float or int") + if p < 0 or p > 1: + raise ValueError("p argument should between 0 and 1") + + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], + 'alpha_dropout') + + if training: + #get transformation params + alpha = 1.6732632423543772848170429916717 + scale = 1.0507009873554804934193349852946 + alpha_p = -alpha * scale + a = ((1 - p) * (1 + p * alpha_p**2))**-0.5 + b = -a * alpha_p * p + + dtype = x.dtype + input_shape = x.shape + + #get mask + random_tensor = layers.uniform_random( + input_shape, dtype='float32', min=0., max=1.0) + p = layers.fill_constant(shape=[1], dtype='float32', value=p) + keep_mask = layers.greater_equal(random_tensor, p) + keep_mask = layers.cast(keep_mask, dtype) + drop_mask = layers.elementwise_sub( + layers.fill_constant( + shape=input_shape, dtype=dtype, value=1.), + keep_mask) + + #apply mask + b = layers.fill_constant(shape=[1], dtype=dtype, value=b) + y = layers.elementwise_add( + paddle.multiply(x, keep_mask), + layers.scale( + drop_mask, scale=alpha_p)) + res = layers.elementwise_add(layers.scale(y, scale=a), b, name=name) + return res + else: # test + return x + + +def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None): + """ + Pad tensor according to 'pad' and 'mode'. + If mode is 'reflect', pad[0] and pad[1] must be no greater + than width-1. The height and depth dimension has the same condition. + + Parameters: + x (Tensor): The input tensor with data type float32/double/int32/int64_t. + pad (Tensor | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. 1. If input dimension is 3, then the pad has the form (pad_left, + pad_right). 2. If the input dimension is 4, then the pad has the form (pad_left, pad_right, + pad_top, pad_bottom). 3. If the input dimension is 5, then the pad has the form + (pad_left, pad_right, pad_top, pad_bottom, pad_front, pad_back). + + mode (str): Four modes: 'constant' (default), 'reflect', 'replicate', 'circular'. + When in 'constant' mode, this op uses a constant value to pad the input tensor. + When in 'reflect' mode, uses reflection of the input boundaries to pad the input tensor. + When in 'replicate' mode, uses input boundaries to pad the input tensor. + When in 'circular' mode, uses circular input to pad the input tensor. + Default is 'constant' + value (float32): The value to fill the padded areas in 'constant' mode . Default is 0.0 + data_format (str): An string from: "NCL", "NLC", NHWC", "NCHW", "NCDHW", "NDHWC". Specify the data format of + the input data. + Default is "NCHW" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: a Tensor padded according to pad and mode and data type is same as input. + Return Type: Tensor + + Examples: + .. 
code-block:: text + + x = [[[[[1., 2., 3.], + [4., 5., 6.]]]]] + + Case 0: + pad = [2, 2, 1, 1, 0, 0], + mode = 'constant' + value = 0 + Out = [[[[[0. 0. 0. 0. 0. 0. 0.] + [0. 0. 1. 2. 3. 0. 0.] + [0. 0. 4. 5. 6. 0. 0.] + [0. 0. 0. 0. 0. 0. 0.]]]]] + + Case 1: + pad = [2, 2, 1, 1, 0, 0], + mode = 'reflect' + Out = [[[[[6. 5. 4. 5. 6. 5. 4.] + [3. 2. 1. 2. 3. 2. 1.] + [6. 5. 4. 5. 6. 5. 4.] + [3. 2. 1. 2. 3. 2. 1.]]]]] + + Case 2: + pad = [2, 2, 1, 1, 0, 0], + mode = 'replicate' + Out = [[[[[1. 1. 1. 2. 3. 3. 3.] + [1. 1. 1. 2. 3. 3. 3.] + [4. 4. 4. 5. 6. 6. 6.] + [4. 4. 4. 5. 6. 6. 6.]]]]] + + Case 3: + pad = [2, 2, 1, 1, 0, 0], + mode = 'circular' + Out = [[[[[5. 6. 4. 5. 6. 4. 5.] + [2. 3. 1. 2. 3. 1. 2.] + [5. 6. 4. 5. 6. 4. 5.] + [2. 3. 1. 2. 3. 1. 2.]]]]] + + Code Examples: + .. code-block:: python + import numpy as np + import paddle + import paddle.nn.functional as F + + paddle.disable_static() + + # example 1 + x_shape = (1, 1, 3) + x = np.arange(np.prod(x_shape), dtype=np.float32).reshape(x_shape) + 1 + tensor_x = paddle.to_tensor(x) + y = F.pad(tensor_x, pad=[2, 3], value=1, mode='constant') + print(y.numpy()) + # [[[1. 1. 1. 2. 3. 1. 1. 1.]]] + + # example 2 + x_shape = (1, 1, 2, 3) + x = np.arange(np.prod(x_shape), dtype=np.float32).reshape(x_shape) + 1 + tensor_x = paddle.to_tensor(x) + y = F.pad(tensor_x, pad=[1, 2, 1, 1], value=1, mode='circular') + print(y.numpy()) + # [[[[6. 4. 5. 6. 4. 5.] + # [3. 1. 2. 3. 1. 2.] + # [6. 4. 5. 6. 4. 5.] + # [3. 1. 2. 3. 1. 2.]]]] + """ + assert mode in ['reflect', 'replicate', 'constant', 'circular'], \ + "mode should be one of constant, reflect, replicate, circular, but got {}.".format(mode) + + data_format = data_format.upper() + assert data_format in ["NCL", "NCHW", "NCDHW", "NLC", "NHWC", "NDHWC"], \ + "data_format should be in one of [NCL, NCHW, NCDHW, NLC, NHWC, NDHWC], " \ + "but got {}".format(data_format) + + x_dim = len(x.shape) + + original_data_format = data_format + unsqueezed_dim = [] + + if isinstance(pad, Variable): + if data_format in ["NCL", "NCHW", "NCDHW"]: + data_format = "NCDHW" + if x_dim == 3: + pad = concat([zeros((4, ), dtype="int32"), pad], axis=0) + unsqueezed_dim = [3, 4] + x = unsqueeze(x, axes=unsqueezed_dim) + elif x_dim == 4: + pad = concat([pad, zeros((2, ), dtype="int32")], axis=0) + unsqueezed_dim = [2] + x = unsqueeze(x, axes=unsqueezed_dim) + elif data_format in ["NLC", "NHWC", "NDHWC"]: + data_format = "NDHWC" + if x_dim == 3: + pad = concat([zeros((4, ), dtype="int32"), pad], axis=0) + unsqueezed_dim = [2, 3] + x = unsqueeze(x, axes=unsqueezed_dim) + elif x_dim == 4: + pad = concat([pad, zeros((2, ), dtype="int32")], axis=0) + unsqueezed_dim = [1] + x = unsqueeze(x, axes=unsqueezed_dim) + else: + if data_format in ["NCL", "NCHW", "NCDHW"]: + data_format = "NCDHW" + if x_dim == 3: + pad = [0, 0, 0, 0] + pad + unsqueezed_dim = [3, 4] + x = unsqueeze(x, axes=unsqueezed_dim) + elif x_dim == 4: + pad = pad + [0, 0] + unsqueezed_dim = [2] + x = unsqueeze(x, axes=unsqueezed_dim) + elif data_format in ["NLC", "NHWC", "NDHWC"]: + data_format = "NDHWC" + if x_dim == 3: + pad = [0, 0, 0, 0] + pad + unsqueezed_dim = [2, 3] + x = unsqueeze(x, axes=unsqueezed_dim) + elif x_dim == 4: + pad = pad + [0, 0] + unsqueezed_dim = [1] + x = unsqueeze(x, axes=unsqueezed_dim) + + if in_dygraph_mode(): + if isinstance(pad, Variable): + pad = pad.numpy() + out = core.ops.pad3d(x, "paddings", pad, "mode", mode, "value", value, + "data_format", data_format, "name", name) + else: + attrs = {'mode': mode, 'value': value, 
'data_format': data_format} + inputs = {'X': [x]} + if isinstance(pad, Variable): + inputs['Paddings'] = [pad] + attrs['paddings'] = [] + else: + attrs['paddings'] = pad + + helper = LayerHelper('pad3d', **locals()) + + dtype = helper.input_dtype(input_param_name='input') + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='pad3d', inputs=inputs, outputs={"Out": out}, attrs=attrs) + + if len(unsqueezed_dim) != 0: + out = squeeze(out, axes=unsqueezed_dim) + + return out + + +def cosine_similarity(x1, x2, axis=1, eps=1e-8): + """ + Compute cosine similarity between x1 and x2 along axis. + + Parameters: + x1 (Tensor): First input. float32/double. + x2 (Tensor): Second input. float32/double. + axis (int): Dimension of vectors to compute cosine similarity. Default is 1. + eps(float): Small value to avoid division by zero. Default is 1e-8. + + Returns: a Tensor representing cosine similarity between x1 and x2 along axis. + Return Type: Tensor + + Examples: + .. code-block:: text + Case 0: + x1 = [[0.8024077 0.9927354 0.27238318 0.8344984 ] + [0.48949873 0.5797396 0.65444374 0.66510963] + [0.1031398 0.9614342 0.08365563 0.6796464 ] + [0.10760343 0.7461209 0.7726148 0.5801006 ]] + x2 = [[0.62913156 0.1536727 0.9847992 0.04591406] + [0.9098952 0.15715368 0.8671125 0.3156102 ] + [0.4427798 0.54136837 0.5276275 0.32394758] + [0.3769419 0.8535014 0.48041078 0.9256797 ]] + axis = 1 + eps = 1e-8 + Out: [0.5275037 0.8368967 0.75037485 0.9245899] + + Code Examples: + .. code-block:: python + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + np.random.seed(0) + x1 = np.random.rand(2,3) + x2 = np.random.rand(2,3) + x1 = paddle.to_tensor(x1) + x2 = paddle.to_tensor(x2) + result = paddle.nn.functional.cosine_similarity(x1, x2, axis=0) + print(result.numpy()) + # [0.99806249 0.9817672 0.94987036] + + """ + w12 = sum(elementwise_mul(x1, x2), axis=axis) + w1 = sum(elementwise_mul(x1, x1), axis=axis) + w2 = sum(elementwise_mul(x2, x2), axis=axis) + n12 = sqrt(clip(w1 * w2, min=eps * eps)) + cos_sim = w12 / n12 + return cos_sim diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 2a519718258856fe1f4462422a36dccae7066ad1..f80f200c7163836252faa4b1c932178f6bab0dff 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -13,15 +13,24 @@ # limitations under the License. from __future__ import print_function -__all__ = ['conv2d', 'conv2d_transpose', 'conv3d', 'conv3d_transpose'] +__all__ = [ + 'conv1d', + 'conv_transpose1d', + 'conv2d', + 'conv_transpose2d', + 'conv3d', + 'conv_transpose3d', +] import numpy as np +from ...device import get_cudnn_version from ...fluid.framework import Variable, in_dygraph_mode from ...fluid import core, dygraph_utils from ...fluid.layers import nn, utils from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.param_attr import ParamAttr from ...fluid.layer_helper import LayerHelper +from .common import pad2d def _is_list_or_tuple(input): @@ -87,20 +96,242 @@ def _update_padding_nd(padding, channel_last, num_dims): return padding, padding_algorithm -def conv2d(input, +def conv1d(x, weight, bias=None, + stride=1, padding=0, + dilation=1, + groups=1, + data_format='NCL', + name=None): + """ + The convolution1D layer calculates the output based on the input, filter + and strides, paddings, dilations, groups parameters. 
Input and + Output are in NCL format, where N is batch size, C is the number of + channels, L is the length of the feature. + Filter is in MCK format, where M is the number of output image channels, + C is the number of input image channels, K is the size of the kernel. + If the groups is greater than 1, C will equal the number of input image + channels divided by the groups. If bias attribution and activation type + are provided, bias is added to the output of the convolution, and the + corresponding activation function is applied to the final result. + + For each input :math:`X`, the equation is: + + .. math:: + + Out = \sigma (W \\ast X + b) + + Where: + + * :math:`X`: Input value, a tensor with NCL format. + * :math:`W`: Kernel value, a tensor with MCK format. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + + Example: + + - Input: + + Input shape: :math:`(N, C_{in}, L_{in})` + + Filter shape: :math:`(C_{out}, C_{in}, L_f)` + + - Output: + + Output shape: :math:`(N, C_{out}, L_{out})` + + Where + + .. math:: + + L_{out}&= \\frac{(L_{in} + 2 * padding - (dilation * (L_f - 1) + 1))}{stride} + 1 + + Args: + x (Tensor): The input is 3-D Tensor with shape [N, C, L], the data type + of input is float16 or float32 or float64. + weight (Tensor): The convolution kernel with shape [M, C/g, K], where M is + the number of output channels, g is the number of groups, K is the kernel's size. + bias (Tensor, optional): The bias with shape [M,]. Default: None. + stride (int or tuple, optional): The stride size. If stride is a tuple, it must + contain one integers, (stride_size). Default: 1. + padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. + 1. a string in ['valid', 'same']. + 2. an int, which means the feature map is zero paded by size of `padding` on both sides. + 3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero paded by size of `padding[0]` on both sides. + 4. a list[int] or tuple[int] whose length is 2. It has the form [pad_before, pad_after]. + 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). + The default value is 0. + dilation (int or tuple, optional): The dilation size. If dilation is a tuple, it must + contain one integer, (dilation_size). Default: 1. + groups (int, optional): The groups number of the conv1d function. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: 1. + data_format (str, optional): Specify the data format of the input, and the data format of the output + will be consistent with that of the input. An optional string from: `"NCL"`, `"NLC"`. + The default is `"NCL"`. When it is `"NCL"`, the data is stored in the order of: + `[batch_size, input_channels, feature_length]`. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. 
Usually name is no need to set and + None by default. + + Returns: + A tensor representing the conv1d, whose data type is the + same with input. + + Raises: + ValueError: If the channel dimmention of the input is less than or equal to zero. + ValueError: If `data_format` is not "NCL" or "NLC". + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + or the element corresponding to the input's channel is not 0. + ShapeError: If the input is not 3-D Tensor. + ShapeError: If the input's dimension size and filter's dimension size not equal. + ShapeError: If the dimension size of input minus the size of `stride` is not 1. + ShapeError: If the number of input channels is not equal to filter's channels * groups. + ShapeError: If the number of output channels is not be divided by groups. + + Examples: + .. code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + x = np.array([[[4, 8, 1, 9], + [7, 2, 0, 9], + [6, 9, 2, 6]]]).astype(np.float32) + w=np.array( + [[[9, 3, 4], + [0, 0, 7], + [2, 5, 6]], + [[0, 3, 4], + [2, 9, 7], + [5, 6, 8]]]).astype(np.float32) + paddle.disable_static() + x_var = paddle.to_tensor(x) + w_var = paddle.to_tensor(w) + y_var = F.conv1d(x_var, w_var) + y_np = y_var.numpy() + print(y_np) + + # [[[133. 238.] + # [160. 211.]]] + """ + cudnn_version = get_cudnn_version() + if cudnn_version is not None: + use_cudnn = True + else: + use_cudnn = False + + if data_format not in ["NCL", "NLC"]: + raise ValueError("Attr(data_format) should be 'NCL' or 'NLC'. " + "Received Attr(data_format): {}.".format(data_format)) + + channel_last = (data_format == "NHWC") + channel_dim = -1 if channel_last else 1 + conv2d_data_format = "NHWC" if channel_last else "NCHW" + num_channels = x.shape[channel_dim] + num_filters = weight.shape[0] + if num_channels < 0: + raise ValueError("The channel dimmention of the input({}) " + "should be defined. Received: {}.".format( + x.shape, num_channels)) + if num_channels % groups != 0: + raise ValueError( + "the channel of input must be divisible by groups," + "received: the channel of input is {}, the shape of input is {}" + ", the groups is {}".format(num_channels, x.shape, groups)) + if num_filters % groups != 0: + raise ValueError( + "the number of filters must be divisible by groups," + "received: the number of filters is {}, the shape of weight is {}" + ", the groups is {}".format(num_filters, weight.shape, groups)) + + # update attrs + padding, padding_algorithm = _update_padding_nd(padding, channel_last, 1) + if len(padding) == 2: + padding = padding + [0] * 2 + elif len(padding) == 1: + padding = padding + [0] + else: + raise ValueError( + "The size of padding's dimmention should 1 or 2. But got padding={}". 
+ format(padding)) + + stride = utils.convert_to_list(stride, 1, 'stride') + [1] + dilation = utils.convert_to_list(dilation, 1, 'dilation') + [1] + + l_type = "conv2d" + if (num_channels == groups and num_filters % num_channels == 0 and + not use_cudnn): + l_type = 'depthwise_conv2d' + use_cudnn = False + + inputs = {'Input': [x], 'Filter': [weight]} + attrs = { + 'strides': stride, + 'paddings': padding, + 'dilations': dilation, + 'groups': groups, + 'use_cudnn': use_cudnn, + 'use_mkldnn': False, + 'fuse_relu_before_depthwise_conv': False, + "padding_algorithm": padding_algorithm, + "data_format": conv2d_data_format + } + squeeze_aixs = -2 if channel_last else -1 + x = nn.unsqueeze(input=x, axes=[squeeze_aixs]) + weight = nn.unsqueeze(input=weight, axes=[-1]) + if in_dygraph_mode(): + attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation, + 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False, + 'fuse_relu_before_depthwise_conv', False, "padding_algorithm", + padding_algorithm, "data_format", conv2d_data_format) + out = getattr(core.ops, l_type)(x, weight, *attrs) + if bias is not None: + out = nn.elementwise_add(out, bias, axis=channel_dim) + else: + inputs = {'Input': [x], 'Filter': [weight]} + attrs = { + 'strides': stride, + 'paddings': padding, + 'dilations': dilation, + 'groups': groups, + 'use_cudnn': use_cudnn, + 'use_mkldnn': False, + 'fuse_relu_before_depthwise_conv': False, + "padding_algorithm": padding_algorithm, + "data_format": conv2d_data_format + } + check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'], + 'conv2d') + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + out = helper.create_variable_for_type_inference(dtype) + outputs = {"Output": [out]} + helper.append_op( + type=l_type, inputs=inputs, outputs=outputs, attrs=attrs) + if bias is not None: + out = nn.elementwise_add(out, bias, axis=channel_dim) + out = nn.squeeze(input=out, axes=[squeeze_aixs]) + return out + + +def conv2d(x, + weight, + bias=None, stride=1, + padding=0, dilation=1, groups=1, - use_cudnn=True, - act=None, data_format="NCHW", name=None): """ - :alias_main: paddle.nn.functional.conv2d - :alias: paddle.nn.functional.conv2d,paddle.nn.functional.conv.conv2d The convolution2D layer calculates the output based on the input, filter and strides, paddings, dilations, groups parameters. Input and @@ -152,12 +383,15 @@ def conv2d(input, W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 Args: - input (Variable): The input is 4-D Tensor with shape [N, C, H, W], the data type + x (Tensor): The input is 4-D Tensor with shape [N, C, H, W], the data type of input is float16 or float32 or float64. - weight (Variable): The convolution kernel with shape [M, C/g, kH, kW], where M is + weight (Tensor): The convolution kernel with shape [M, C/g, kH, kW], where M is the number of output channels, g is the number of groups, kH is the filter's height, kW is the filter's width. - bias (Variable, optional): The bias with shape [M,]. + bias (Tensor, optional): The bias with shape [M,]. + stride (int|tuple): The stride size. It means the stride in convolution. + If stride is a tuple, it must contain two integers, (stride_height, stride_width). + Otherwise, stride_height = stride_width = stride. Default: stride = 1. padding (string|int|list|tuple): The padding size. 
It means the number of zero-paddings on both sides for each dimension.If `padding` is a string, either 'VALID' or 'SAME' which is the padding algorithm. If padding size is a tuple or list, @@ -168,9 +402,6 @@ def conv2d(input, when `data_format` is `"NHWC"`, `pool_padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - stride (int|tuple): The stride size. It means the stride in convolution. - If stride is a tuple, it must contain two integers, (stride_height, stride_width). - Otherwise, stride_height = stride_width = stride. Default: stride = 1. dilation (int|tuple): The dilation size. It means the spacing between the kernel points. If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width). Otherwise, dilation_height = dilation_width = dilation. @@ -180,10 +411,6 @@ def conv2d(input, the first half of the filters is only connected to the first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. Default: groups=1. - use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act (str): Activation type, if it is set to None, activation is not appended. - Default: None data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: @@ -193,13 +420,9 @@ def conv2d(input, None by default. Returns: - A Variable holding Tensor representing the conv2d, whose data type is the - same with input. If act is None, the tensor variable storing the convolution - result, and if act is not None, the tensor variable storing convolution - and non-linearity activation result. + A Tensor representing the conv2d result, whose data type is the same with input. Raises: - ValueError: If the type of `use_cudnn` is not bool. ValueError: If `data_format` is not "NCHW" or "NHWC". ValueError: If the channel dimmention of the input is less than or equal to zero. ValueError: If `padding` is a string, but not "SAME" or "VALID". @@ -214,62 +437,65 @@ def conv2d(input, Examples: .. code-block:: python - from paddle import fluid + import paddle import paddle.nn.functional as F - import paddle.fluid.dygraph as dg import numpy as np x = np.random.randn(2, 3, 8, 8).astype(np.float32) w = np.random.randn(6, 3, 3, 3).astype(np.float32) - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - w_var = dg.to_variable(w) - y_var = F.conv2d(x_var, w_var, act="relu") - y_np = y_var.numpy() + paddle.disable_static() + + x_var = paddle.to_tensor(x) + w_var = paddle.to_tensor(w) + y_var = F.conv2d(x_var, w_var) + y_np = y_var.numpy() + print(y_np.shape) # (2, 6, 6, 6) """ # entry checks - if not isinstance(use_cudnn, bool): - raise ValueError("Attr(use_cudnn) should be True or False. " - "Received Attr(use_cudnn): {}.".format(use_cudnn)) if data_format not in ["NCHW", "NHWC"]: raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'. 
" "Received Attr(data_format): {}.".format(data_format)) channel_last = (data_format == "NHWC") channel_dim = -1 if channel_last else 1 - num_channels = input.shape[channel_dim] + num_channels = x.shape[channel_dim] num_filters = weight.shape[0] if num_channels < 0: raise ValueError("The channel dimmention of the input({}) " "should be defined. Received: {}.".format( - input.shape, num_channels)) + x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( "the channel of input must be divisible by groups," "received: the channel of input is {}, the shape of input is {}" - ", the groups is {}".format(num_channels, input.shape, groups)) + ", the groups is {}".format(num_channels, x.shape, groups)) if num_filters % groups != 0: raise ValueError( "the number of filters must be divisible by groups," "received: the number of filters is {}, the shape of weight is {}" ", the groups is {}".format(num_filters, weight.shape, groups)) + # use_cudnn = True if core.is_compiled_with_cuda() else False + cudnn_version = get_cudnn_version() + + use_cudnn = True if (core.is_compiled_with_cuda() and + cudnn_version is not None) else False + # update attrs padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2) stride = utils.convert_to_list(stride, 2, 'stride') dilation = utils.convert_to_list(dilation, 2, 'dilation') l_type = "conv2d" - if (num_channels == groups and num_filters % num_channels == 0 and - not use_cudnn): + if (num_channels == groups and num_filters % num_channels == 0): l_type = 'depthwise_conv2d' + use_cudnn = False - inputs = {'Input': [input], 'Filter': [weight]} + inputs = {'Input': [x], 'Filter': [weight]} attrs = { 'strides': stride, 'paddings': padding, @@ -287,15 +513,13 @@ def conv2d(input, 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False, 'fuse_relu_before_depthwise_conv', False, "padding_algorithm", padding_algorithm, "data_format", data_format) - pre_bias = getattr(core.ops, l_type)(input, weight, *attrs) + pre_bias = getattr(core.ops, l_type)(x, weight, *attrs) if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = dygraph_utils._append_activation_in_dygraph( - pre_act, act, use_cudnn=use_cudnn) + out = pre_bias else: - inputs = {'Input': [input], 'Filter': [weight]} + inputs = {'Input': [x], 'Filter': [weight]} attrs = { 'strides': stride, 'paddings': padding, @@ -307,8 +531,8 @@ def conv2d(input, "padding_algorithm": padding_algorithm, "data_format": data_format } - check_variable_and_dtype(input, 'input', - ['float16', 'float32', 'float64'], 'conv2d') + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'conv2d') helper = LayerHelper(l_type, **locals()) dtype = helper.input_dtype() pre_bias = helper.create_variable_for_type_inference(dtype) @@ -316,28 +540,279 @@ def conv2d(input, helper.append_op( type=l_type, inputs=inputs, outputs=outputs, attrs=attrs) if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = helper.append_activation(pre_act) + out = pre_bias + return out -def conv2d_transpose(input, +def conv_transpose1d(x, weight, bias=None, - output_size=None, - padding=0, stride=1, + padding=0, + output_padding=0, + groups=1, dilation=1, + output_size=None, + data_format="NCL", + name=None): + """ + The 1-D convolution transpose layer calculates the output 
based on the input, + filter, and dilation, stride, padding. Input(Input) and output(Output) + are in 'NCL' format or 'NLC' where N is batch size, C is the number of channels, + L is the length of the feature. The details of convolution transpose + layer, please refer to the following explanation and references + `therein `_. + If bias attribution and activation type are provided, bias is added to + the output of the convolution, and the corresponding activation function + is applied to the final result. + + For each input :math:`X`, the equation is: + + .. math:: + + Out = \sigma (W \\ast X + b) + + Where: + + * :math:`X`: Input value, a 3-D Tensor with 'NCL' format or 'NLC' format. + * :math:`W`: Filter value, a 3-D Tensor with 'MCK' format. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, a 3-D Tensor with data format 'NCL' or 'NLC', the shape of :math:`Out` and :math:`X` may be different. + + Example: + + - Input: + + Input shape: :math:`(N, C_{in}, L_{in})` + + Filter shape: :math:`(C_{in}, C_{out}, L_f)` + + - Output: + + Output shape: :math:`(N, C_{out}, L_{out})` + + Where + + .. math:: + + L^\prime_{out} &= (L_{in} - 1) * stride - pad_top - pad_bottom + dilation * (L_f - 1) + 1 + output_padding \\\\ + L_{out} &\in [ L^\prime_{out}, L^\prime_{out} + stride ] + + Note: + The conv1d_transpose can be seen as the backward of the conv1d. For conv1d, + when stride > 1, conv1d maps multiple input shape to the same output shape, + so for conv1d_transpose, when stride > 1, input shape maps multiple output shape. + If output_size is None, :math:`L_{out} = L^\prime_{out}`; + else, the :math:`L_{out}` of the output size must between :math:`L^\prime_{out}` + and :math:`L^\prime_{out} + stride`. conv1d_transpose can compute the kernel size automatically. + + Args: + x(Tensor): 3-D tensor with [N, C, L] or [N, L, C] format, + its data type is float32 or float64. + weight(Tensor): The convolution kernel, a Tensor with shape [C, M/g, K], + where M is the number of output channels(filters), g is the number of groups, + K is the size of the kernel. + bias(Tensor, optional): The bias, a Tensor with shape [M, ]. + stride(int|tuple|list, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain one integer, `(stride_size)`. + Default: stride = 1. + padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds + `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a + string, either 'VALID' or 'SAME' supported, which is the padding algorithm. + If `padding` is a tuple or list, it could be in two forms: + `[pad]` or `[pad_left, pad_right]`. Default: padding = 0. + output_padding(int|list|tuple, optional): The count of zeros to be added to tail of each dimension. + If it is a tuple, it must contain one integer. Default: 0. + groups(int, optional): The groups number of the conv1d transpose function. Inspired by + grouped convolution in Alex Krizhevsky's Deep CNN paper, in which + when group=2, the first half of the filters is only connected to the + first half of the input channels, while the second half of the + filters is only connected to the second half of the input channels. + Default: groups = 1. + dilation(int|tuple|list, optional): The dilation size. It means the spacing between the kernel points. 
+ If dilation is a tuple, it must contain one integer, `(dilation_size)`. + Default: dilation = 1. + output_size(int|tuple|list, optional): The output image size. If output size is a + tuple, it must contain one integer, `(feature_length)`. None if use + filter_size, padding, and stride to calculate output_size. + If output_size and filter_size are specified at the same time, They + should follow the formula above. Default: None. output_size and filter_size + should not be None at the same time. + data_format (str, optional): Specify the data format of the input, and the data format of the output + will be consistent with that of the input. An optional string from: `"NCL"`, `"NLC"`. + The default is `"NCL"`. When it is `"NCL"`, the data is stored in the order of: + `[batch_size, input_channels, input_length]`. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + A tensor representing the result of 1-D transpose convolution, whose + data type is the same with input. And its shape is (num_batches, channels, length) + when data_format is `"NCL"` and (num_batches, length, channels) when data_format is + `"NLC"`. + + Raises: + ValueError: If `data_format` is a string, but not "NCL" or "NLC". + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + or the element corresponding to the input's channel is not 0. + ValueError: If `output_size` and filter_size are None at the same time. + ValueError: If `output_padding` is greater than `stride`. + ShapeError: If the input is not 3-D Tensor. + ShapeError: If the input's dimension size and filter's dimension size not equal. + ShapeError: If the dimension size of input minus the size of `stride` is not 1. + ShapeError: If the number of input channels is not equal to filter's channels. + ShapeError: If the size of `output_size` is not equal to that of `stride`. + + Examples: + .. code-block:: python + + + + import paddle + import paddle.nn.functional as F + import numpy as np + + paddle.disable_static() + # shape: (1, 2, 4) + x=np.array([[[4, 0, 9, 7], + [8, 0, 9, 2,]]]).astype(np.float32) + # shape: (2, 1, 2) + y=np.array([[[7, 0]], + [[4, 2]]]).astype(np.float32) + x_var = paddle.to_tensor(x) + w_var = paddle.to_tensor(w) + y_var = F.conv_transpose1d(x_var, w_var) + y_np = y_var.numpy() + print y_np + + # [[[60. 16. 99. 75. 4.]]] + """ + cudnn_version = get_cudnn_version() + if cudnn_version is not None: + use_cudnn = True + else: + use_cudnn = False + + if data_format not in ['NCL', 'NLC']: + raise ValueError( + "Attr(data_format) of conv2d_transpose got wrong value: " + "received {}, but only 'NCL' or 'NLC' are supported.".format( + data_format)) + channel_last = (data_format == "NLC") + channel_dim = -1 if channel_last else 1 + + num_channels = x.shape[channel_dim] + if num_channels < 0: + raise ValueError("The channel dimmention of the input({}) " + "should be defined. 
Received: {}.".format( + x.shape, num_channels)) + if num_channels % groups != 0: + raise ValueError( + "the channel of input must be divisible by groups," + "received: the channel of input is {}, the shape of input is {}" + ", the groups is {}".format(num_channels, x.shape, groups)) + + # update attrs + padding, padding_algorithm = _update_padding_nd(padding, channel_last, 1) + + if len(padding) == 2: + padding = padding + [0] * 2 + elif len(padding) == 1: + padding = padding + [0] + else: + raise ValueError( + "The size of padding's dimmention should 1 or 2. But got padding={}". + format(padding)) + + stride = utils.convert_to_list(stride, 1, 'stride') + [1] + dilation = utils.convert_to_list(dilation, 1, 'dilation') + [1] + output_padding = utils.convert_to_list(output_padding, 1, + 'output_padding') + [0] + if output_padding[0] > stride[0]: + raise ValueError( + "The size of output_padding should not be greater than stride." + "But got output_padding={} and stride={}".format(output_padding[0], + stride[0])) + + if output_size is None: + output_size = [] + elif isinstance(output_size, (list, tuple, int)): + output_size = utils.convert_to_list(output_size, 1, 'output_size') + [1] + else: + raise ValueError("output_size should be int, or list, tuple of ints") + + op_type = 'conv2d_transpose' + num_filters = weight.shape[1] + if (num_channels == groups and num_filters == 1 and not use_cudnn): + op_type = 'depthwise_conv2d_transpose' + use_cudnn = False + + squeeze_axis = -2 if channel_last else -1 + conv2d_data_format = "NHWC" if channel_last else "NCHW" + + x = nn.unsqueeze(input=x, axes=[squeeze_axis]) + weight = nn.unsqueeze(input=weight, axes=[-1]) + + if in_dygraph_mode(): + attrs = ('output_size', output_size, 'strides', stride, 'paddings', + padding, 'padding_algorithm', padding_algorithm, 'dilations', + dilation, 'groups', groups, 'use_cudnn', use_cudnn, + 'data_format', conv2d_data_format) + out = getattr(core.ops, op_type)(x, weight, *attrs) + if bias is not None: + out = nn.elementwise_add(out, bias, axis=channel_dim) + else: + inputs = {'Input': [x], 'Filter': [weight]} + attrs = { + 'output_size': output_size, + 'strides': stride, + 'paddings': padding, + 'padding_algorithm': padding_algorithm, + 'dilations': dilation, + 'groups': groups, + 'use_cudnn': use_cudnn, + 'data_format': conv2d_data_format + } + check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'], + 'conv2d_transpose') + helper = LayerHelper(op_type, **locals()) + dtype = helper.input_dtype() + out = helper.create_variable_for_type_inference(dtype) + outputs = {"Output": [out]} + helper.append_op( + type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + if bias is not None: + out = nn.elementwise_add(out, bias, axis=channel_dim) + + if output_size is None: + out = pad2d( + out, + padding=[0, output_padding, 0, 0], + data_format=conv2d_data_format, + name=name) + out = nn.squeeze(input=out, axes=[squeeze_axis]) + return out + + +def conv_transpose2d(x, + weight, + bias=None, + stride=1, + padding=0, + output_padding=0, groups=1, - use_cudnn=True, - act=None, + dilation=1, data_format='NCHW', + output_size=None, name=None): """ - :alias_main: paddle.nn.functional.conv2d_transpose - :alias: paddle.nn.functional.conv2d_transpose,paddle.nn.functional.conv.conv2d_transpose The convolution2D transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. 
Input(Input) and output(Output) @@ -350,6 +825,7 @@ def conv2d_transpose(input, If bias attribution and activation type are provided, bias is added to the output of the convolution, and the corresponding activation function is applied to the final result. + See more detail in :ref:`api_nn_conv_ConvTranspose2d` . For each input :math:`X`, the equation is: @@ -398,18 +874,15 @@ def conv2d_transpose(input, conv2d_transpose can compute the kernel size automatically. Args: - input(Variable): 4-D Tensor with [N, C, H, W] or [N, H, W, C] format, + x(Tensor): 4-D Tensor with [N, C, H, W] or [N, H, W, C] format, whose data type is float32 or float64. - weight(Variable): The convolution kernel, a Tensor with shape [C, M/g, kH, kW], + weight(Tensor): The convolution kernel, a Tensor with shape [C, M/g, kH, kW], where M is the number of output channels(filters), g is the number of groups, kH is the height of the kernel, and kW is the width of the kernel. - bias(Variable, optional): The bias, a Tensor with shape [M, ]. - output_size(int|tuple|list, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_height, image_width). None if use - filter_size, padding, and stride to calculate output_size. - If output_size is specified, output_size and filter_size (weight)'s shape - should follow the formula above. Default: None. output_size and filter_size - should not be None at the same time. + bias(Tensor, optional): The bias, a Tensor with shape [M, ]. + stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain two integers, (stride_height, stride_width). + Otherwise, stride_height = stride_width = stride. Default: stride = 1. padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string, either 'VALID' or 'SAME' supported, which is the padding algorithm. @@ -421,10 +894,9 @@ def conv2d_transpose(input, when `data_format` is `'NHWC'`, `padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - stride(int|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain two integers, (stride_height, stride_width). - Otherwise, stride_height = stride_width = stride. Default: stride = 1. - dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points. + output_padding(int|list|tuple, optional): Additional size added to one side + of each dimension in the output shape. Default: 0. + dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points. If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width). Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1. groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by @@ -433,10 +905,12 @@ def conv2d_transpose(input, first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. Default: groups = 1. - use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True. - act (str, optional): Activation type, if it is set to None, activation is not appended. - Default: None. 
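Because ``act`` and ``use_cudnn`` are no longer accepted by the functional API, cuDNN is chosen automatically and any activation is applied explicitly by the caller. A minimal migration sketch (assuming ``F.relu`` from ``paddle.nn.functional`` and the [C_in, C_out/g, kH, kW] weight layout described above):

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    x = paddle.to_tensor(np.random.randn(2, 3, 8, 8).astype(np.float32))
    w = paddle.to_tensor(np.random.randn(3, 6, 3, 3).astype(np.float32))

    # before this patch: F.conv2d_transpose(x, w, act="relu")
    y = F.relu(F.conv_transpose2d(x, w))  # activation applied by the caller
    print(y.shape)  # [2, 6, 10, 10]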
+ output_size(int|tuple|list, optional): The output image size. If output size is a + tuple, it must contain two integers, (image_height, image_width). None if use + filter_size, padding, and stride to calculate output_size. + If output_size is specified, output_size and filter_size (weight)'s shape + should follow the formula above. Default: None. output_size and filter_size + should not be None at the same time. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: @@ -446,20 +920,17 @@ def conv2d_transpose(input, None by default. Returns: - A Variable holding Tensor representing the conv2d_transpose, whose + A Tensor representing the conv_transpose2d, whose data type is the same with input and shape is (num_batches, channels, out_h, - out_w) or (num_batches, out_h, out_w, channels). If act is None, the tensor variable - storing the transposed convolution result, and if act is not None, the - tensor variable storing transposed convolution and non-linearity activation - result. + out_w) or (num_batches, out_h, out_w, channels). The tensor variable storing + transposed convolution result. Raises: - ValueError: If the type of `use_cudnn` is not bool. ValueError: If `data_format` is not "NCHW" or "NHWC". ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. - ValueError: If `output_size` and filter_size are None at the same time. + ValueError: If `output_size` and kernel_size are None at the same time. ShapeError: If the input is not 4-D Tensor. ShapeError: If the input's dimension size and filter's dimension size not equal. ShapeError: If the dimension size of input minus the size of `stride` is not 2. @@ -469,28 +940,23 @@ def conv2d_transpose(input, Examples: .. code-block:: python - from paddle import fluid - import paddle.nn.functional as F - import paddle.fluid.dygraph as dg import numpy as np + import paddle + import paddle.nn.functional as F x = np.random.randn(2, 3, 8, 8).astype(np.float32) w = np.random.randn(3, 6, 3, 3).astype(np.float32) - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - w_var = dg.to_variable(w) - y_var = F.conv2d_transpose(x_var, w_var, act="relu") - y_np = y_var.numpy() + paddle.disable_static() + x_var = paddle.to_tensor(x) + w_var = paddle.to_tensor(w) + y_var = F.conv2d_transpose(x_var, w_var) + y_np = y_var.numpy() print(y_np.shape) # (2, 6, 10, 10) """ - if not isinstance(use_cudnn, bool): - raise ValueError("Attr(use_cudnn) should be True or False. " - "Received Attr(use_cudnn): {}.".format(use_cudnn)) if data_format not in ['NCHW', 'NHWC']: raise ValueError( "Attr(data_format) of conv2d_transpose got wrong value: " @@ -498,48 +964,65 @@ def conv2d_transpose(input, data_format)) channel_last = (data_format == "NHWC") channel_dim = -1 if channel_last else 1 - num_channels = input.shape[channel_dim] + num_channels = x.shape[channel_dim] if num_channels < 0: raise ValueError("The channel dimmention of the input({}) " "should be defined. 
Received: {}.".format( - input.shape, num_channels)) + x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( "the channel of input must be divisible by groups," "received: the channel of input is {}, the shape of input is {}" - ", the groups is {}".format(num_channels, input.shape, groups)) + ", the groups is {}".format(num_channels, x.shape, groups)) + + cudnn_version = get_cudnn_version() + + use_cudnn = True if (core.is_compiled_with_cuda() and + cudnn_version is not None) else False # update attrs padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2) stride = utils.convert_to_list(stride, 2, 'stride') dilation = utils.convert_to_list(dilation, 2, 'dilation') + if output_size is None: output_size = [] - elif isinstance(output_size, (list, tuple, int)): - output_size = utils.convert_to_list(output_size, 2, 'output_size') else: - raise ValueError("output_size should be int, or list, tuple of ints") + if output_padding != 0: + raise ValueError('output_padding option is mutually exclusive with ' + 'output_size') + if isinstance(output_size, (list, tuple, int)): + output_size = utils.convert_to_list(output_size, 2, 'output_size') + else: + raise ValueError( + "output_size should be int, or list, tuple of ints") + + if output_padding == 0: + output_padding = [] + else: + output_padding = utils.convert_to_list(output_padding, 2, + 'output_padding') op_type = 'conv2d_transpose' num_filters = weight.shape[1] - if (num_channels == groups and num_filters == 1 and not use_cudnn): + if (num_channels == groups and num_filters == 1): op_type = 'depthwise_conv2d_transpose' + use_cudnn = False if in_dygraph_mode(): - attrs = ('output_size', output_size, 'strides', stride, 'paddings', - padding, 'padding_algorithm', padding_algorithm, 'dilations', - dilation, 'groups', groups, 'use_cudnn', use_cudnn, - 'data_format', data_format) - pre_bias = getattr(core.ops, op_type)(input, weight, *attrs) + attrs = ('output_padding', output_padding, 'output_size', output_size, + 'strides', stride, 'paddings', padding, 'padding_algorithm', + padding_algorithm, 'dilations', dilation, 'groups', groups, + 'use_cudnn', use_cudnn, 'data_format', data_format) + pre_bias = getattr(core.ops, op_type)(x, weight, *attrs) if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = dygraph_utils._append_activation_in_dygraph( - pre_act, act, use_cudnn=use_cudnn) + out = pre_bias else: - inputs = {'Input': [input], 'Filter': [weight]} + inputs = {'Input': [x], 'Filter': [weight]} attrs = { + 'output_padding': output_padding, 'output_size': output_size, 'strides': stride, 'paddings': padding, @@ -549,37 +1032,32 @@ def conv2d_transpose(input, 'use_cudnn': use_cudnn, 'data_format': data_format } - check_variable_and_dtype(input, 'input', - ['float16', 'float32', 'float64'], + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'conv2d_transpose') helper = LayerHelper(op_type, **locals()) - dtype = helper.input_dtype() - pre_bias = helper.create_variable_for_type_inference(dtype) + pre_bias = helper.create_variable_for_type_inference(x.dtype) outputs = {"Output": [pre_bias]} helper.append_op( type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) + if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = 
helper.append_activation(pre_act) + out = pre_bias + return out -def conv3d(input, +def conv3d(x, weight, bias=None, - padding=0, stride=1, + padding=0, dilation=1, groups=1, - use_cudnn=True, - act=None, data_format="NCDHW", name=None): """ - :alias_main: paddle.nn.functional.conv3d - :alias: paddle.nn.functional.conv3d,paddle.nn.functional.conv.conv3d The convolution3D layer calculates the output based on the input, filter and strides, paddings, dilations, groups parameters. Input(Input) and @@ -625,12 +1103,15 @@ def conv3d(input, W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1 Args: - input (Variable): The input is 5-D Tensor with shape [N, C, D, H, W], the data + x (Tensor): The input is 5-D Tensor with shape [N, C, D, H, W], the data type of input is float16 or float32 or float64. weight (Variable): The convolution kernel, a Tensor with shape [M, C/g, kD, kH, kW], where M is the number of filters(output channels), g is the number of groups, kD, kH, kW are the filter's depth, height and width respectively. - bias (Variable, optional): The bias, a Tensor of shape [M, ]. + bias (Tensor, optional): The bias, a Tensor of shape [M, ]. + stride (int|tuple): The stride size. It means the stride in convolution. If stride is a + tuple, it must contain three integers, (stride_depth, stride_height, stride_width). + Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1. padding (string|int|list|tuple): The padding size. It means the number of zero-paddings on both sides for each dimension. If `padding` is a string, either 'VALID' or 'SAME' which is the padding algorithm. If padding size is a tuple or list, @@ -641,9 +1122,6 @@ def conv3d(input, when `data_format` is `"NDHWC"`, `pool_padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - stride (int|tuple): The stride size. It means the stride in convolution. If stride is a - tuple, it must contain three integers, (stride_depth, stride_height, stride_width). - Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1. dilation (int|tuple): The dilation size. It means the spacing between the kernel points. If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. @@ -653,10 +1131,6 @@ def conv3d(input, the first half of the filters is only connected to the first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. Default: groups=1 - use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act (str): Activation type, if it is set to None, activation is not appended. - Default: None. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: @@ -666,13 +1140,12 @@ def conv3d(input, None by default. Returns: - A Variable holding Tensor representing the conv3d, whose data type is + A Tensor representing the conv3d, whose data type is the same with input. 
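The output-size formula quoted above is plain integer arithmetic; a hypothetical helper (not part of the patch) reproduces the shape printed in the conv3d example further below:

.. code-block:: python

    def conv_out_size(in_size, kernel, padding=0, dilation=1, stride=1):
        """Output extent of a convolution along one spatial dimension."""
        return (in_size + 2 * padding - (dilation * (kernel - 1) + 1)) // stride + 1

    # conv3d example: input 8x8x8, kernel 3x3x3, padding 0, stride 1 -> 6x6x6
    assert conv_out_size(8, 3) == 6
    # with a padding of 1 the extent is preserved for a 3-wide kernel
    assert conv_out_size(8, 3, padding=1) == 8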
If act is None, the tensor variable storing the convolution result, and if act is not None, the tensor variable storing convolution and non-linearity activation result. Raises: - ValueError: If the type of `use_cudnn` is not bool. ValueError: If `data_format` is not "NCDHW" or "NDHWC". ValueError: If the channel dimmention of the input is less than or equal to zero. ValueError: If `padding` is a string, but not "SAME" or "VALID". @@ -706,10 +1179,6 @@ def conv3d(input, # (2, 6, 6, 6, 6) """ # entry check - if not isinstance(use_cudnn, bool): - raise ValueError("Attr(use_cudnn) should be True or False. Received " - "Attr(use_cudnn): {}. ".format(use_cudnn)) - if data_format not in ["NCDHW", "NDHWC"]: raise ValueError( "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " @@ -717,12 +1186,12 @@ def conv3d(input, channel_last = (data_format == "NDHWC") channel_dim = -1 if channel_last else 1 - num_channels = input.shape[channel_dim] + num_channels = x.shape[channel_dim] num_filters = weight.shape[0] if num_channels < 0: raise ValueError( "The channel dimmention of the input({}) should be defined. " - "Received: {}.".format(input.shape, num_channels)) + "Received: {}.".format(x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( "The number of input channels must be divisible by Attr(groups). " @@ -734,6 +1203,10 @@ def conv3d(input, "Received: number of filters({}), groups({}).".format(num_filters, groups)) + cudnn_version = get_cudnn_version() + use_cudnn = True if (core.is_compiled_with_cuda() and + cudnn_version is not None) else False + padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3) stride = utils.convert_to_list(stride, 3, 'stride') dilation = utils.convert_to_list(dilation, 3, 'dilation') @@ -744,15 +1217,13 @@ def conv3d(input, 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False, "padding_algorithm", padding_algorithm, "data_format", data_format) - pre_bias = getattr(core.ops, op_type)(input, weight, *attrs) + pre_bias = getattr(core.ops, op_type)(x, weight, *attrs) if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = dygraph_utils._append_activation_in_dygraph( - pre_act, act, use_cudnn=use_cudnn) + out = pre_bias else: - inputs = {'Input': [input], 'Filter': [weight]} + inputs = {'Input': [x], 'Filter': [weight]} attrs = { 'strides': stride, 'paddings': padding, @@ -765,8 +1236,8 @@ def conv3d(input, } helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() - check_variable_and_dtype(input, 'input', - ['float16', 'float32', 'float64'], 'conv3d') + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'conv3d') pre_bias = helper.create_variable_for_type_inference(dtype) outputs = {"Output": [pre_bias]} @@ -774,31 +1245,26 @@ def conv3d(input, helper.append_op( type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = helper.append_activation(pre_act) + out = pre_bias return out -def conv3d_transpose(input, +def conv_transpose3d(x, weight, bias=None, - output_size=None, - padding=0, stride=1, - dilation=1, + padding=0, + output_padding=0, groups=1, - use_cudnn=True, - act=None, + dilation=1, data_format='NCDHW', + output_size=None, name=None): """ - :alias_main: 
paddle.nn.functional.conv3d_transpose - :alias: paddle.nn.functional.conv3d_transpose,paddle.nn.functional.conv.conv3d_transpose - - The convolution3D transpose layer calculates the output based on the input, + The convolution3d transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input(Input) and output(Output) are in NCDHW or NDHWC format. Where N is batch size, C is the number of channels, D is the depth of the feature, H is the height of the feature, and W @@ -809,6 +1275,7 @@ def conv3d_transpose(input, If bias attribution and activation type are provided, bias is added to the output of the convolution, and the corresponding activation function is applied to the final result. + See more detail in :ref:`api_nn_conv_ConvTranspose3d` . For each input :math:`X`, the equation is: @@ -861,17 +1328,16 @@ def conv3d_transpose(input, conv3d_transpose can compute the kernel size automatically. Args: - input(Variable): The input is 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C], the data type + x(Tensor): The input is 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C], the data type of input is float32 or float64. - weight (Variable): The convolution kernel, a Tensor with shape [C, M/g, kD, kH, kW], + weight (Tensor): The convolution kernel, a Tensor with shape [C, M/g, kD, kH, kW], where M is the number of filters(output channels), g is the number of groups, kD, kH, kW are the filter's depth, height and width respectively. - bias (Variable, optional): The bias, a Tensor of shape [M, ]. - output_size(int|tuple, optional): The output image size. If output size is a - tuple, it must contain three integers, (image_depth, image_height, image_width). This - parameter only works when filter_size is None. If output_size and filter_size are - specified at the same time, They should follow the formula above. Default: None. - Output_size and filter_size should not be None at the same time. + bias (Tensor, optional): The bias, a Tensor of shape [M, ]. + stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain three integers, (stride_depth, stride_height, + stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. + Default: stride = 1. padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string, either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding` @@ -882,11 +1348,9 @@ def conv3d_transpose(input, when `data_format` is `'NDHWC'`, `padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - stride(int|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain three integers, (stride_depth, stride_height, - stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. - Default: stride = 1. - dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points. + output_padding(int|list|tuple, optional): Additional size added to one side + of each dimension in the output shape. Default: 0. + dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points. 
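Assuming the transposed-convolution size formula shown for the 1-D case earlier in this file applies per dimension, a hypothetical helper (not part of the patch) reproduces the shape printed in the conv_transpose3d example further below:

.. code-block:: python

    def conv_transpose_out_size(in_size, kernel, padding=0, dilation=1,
                                stride=1, output_padding=0):
        """Minimum output extent of a transposed convolution along one dimension."""
        return ((in_size - 1) * stride - 2 * padding
                + dilation * (kernel - 1) + 1 + output_padding)

    # conv_transpose3d example: input 8x8x8, kernel 3x3x3, stride 1 -> 10x10x10
    assert conv_transpose_out_size(8, 3) == 10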
If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. @@ -896,32 +1360,32 @@ def conv3d_transpose(input, first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. Default: groups=1 - use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act (str, optional): Activation type, if it is set to None, activation is not appended. - Default: None. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. + output_size(int|list|tuple, optional): The output image size. If output size is a + tuple, it must contain three integers, (image_depth, image_height, image_width). This + parameter only works when filter_size is None. If output_size and filter_size are + specified at the same time, They should follow the formula above. Default: None. + Output_size and filter_size should not be None at the same time. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. Returns: - A Variable holding Tensor representing the conv3d_transpose, whose data + A Tensor representing the conv_transpose3d, whose data type is the same with input and shape is (num_batches, channels, out_d, out_h, out_w) or (num_batches, out_d, out_h, out_w, channels). If act is None, the tensor variable storing the transposed convolution result, and if act is not None, the tensor variable storing transposed convolution and non-linearity activation result. Raises: - ValueError: If the type of `use_cudnn` is not bool. ValueError: If `data_format` is not "NCDHW" or "NDHWC". ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. - ValueError: If `output_size` and filter_size are None at the same time. + ValueError: If `output_size` and kernel_size are None at the same time. ShapeError: If the input is not 5-D Tensor. ShapeError: If the input's dimension size and filter's dimension size not equal. ShapeError: If the dimension size of input minus the size of `stride` is not 2. @@ -930,29 +1394,26 @@ def conv3d_transpose(input, Examples: .. code-block:: python + + import numpy as np - from paddle import fluid + import paddle import paddle.nn.functional as F - import paddle.fluid.dygraph as dg - import numpy as np x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32) w = np.random.randn(3, 6, 3, 3, 3).astype(np.float32) - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - w_var = dg.to_variable(w) - y_var = F.conv3d_transpose(x_var, w_var, act="relu") - y_np = y_var.numpy() + paddle.disable_static() + + x_var = paddle.to_tensor(x) + w_var = paddle.to_tensor(w) + y_var = F.conv_transpose3d(x_var, w_var) + y_np = y_var.numpy() print(y_np.shape) # (2, 6, 10, 10, 10) """ # entry checks - if not isinstance(use_cudnn, bool): - raise ValueError("Attr(use_cudnn) should be True or False. 
" - "Received Attr(use_cudnn): {}.".format(use_cudnn)) if data_format not in ["NCDHW", "NDHWC"]: raise ValueError( "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " @@ -960,12 +1421,12 @@ def conv3d_transpose(input, channel_last = (data_format == "NDHWC") channel_dim = -1 if channel_last else 1 - num_channels = input.shape[channel_dim] + num_channels = x.shape[channel_dim] num_filters = weight.shape[1] if num_channels < 0: raise ValueError( "The channel dimmention of the input({}) should be defined. " - "Received: {}.".format(input.shape, num_channels)) + "Received: {}.".format(x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( "The number of input channels must be divisible by Attr(groups). " @@ -977,29 +1438,45 @@ def conv3d_transpose(input, dilation = utils.convert_to_list(dilation, 3, 'dilation') if output_size is None: output_size = [] - elif isinstance(output_size, (list, tuple, int)): - output_size = utils.convert_to_list(output_size, 3, 'output_size') else: - raise ValueError("output_size should be int, or list, tuple of ints") + if output_padding != 0: + raise ValueError('output_padding option is mutually exclusive with ' + 'output_size') + if isinstance(output_size, (list, tuple, int)): + output_size = utils.convert_to_list(output_size, 3, 'output_size') + else: + raise ValueError( + "output_size should be int, or list, tuple of ints") + + if output_padding == 0: + output_padding = [] + else: + output_padding = utils.convert_to_list(output_padding, 3, + 'output_padding') + + cudnn_version = get_cudnn_version() + + #TODO(LielinJiang): whether to use cudnn according to the version of cudnn + use_cudnn = True if (core.is_compiled_with_cuda() and + cudnn_version is not None) else False op_type = 'conv3d_transpose' data_format_ = "NHWC" if channel_last else "NCHW" if in_dygraph_mode(): - attrs = ('output_size', output_size, 'paddings', padding, - "padding_algorithm", padding_algorithm, 'strides', stride, - 'dilations', dilation, 'groups', groups, 'use_cudnn', - use_cudnn, "data_format", data_format_) - pre_bias = getattr(core.ops, op_type)(input, weight, *attrs) + attrs = ('output_padding', output_padding, 'output_size', output_size, + 'paddings', padding, "padding_algorithm", padding_algorithm, + 'strides', stride, 'dilations', dilation, 'groups', groups, + 'use_cudnn', use_cudnn, "data_format", data_format_) + pre_bias = getattr(core.ops, op_type)(x, weight, *attrs) if bias is not None: - pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = dygraph_utils._append_activation_in_dygraph( - pre_act, act, use_cudnn=use_cudnn) + out = pre_bias else: - inputs = {'Input': [input], 'Filter': [weight]} + inputs = {'Input': [x], 'Filter': [weight]} attrs = { + 'output_padding': output_padding, 'output_size': output_size, 'paddings': padding, "padding_algorithm": padding_algorithm, @@ -1010,19 +1487,17 @@ def conv3d_transpose(input, "data_format": data_format_ } helper = LayerHelper(op_type, **locals()) - dtype = helper.input_dtype() - check_variable_and_dtype(input, 'input', - ['float16', 'float32', 'float64'], 'conv3d') + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], + 'conv3d') - pre_bias = helper.create_variable_for_type_inference(dtype) + pre_bias = helper.create_variable_for_type_inference(x.dtype) outputs = {"Output": [pre_bias]} helper.append_op( type=op_type, inputs=inputs, outputs=outputs, attrs=attrs) if bias is not None: - 
pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim) + out = nn.elementwise_add(pre_bias, bias, axis=channel_dim) else: - pre_act = pre_bias - out = helper.append_activation(pre_act) + out = pre_bias return out diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py new file mode 100644 index 0000000000000000000000000000000000000000..e77bf0e39672984f7076938b134f3e54f4c761ab --- /dev/null +++ b/python/paddle/nn/functional/input.py @@ -0,0 +1,110 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import warnings +from ...fluid.framework import Variable, in_dygraph_mode +from ...fluid.layer_helper import LayerHelper +from ...fluid.layers import core +from ...fluid.data_feeder import check_variable_and_dtype, check_dtype + +__all__ = ['one_hot'] + + +def one_hot(x, num_classes, name=None): + """ + + The operator converts each id in the input 'x' to an one-hot vector with a + num_classes length. The value in the vector dimension corresponding to the id + is 1, and the value in the remaining dimension is 0. + + The shape of output Tensor is generated by appending num_classes dimension + behind the last dimension of the 'x' shape. + + .. code-block:: text + + Example 1: + + input: + x.shape = [4] + x.data = [1, 1, 3, 0] + num_classes = 4 + + output: + Out.shape = [4, 4] + Out.data = [[0., 1., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 1.], + [1., 0., 0., 0.]] + + Example 2: + + input: + x.shape = [4] + x.data = [1, 1, 5, 0] + num_classes = 4 + + output: Throw an exception for Illegal value + The second dimension in X is 5, which is greater than num_classes, + so it throws an exception. + + + Args: + x(Tensor): Tensor with shape :math:`[N_1, N_2, ..., N_k]` , + which contains at least one dimension. The data type is int32 or int64. + num_classes(int): An integer defining the num_classes of the one hot dimension. If input 'x' + is word id, num_classes is generally the dictionary size. + + Returns: + Tensor: The one-hot representations of 'x'. A Tensor with type float32. + + Examples: + .. code-block:: python + + import paddle + # Correspond to the first example above, where label.shape is 4 and one_hot_label.shape is [4, 4]. 
+ label = paddle.data(name="label", shape=[4, 1], dtype="int64") + # label.shape = [4] + # label.data = [1, 1, 3, 0] + one_hot_label = paddle.nn.functional.one_hot(x=label, num_classes=4) + # one_hot_label.shape = [4, 4] + # one_hot_label.data = [[0., 1., 0., 0.], + # [0., 1., 0., 0.], + # [0., 0., 0., 1.], + # [1., 0., 0., 0.]] + """ + + if in_dygraph_mode(): + return core.ops.one_hot_v2(x, 'depth', num_classes, + 'allow_out_of_range', False) + else: + check_variable_and_dtype(x, 'input', ['int32', 'int64'], 'one_hot_v2') + helper = LayerHelper("one_hot_v2", **locals()) + + one_hot_out = helper.create_variable_for_type_inference(dtype='float32') + if not isinstance(num_classes, Variable): + # user attribute + inputs = {'X': x} + attrs = {'depth': num_classes, 'allow_out_of_range': False} + else: + num_classes.stop_gradient = True + inputs = {'X': x, 'depth_tensor': num_classes} + attrs = {'allow_out_of_range': False} + helper.append_op( + type="one_hot_v2", + inputs=inputs, + attrs=attrs, + outputs={'Out': one_hot_out}, + stop_gradient=True) + return one_hot_out diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 85ca043a10cca8dfaab2a4dcf724030fd505a7c1..55bb36d136405385a88b991576c2a9091437d456 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define loss functions of neural network +import paddle +from ...fluid.layer_helper import LayerHelper +from ...fluid.data_feeder import check_variable_and_dtype +import paddle.fluid as fluid + +# TODO: define loss functions of neural network import numpy as np import paddle import paddle.fluid as fluid @@ -20,12 +25,9 @@ from ...fluid.framework import core, in_dygraph_mode from ...fluid.layers.nn import _elementwise_op_in_dygraph from ...fluid.layers import bpr_loss #DEFINE_ALIAS from ...fluid.layers import center_loss #DEFINE_ALIAS -from ...fluid.layers import cross_entropy #DEFINE_ALIAS from ...fluid.layers import dice_loss #DEFINE_ALIAS from ...fluid.layers import iou_similarity #DEFINE_ALIAS -from ...fluid.layers import kldiv_loss #DEFINE_ALIAS from ...fluid.layers import log_loss #DEFINE_ALIAS -from ...fluid.layers import mse_loss #DEFINE_ALIAS from ...fluid.layers import npair_loss #DEFINE_ALIAS from ...fluid.layers import rank_loss #DEFINE_ALIAS from ...fluid.layers import reshape @@ -41,9 +43,13 @@ from ...fluid.layers import edit_distance #DEFINE_ALIAS from ...fluid.layers import huber_loss #DEFINE_ALIAS from ...fluid.layers import sampled_softmax_with_cross_entropy #DEFINE_ALIAS from ...fluid.layer_helper import LayerHelper +from ...fluid.framework import in_dygraph_mode +from ...fluid.framework import _varbase_creator from ...fluid.framework import Variable __all__ = [ + 'binary_cross_entropy', + 'binary_cross_entropy_with_logits', 'bpr_loss', 'center_loss', 'cross_entropy', @@ -51,7 +57,7 @@ __all__ = [ 'edit_distance', 'huber_loss', 'iou_similarity', - 'kldiv_loss', + 'kl_div', 'l1_loss', 'log_loss', 'mse_loss', @@ -64,25 +70,376 @@ __all__ = [ 'sigmoid_cross_entropy_with_logits', 'sigmoid_focal_loss', 'smooth_l1', + 'smooth_l1_loss', 'softmax_with_cross_entropy', 'square_error_cost', 'ssd_loss', - 'teacher_student_sigmoid_loss' + 'teacher_student_sigmoid_loss', + 'ctc_loss', ] +def binary_cross_entropy(input, label, weight=None, reduction='mean', + name=None): + """ + This op measures the binary_cross_entropy loss between 
input predictions ``input`` + and target labels ``label`` . The binary_cross_entropy loss can be described as: + + If :attr:`weight` is set, the loss is: + + .. math:: + Out = -1 * weight * (label * log(input) + (1 - label) * log(1 - input)) + + If :attr:`weight` is None, the loss is: + + .. math:: + Out = -1 * (label * log(input) + (1 - label) * log(1 - input)) + + If :attr:`reduction` set to ``'none'``, the interface will return the original loss `Out`. + + If :attr:`reduction` set to ``'mean'``, the reduced mean loss is: + + .. math:: + Out = MEAN(Out) + + If :attr:`reduction` set to ``'sum'``, the reduced sum loss is: + + .. math:: + Out = SUM(Out) + + Note that the input predictions ``input`` always be the output of sigmoid, and the target labels ``label`` + should be numbers between 0 and 1. + + Parameters: + input (Tensor): The input predications tensor. 2-D tensor with shape: [N, *], + N is batch_size, `*` means number of additional dimensions. The ``input`` + should always be the output of sigmod. Available dtype is float32, float64. + label (Tensor): The target labels tensor. 2-D tensor with the same shape as + ``input``. The target labels which values should be numbers between 0 and 1. + Available dtype is float32, float64. + weight (Tensor, optional): A manual rescaling weight given to the loss of each + batch element. If given, has to be a Tensor of size nbatch and the data type + is float32, float64. Default is ``'None'``. + reduction (str, optional): Indicate how to average the loss by batch_size, + the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. + If :attr:`reduction` is ``'none'``, the unreduced loss is returned; + If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; + If :attr:`reduction` is ``'sum'``, the summed loss is returned. + Default is ``'mean'``. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + + Returns: + output (Tensor): If ``reduction`` is ``'none'``, the shape of output is + same as ``input`` , else the shape of output is scalar. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + input_data = np.array([0.5, 0.6, 0.7]).astype("float32") + label_data = np.array([1.0, 0.0, 1.0]).astype("float32") + + paddle.disable_static() + input = paddle.to_tensor(input_data) + label = paddle.to_tensor(label_data) + output = paddle.nn.functional.binary_cross_entropy(input, label) + print(output.numpy()) # [0.65537095] + paddle.enable_static() + + """ + if reduction not in ['sum', 'mean', 'none']: + raise ValueError( + "The value of 'reduction' in binary_cross_entropy should be 'sum', " + "'mean' or 'none', but received %s, which is not allowed." 
% + reduction) + + if in_dygraph_mode(): + out = core.ops.bce_loss(input, label) + if weight is not None: + out = core.ops.elementwise_mul(out, weight, 'axis', -1) + + if reduction == 'sum': + return core.ops.reduce_sum(out, 'dim', [0], 'keep_dim', False, + "reduce_all", True) + elif reduction == 'mean': + return core.ops.reduce_mean(out, 'dim', [0], 'keep_dim', False, + "reduce_all", True) + else: + return out + + fluid.data_feeder.check_variable_and_dtype( + input, 'input', ['float32', 'float64'], 'binary_cross_entropy') + fluid.data_feeder.check_variable_and_dtype( + label, 'label', ['float32', 'float64'], 'binary_cross_entropy') + + sub_name = name if weight is None and reduction is 'none' else None + helper = LayerHelper("binary_cross_entropy", name=sub_name) + out = helper.create_variable_for_type_inference(dtype=input.dtype) + helper.append_op( + type='bce_loss', + inputs={ + 'X': [input], + 'Label': [label], + }, + outputs={'Out': [out]}) + + if weight is not None: + if isinstance(weight, paddle.framework.Variable): + weight_name = name if reduction is 'none' else None + out = paddle.multiply(out, weight, axis=-1, name=weight_name) + else: + raise ValueError( + "The weight is not a Tensor, please convert to Tensor.") + + if reduction == 'sum': + return paddle.sum(out, name=name) + elif reduction == 'mean': + return paddle.mean(out, name=name) + else: + return out + + +def binary_cross_entropy_with_logits(logit, + label, + weight=None, + reduction='mean', + pos_weight=None, + name=None): + """ + This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer. + Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits`` + layer and some reduce operations. + + This measures the element-wise probability error in classification tasks + in which each class is independent. + This can be thought of as predicting labels for a data-point, where labels + are not mutually exclusive. For example, a news article can be about + politics, technology or sports at the same time or none of these. + + First this operator calculate loss function as follows: + + .. math:: + Out = -Labels * \\log(\\sigma(Logit)) - (1 - Labels) * \\log(1 - \\sigma(Logit)) + + We know that :math:`\\sigma(Logit) = \\frac{1}{1 + \\e^{-Logit}}`. By substituting this we get: + + .. math:: + Out = Logit - Logit * Labels + \\log(1 + \\e^{-Logit}) + + For stability and to prevent overflow of :math:`\\e^{-Logit}` when Logit < 0, + we reformulate the loss as follows: + + .. math:: + Out = \\max(Logit, 0) - Logit * Labels + \\log(1 + \\e^{-\|Logit\|}) + + Then, if ``weight`` or ``pos_weight`` is not None, this operator multiply the + weight tensor on the loss `Out`. The ``weight`` tensor will attach different + weight on every items in the batch. The ``pos_weight`` will attach different + weight on the positive label of each class. + + Finally, this operator applies reduce operation on the loss. + If :attr:`reduction` set to ``'none'``, the operator will return the original loss `Out`. + If :attr:`reduction` set to ``'mean'``, the reduced mean loss is :math:`Out = MEAN(Out)`. + If :attr:`reduction` set to ``'sum'``, the reduced sum loss is :math:`Out = SUM(Out)`. + + Note that the target labels ``label`` should be numbers between 0 and 1. + + Args: + logit (Tensor): The input predications tensor. 2-D tensor with shape: [N, *], + N is batch_size, `*` means number of additional dimensions. The ``logit`` + is usually the output of Linear layer. Available dtype is float32, float64. 
+ label (Tensor): The target labels tensor. 2-D tensor with the same shape as + ``logit``. The target labels which values should be numbers between 0 and 1. + Available dtype is float32, float64. + weight (Tensor, optional): A manual rescaling weight given to the loss of each + batch element. If given, it has to be a 1D Tensor whose size is `[N, ]`, + The data type is float32, float64. Default is ``'None'``. + reduction (str, optional): Indicate how to average the loss by batch_size, + the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. + If :attr:`reduction` is ``'none'``, the unreduced loss is returned; + If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; + If :attr:`reduction` is ``'sum'``, the summed loss is returned. + Default is ``'mean'``. + pos_weight (Tensor, optional): A weight of positive examples. Must be a vector + with length equal to the number of classes. The data type is float32, float64. + Default is ``'None'``. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + output (Tensor): If ``reduction`` is ``'none'``, the shape of output is + same as ``logit`` , else the shape of output is scalar. + + Examples: + + .. code-block:: python + + import paddle + paddle.disable_static() + logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32") + label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32") + output = paddle.nn.functional.binary_cross_entropy_with_logits(logit, label) + print(output.numpy()) # [0.45618808] + + """ + if reduction not in ['sum', 'mean', 'none']: + raise ValueError( + "The value of 'reduction' in binary_cross_entropy_with_logits " + "should be 'sum', 'mean' or 'none', but received %s, which is not allowed." 
+ % reduction) + + if in_dygraph_mode(): + one = _varbase_creator(dtype=logit.dtype) + core.ops.fill_constant(one, 'value', + float(1.0), 'force_cpu', False, 'dtype', + one.dtype, 'str_value', '1.0', 'shape', [1]) + out = core.ops.sigmoid_cross_entropy_with_logits(logit, label) + if pos_weight is not None: + log_weight = core.ops.elementwise_add( + core.ops.elementwise_mul( + label, core.ops.elementwise_sub(pos_weight, one)), one) + out = core.ops.elementwise_mul(out, log_weight) + if weight is not None: + out = core.ops.elementwise_mul(out, weight) + + if reduction == "sum": + return core.ops.reduce_sum(out, 'reduce_all', True) + elif reduction == "mean": + return core.ops.mean(out) + else: + return out + + fluid.data_feeder.check_variable_and_dtype( + logit, 'logit', ['float32', 'float64'], + 'binary_cross_entropy_with_logits') + fluid.data_feeder.check_variable_and_dtype( + label, 'label', ['float32', 'float64'], + 'binary_cross_entropy_with_logits') + sigmoid_name = None + if reduction == 'none' and pos_weight is None and weight is None: + sigmoid_name = name + + out = paddle.nn.functional.sigmoid_cross_entropy_with_logits( + logit, label, name=sigmoid_name) + + one = paddle.fill_constant(shape=[1], value=1.0, dtype=logit.dtype) + if pos_weight is not None: + fluid.data_feeder.check_variable_and_dtype( + pos_weight, 'pos_weight', ['float32', 'float64'], + 'binary_cross_entropy_with_logits') + log_weight = paddle.add( + paddle.multiply(label, paddle.elementwise_sub(pos_weight, one)), + one) + pos_weight_name = name if reduction == 'none' and weight is None else None + out = paddle.multiply(out, log_weight, name=pos_weight_name) + + if weight is not None: + fluid.data_feeder.check_variable_and_dtype( + weight, 'weight', ['float32', 'float64'], + 'binary_cross_entropy_with_logits') + weight_name = name if reduction == 'none' else None + out = paddle.multiply(out, weight, name=weight_name) + + if reduction == "sum": + return paddle.sum(out, name=name) + elif reduction == "mean": + return paddle.mean(out, name=name) + return out + + +def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None): + """ + This operator calculates smooth_l1_loss. Creates a criterion that uses a squared + term if the absolute element-wise error falls below 1 and an L1 term otherwise. + In some cases it can prevent exploding gradients and it is more robust and less + sensitivity to outliers. Also known as the Huber loss: + + .. math:: + + loss(x,y)=\\frac{1}{n}\\sum_{i}z_i + + + where z_i is given by: + + .. math:: + + \\mathop{z_i}=\\left\\{\\begin{array}{rcl} + 0.5(x_i - y_i)^2 & & {if |x_i - y_i| < delta} \\\\ + delta * |x_i - y_i| - 0.5 * delta^2 & & {otherwise} + \\end{array} \\right. + + Parameters: + input (Tensor): Input tensor, the data type is float32 or float64. Shape is + (N, C), where C is number of classes, and if shape is more than 2D, this + is (N, C, D1, D2,..., Dk), k >= 1. + label (Tensor): Label tensor, the data type is float32 or float64. The shape of label + is the same as the shape of input. + reduction (str, optional): Indicate how to average the loss by batch_size, + the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. + If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; + If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned. + If :attr:`reduction` is ``'none'``, the unreduced loss is returned. + Default is ``'mean'``. + delta (float, optional): Specifies the hyperparameter delta to be used. 
+        The value determines how large the errors need to be to use the L1 term. Errors
+        smaller than delta are minimized with the L2 term. The parameter is ignored for
+        negative/zero values. Default = 1.0
+    name (str, optional): Name for the operation (optional, default is
+        None). For more information, please refer to :ref:`api_guide_Name`.
+
+    Returns:
+    The tensor variable storing the smooth_l1_loss of input and label.
+
+    Return type: Tensor.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+
+            paddle.disable_static()
+            input_data = np.random.rand(3, 3).astype("float32")
+            label_data = np.random.rand(3, 3).astype("float32")
+            input = paddle.to_tensor(input_data)
+            label = paddle.to_tensor(label_data)
+            output = paddle.nn.functional.smooth_l1_loss(input, label)
+            print(output.numpy())
+    """
+    fluid.data_feeder.check_variable_and_dtype(
+        input, 'input', ['float32', 'float64'], 'smooth_l1_loss')
+    fluid.data_feeder.check_variable_and_dtype(
+        label, 'label', ['float32', 'float64'], 'smooth_l1_loss')
+
+    out = huber_loss(input=input, label=label, delta=delta)
+
+    if reduction not in ['sum', 'mean', 'none']:
+        raise ValueError(
+            "The value of 'reduction' in smooth_l1_loss should be 'sum', 'mean' or"
+            " 'none', but received %s, which is not allowed." % reduction)
+    if reduction == 'none':
+        return out
+    elif reduction == 'mean':
+        return fluid.layers.reduce_mean(out)
+    elif reduction == 'sum':
+        return fluid.layers.reduce_sum(out)
+
+
 def margin_ranking_loss(input,
                         other,
-                        target,
+                        label,
                         margin=0.0,
                         reduction='mean',
                         name=None):
     """
-    This op the calcluate the the margin rank loss between the input x, y and target, use the math function as follows.
+    This op calculates the margin rank loss between the input, other and label, using the math function as follows.
 
-    .. math::
-        margin\_rank\_loss = max(0, -target * (input - other) + margin)
+    .. math::
+        margin\_rank\_loss = max(0, -label * (input - other) + margin)
 
     If :attr:`reduction` set to ``'mean'``, the reduced mean loss is:
 
@@ -99,7 +456,7 @@ def margin_ranking_loss(input,
     Parameters:
         input(Tensor): the first input tensor, it's data type should be float32, float64.
         other(Tensor): the second input tensor, it's data type should be float32, float64.
-        target(Tensor): the target value corresponding to input, it's data type should be float32, float64.
+        label(Tensor): the label value corresponding to input, it's data type should be float32, float64.
         margin (float, optional): The margin value to add, default value is 0;
         reduction (str, optional): Indicate the reduction to apply to the loss, the candicates are ``'none'``, ``'mean'``, ``'sum'``.If :attr:`reduction` is ``'none'``, the unreduced loss is returned; If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned. If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned. Default is ``'mean'``.
         name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
@@ -110,20 +467,24 @@ def margin_ranking_loss(input,
         ..
code-block:: python - import numpy as np - import paddle - + import numpy as np + import paddle + paddle.disable_static() - - x = paddle.to_variable(np.array([[1, 2], [3, 4]]).astype('float32')) - y = paddle.to_variable(np.array([[2, 1], [2, 4]]).astype('float32')) - target = paddle.to_variable(np.array([[1, -1], [-1, -1]]).astype('float32')) - loss = paddle.nn.functional.margin_ranking_loss(x, y, target) + + input = paddle.to_variable(np.array([[1, 2], [3, 4]]).astype('float32')) + other = paddle.to_variable(np.array([[2, 1], [2, 4]]).astype('float32')) + label = paddle.to_variable(np.array([[1, -1], [-1, -1]]).astype('float32')) + loss = paddle.nn.functional.margin_ranking_loss(input, other, label) print(loss.numpy()) # [0.75] """ + if reduction not in ['sum', 'mean', 'none']: + raise ValueError( + "The value of 'reduction' in MarginRankingLoss should be 'sum', 'mean' or 'none', but " + "received %s, which is not allowed." % reduction) if fluid.framework.in_dygraph_mode(): out = core.ops.elementwise_sub(other, input) - out = core.ops.elementwise_mul(out, target) + out = core.ops.elementwise_mul(out, label) if margin != 0.0: margin = fluid.dygraph.base.to_variable([margin], dtype=out.dtype) out = core.ops.elementwise_add(out, margin) @@ -140,10 +501,10 @@ def margin_ranking_loss(input, fluid.data_feeder.check_variable_and_dtype( other, 'other', ['float32', 'float64'], 'margin_rank_loss') fluid.data_feeder.check_variable_and_dtype( - target, 'target', ['float32', 'float64'], 'margin_rank_loss') + label, 'label', ['float32', 'float64'], 'margin_rank_loss') out = paddle.elementwise_sub(other, input) - out = paddle.multiply(out, target) + out = paddle.multiply(out, label) if margin != 0.0: margin_var = out.block.create_var(dtype=out.dtype) @@ -175,62 +536,62 @@ def margin_ranking_loss(input, return result_out -def l1_loss(x, label, reduction='mean', name=None): +def l1_loss(input, label, reduction='mean', name=None): """ - This operator computes the L1 Loss of Tensor ``x`` and ``label`` as follows. + This operator computes the L1 Loss of Tensor ``input`` and ``label`` as follows. - If :attr:`reduction` set to ``'none'``, the loss is: + If `reduction` set to ``'none'``, the loss is: .. math:: - Out = \lvert x - label\rvert + Out = \lvert input - label\rvert - If :attr:`reduction` set to ``'mean'``, the loss is: + If `reduction` set to ``'mean'``, the loss is: .. math:: - Out = MEAN(\lvert x - label\rvert) + Out = MEAN(\lvert input - label\rvert) - If :attr:`reduction` set to ``'sum'``, the loss is: + If `reduction` set to ``'sum'``, the loss is: .. math:: - Out = SUM(\lvert x - label\rvert) + Out = SUM(\lvert input - label\rvert) + - Parameters: - x (Tensor): The input tensor. The shapes is [N, *], where N is batch size and `*` means any number of additional dimensions. It's data type should be float32, float64, int32, int64. - label (Tensor): label. The shapes is [N, *], same shape as ``x`` . It's data type should be float32, float64, int32, int64. - reduction (str, optional): Indicate the reduction to apply to the loss, + input (Tensor): The input tensor. The shapes is [N, *], where N is batch size and `*` means any number of additional dimensions. It's data type should be float32, float64, int32, int64. + label (Tensor): label. The shapes is [N, *], same shape as ``input`` . It's data type should be float32, float64, int32, int64. + reduction (str, optional): Indicate the reduction to apply to the loss, the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. 
- If :attr:`reduction` is ``'none'``, the unreduced loss is returned; - If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned. - If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned. + If `reduction` is ``'none'``, the unreduced loss is returned; + If `reduction` is ``'mean'``, the reduced mean loss is returned. + If `reduction` is ``'sum'``, the reduced sum loss is returned. Default is ``'mean'``. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Returns: - Tensor, the L1 Loss of Tensor ``x`` and ``label``. - If :attr:`reduction` is ``'none'``, the shape of output loss is [N, *], the same as ``x`` . - If :attr:`reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1], which means the output is a scalar. + Tensor, the L1 Loss of Tensor ``input`` and ``label``. + If `reduction` is ``'none'``, the shape of output loss is [N, *], the same as ``input`` . + If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. Examples: .. code-block:: python import paddle import numpy as np - + paddle.disable_static() - x_data = np.array([[1.5, 0.8], [0.2, 1.3]]).astype("float32") + input_data = np.array([[1.5, 0.8], [0.2, 1.3]]).astype("float32") label_data = np.array([[1.7, 1], [0.4, 0.5]]).astype("float32") - x = paddle.to_variable(x_data) + input = paddle.to_variable(input_data) label = paddle.to_variable(label_data) - l1_loss = paddle.nn.functional.l1_loss(x, label) - print(l1_loss.numpy()) + l1_loss = paddle.nn.functional.l1_loss(input, label) + print(l1_loss.numpy()) # [0.35] - l1_loss = paddle.nn.functional.l1_loss(x, label, reduction='none') - print(l1_loss.numpy()) + l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='none') + print(l1_loss.numpy()) # [[0.20000005 0.19999999] # [0.2 0.79999995]] - l1_loss = paddle.nn.functional.l1_loss(x, label, reduction='sum') - print(l1_loss.numpy()) + l1_loss = paddle.nn.functional.l1_loss(input, label, reduction='sum') + print(l1_loss.numpy()) # [1.4] """ if reduction not in ['sum', 'mean', 'none']: @@ -240,7 +601,7 @@ def l1_loss(x, label, reduction='mean', name=None): if in_dygraph_mode(): unreduced = _elementwise_op_in_dygraph( - x, label, axis=-1, act='abs', op_name='elementwise_sub') + input, label, axis=-1, act='abs', op_name='elementwise_sub') if reduction == 'mean': return core.ops.mean(unreduced) elif reduction == 'sum': @@ -250,18 +611,18 @@ def l1_loss(x, label, reduction='mean', name=None): return unreduced fluid.data_feeder.check_variable_and_dtype( - x, 'x', ['float32', 'float64', 'int32', 'int64'], 'l1_loss') + input, 'input', ['float32', 'float64', 'int32', 'int64'], 'l1_loss') fluid.data_feeder.check_variable_and_dtype( label, 'label', ['float32', 'float64', 'int32', 'int64'], 'l1_loss') if reduction == 'sum': - unreduced = paddle.elementwise_sub(x, label, act='abs') + unreduced = paddle.elementwise_sub(input, label, act='abs') return paddle.sum(unreduced, name=name) elif reduction == 'mean': - unreduced = paddle.elementwise_sub(x, label, act='abs') + unreduced = paddle.elementwise_sub(input, label, act='abs') return paddle.mean(unreduced, name=name) else: - return paddle.elementwise_sub(x, label, act='abs', name=name) + return paddle.elementwise_sub(input, label, act='abs', name=name) def nll_loss(input, @@ -371,3 +732,426 @@ def nll_loss(input, out = reshape(out, shape=out_shape) return out + + +def kl_div(input, label, reduction='mean', name=None): + """ + This operator calculates 
the Kullback-Leibler divergence loss
+    between Input(X) and Input(Target). Note that Input(X) is the
+    log-probability and Input(Target) is the probability.
+
+    KL divergence loss is calculated as follows:
+
+    $$l(x, y) = y * (\log(y) - x)$$
+
+    where :math:`x` is the input and :math:`y` is the label.
+
+    When :attr:`reduction` is ``'none'``, the output loss has
+    the same shape as the input, the loss at each point is calculated
+    separately and no reduction is applied.
+
+    When :attr:`reduction` is ``'mean'``, the output loss has
+    shape [1] and the loss value is the mean value of all losses.
+
+    When :attr:`reduction` is ``'sum'``, the output loss has
+    shape [1] and the loss value is the sum value of all losses.
+
+    When :attr:`reduction` is ``'batchmean'``, the output loss has
+    shape [1] and the loss value is the sum value of all losses
+    divided by the batch size.
+
+    Args:
+        input (Tensor): The input tensor. The shape is [N, *], where N is batch size and `*` means
+             any number of additional dimensions. Its data type should be float32, float64.
+        label (Tensor): label. The shape is [N, *], same shape as ``input`` . Its data type should be float32, float64.
+        reduction (str, optional): Indicate how to average the loss,
+             the candidates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``.
+             If `reduction` is ``'mean'``, the reduced mean loss is returned;
+             If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned;
+             if `reduction` is ``'sum'``, the reduced sum loss is returned;
+             if `reduction` is ``'none'``, no reduction will be applied.
+             Default is ``'mean'``.
+        name(str, optional): Name for the operation (optional, default is None). For more information,
+            please refer to :ref:`api_guide_Name`.
+
+    Returns:
+        Tensor: The KL divergence loss. The data type is the same as the input tensor.
+
+    Examples:
+        ..
code-block:: python + + import paddle + import numpy as np + import paddle.nn.functional as F + + paddle.enable_imperative() + + shape = (5, 20) + input = np.random.uniform(-10, 10, shape).astype('float32') + target = np.random.uniform(-10, 10, shape).astype('float32') + + # 'batchmean' reduction, loss shape will be [N] + pred_loss = F.kl_div(paddle.to_variable(input), + paddle.to_variable(target), reduction='batchmean') + # shape=[5] + + # 'mean' reduction, loss shape will be [1] + pred_loss = F.kl_div(paddle.to_variable(input), + paddle.to_variable(target), reduction='mean') + # shape=[1] + + # 'sum' reduction, loss shape will be [1] + pred_loss = F.kl_div(paddle.to_variable(input), + paddle.to_variable(target), reduction='sum') + # shape=[1] + + # 'none' reduction, loss shape is same with input shape + pred_loss = F.kl_div(paddle.to_variable(input), + paddle.to_variable(target), reduction='none') + # shape=[5, 20] + + """ + if paddle.in_dynamic_mode(): + out = core.ops.kldiv_loss(input, label, 'reduction', reduction) + return out + + helper = LayerHelper('kl_div', **locals()) + + fluid.data_feeder.check_variable_and_dtype(input, 'input', + ['float32', 'float64'], 'kl_div') + fluid.data_feeder.check_variable_and_dtype(label, 'label', + ['float32', 'float64'], 'kl_div') + fluid.data_feeder.check_type(reduction, 'reduction', str, 'kl_div') + + loss = helper.create_variable_for_type_inference(dtype=input.dtype) + helper.append_op( + type='kldiv_loss', + inputs={'X': input, + 'Target': label}, + outputs={'Loss': loss}, + attrs={'reduction': reduction}) + return loss + + +def mse_loss(input, label, reduction='mean', name=None): + """ + This op accepts input predications and label and returns the mean square error. + + If :attr:`reduction` is set to ``'none'``, loss is calculated as: + + .. math:: + Out = (input - label)^2 + + If :attr:`reduction` is set to ``'mean'``, loss is calculated as: + + .. math:: + Out = \operatorname{mean}((input - label)^2) + + If :attr:`reduction` is set to ``'sum'``, loss is calculated as: + + .. math:: + Out = \operatorname{sum}((input - label)^2) + + Parameters: + input (Tensor): Input tensor, the data type should be float32 or float64. + label (Tensor): Label tensor, the data type should be float32 or float64. + reduction (string, optional): The reduction method for the output, + could be 'none' | 'mean' | 'sum'. + If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned. + If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned. + If :attr:`reduction` is ``'none'``, the unreduced loss is returned. + Default is ``'mean'``. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + + Returns: + Tensor: The tensor tensor storing the mean square error difference of input and label. + + Return type: Tensor. + + Examples: + + .. 
code-block:: python + import numpy as np + import paddle + + + # static graph mode + paddle.enable_static() + mse_loss = paddle.nn.loss.MSELoss() + input = paddle.data(name="input", shape=[1]) + label = paddle.data(name="label", shape=[1]) + place = paddle.CPUPlace() + input_data = np.array([1.5]).astype("float32") + label_data = np.array([1.7]).astype("float32") + + output = mse_loss(input,label) + exe = paddle.static.Executor(place) + exe.run(paddle.static.default_startup_program()) + output_data = exe.run( + paddle.static.default_main_program(), + feed={"input":input_data, "label":label_data}, + fetch_list=[output], + return_numpy=True) + print(output_data) + # [array([0.04000002], dtype=float32)] + + # dynamic graph mode + paddle.disable_static() + input = paddle.to_variable(input_data) + label = paddle.to_variable(label_data) + output = mse_loss(input, label) + print(output.numpy()) + # [0.04000002] + + """ + + if reduction not in ['sum', 'mean', 'none']: + raise ValueError( + "'reduction' in 'mse_loss' should be 'sum', 'mean' or 'none', " + "but received {}.".format(reduction)) + + if not paddle.fluid.framework.in_dygraph_mode(): + paddle.fluid.data_feeder.check_variable_and_dtype( + input, 'input', ['float32', 'float64'], 'mse_loss') + paddle.fluid.data_feeder.check_variable_and_dtype( + label, 'label', ['float32', 'float64'], 'mse_loss') + + if reduction == 'none': + return paddle.fluid.layers.square( + paddle.fluid.layers.elementwise_sub(input, label), name=name) + elif reduction == 'mean': + return paddle.mean( + paddle.fluid.layers.square( + paddle.fluid.layers.elementwise_sub(input, label)), + name=name) + else: + return paddle.sum(paddle.fluid.layers.square( + paddle.fluid.layers.elementwise_sub(input, label)), + name=name) + + +def ctc_loss(log_probs, + labels, + input_lengths, + label_lengths, + blank=0, + reduction='mean'): + """ + + An operator integrating the open source Warp-CTC library (https://github.com/baidu-research/warp-ctc) + to compute Connectionist Temporal Classification (CTC) loss. + It can be aliased as softmax with CTC, since a native softmax activation + is interated to the Warp-CTC library to normalize values for each row of the input tensor. + + Parameters: + log_probs (Tensor): The unscaled probability sequence with padding, which is a 3-D Tensor. The tensor shape is [max_logit_length, batch_size, num_classes + 1], where max_logit_length is the longest length of input logit sequence. The data type must be float32. + labels (Tensor): The ground truth sequence with padding, which must be a 3-D Tensor. The tensor shape is [batch_size, max_label_length], where max_label_length is the longest length of label sequence. The data type must be int32. + input_lengths (Tensor): The length for each input sequence, it should have shape [batch_size] and dtype int64. + label_lengths (Tensor): The length for each label sequence, it should have shape [batch_size] and dtype int64. + blank (int, optional): The blank label index of Connectionist Temporal Classification (CTC) loss, which is in the half-opened interval [0, num_classes + 1). The data type must be int32. Default is 0. + reduction (string, optional): Indicate how to average the loss, the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. If :attr:`reduction` is ``'mean'``, the output loss will be divided by the label_lengths, and then return the mean of quotient; If :attr:`reduction` is ``'sum'``, return the sum of loss; If :attr:`reduction` is ``'none'``, no reduction will be applied. Default is ``'mean'``. 
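+
+        Note:
+            As a sketch of the reductions described above: if ``loss`` is the ``'none'``
+            result, the ``'mean'`` result equals
+            ``paddle.mean(loss / paddle.cast(label_lengths, loss.dtype))`` and the
+            ``'sum'`` result equals ``paddle.sum(loss)``.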
+ + Returns: + Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``log_probs``. + + Examples: + + .. code-block:: python + + # declarative mode + import paddle.nn.functional as F + import numpy as np + import paddle + + # length of the longest logit sequence + max_seq_length = 4 + #length of the longest label sequence + max_label_length = 3 + # number of logit sequences + batch_size = 2 + # class num + class_num = 3 + + np.random.seed(1) + log_probs = np.array([[[4.17021990e-01, 7.20324516e-01, 1.14374816e-04], + [3.02332580e-01, 1.46755889e-01, 9.23385918e-02]], + + [[1.86260208e-01, 3.45560730e-01, 3.96767467e-01], + [5.38816750e-01, 4.19194520e-01, 6.85219526e-01]], + + [[2.04452246e-01, 8.78117442e-01, 2.73875929e-02], + [6.70467496e-01, 4.17304814e-01, 5.58689833e-01]], + + [[1.40386939e-01, 1.98101491e-01, 8.00744593e-01], + [9.68261600e-01, 3.13424170e-01, 6.92322612e-01]], + + [[8.76389146e-01, 8.94606650e-01, 8.50442126e-02], + [3.90547849e-02, 1.69830427e-01, 8.78142476e-01]]]).astype("float32") + labels = np.array([[1, 2, 2], + [1, 2, 2]]).astype("int32") + input_lengths = np.array([5, 5]).astype("int64") + label_lengths = np.array([3, 3]).astype("int64") + + paddle.disable_static() + log_probs = paddle.to_tensor(log_probs) + labels = paddle.to_tensor(labels) + input_lengths = paddle.to_tensor(input_lengths) + label_lengths = paddle.to_tensor(label_lengths) + + loss = F.ctc_loss(log_probs, labels, + input_lengths, + label_lengths, + blank=0, + reduction='none') + print(loss.numpy()) #[3.9179852 2.9076521] + + loss = F.ctc_loss(log_probs, labels, + input_lengths, + label_lengths, + blank=0, + reduction='mean') + print(loss.numpy()) #[1.1376063] + + """ + + loss_out = fluid.layers.warpctc(log_probs, labels, blank, False, + input_lengths, label_lengths) + + loss_out = fluid.layers.squeeze(loss_out, [-1]) + assert reduction in ['mean', 'sum', 'none'] + if reduction == 'mean': + loss_out = paddle.mean(loss_out / paddle.cast(label_lengths, + loss_out.dtype)) + elif reduction == 'sum': + loss_out = paddle.sum(loss_out) + return loss_out + + +def cross_entropy(input, + label, + weight=None, + ignore_index=-100, + reduction='mean'): + """ + This operator implements the cross entropy loss function. This OP combines ``LogSoftmax``, + and ``NLLLoss`` together. + + It is useful when training a classification problem with ``C`` classes. + If provided, the optional argument ``weight`` should be a 1D Variable assigning + weight to each of the classes. + + For predictions label, and target label, the loss is calculated as follows. + + .. math:: + + loss_j = -\\text{input[class]} + + \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{input}_i)\\right), j = 1,..., K + + If weight is not ``None``: + + .. math:: + + loss_j = \\text{weight[class]}(-\\text{input[class]} + + \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{input}_i)\\right)), j = 1,..., K + + Parameters: + input (Tensor): Input tensor, the data type is float32, float64. Shape is + (N, C), where C is number of classes, and if shape is more than 2D, this + is (N, C, D1, D2,..., Dk), k >= 1. + label (Tensor): Label tensor, the data type is int64. Shape is (N), where each + value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is + (N, D1, D2,..., Dk), k >= 1. 
+        weight (Tensor, optional): Weight tensor, a manual rescaling weight given
+            to each class and the shape is (C). It has the same dimensions as class
+            number and the data type is float32, float64. Default is ``'None'``.
+        reduction (str, optional): Indicate how to average the loss by batch_size,
+            the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
+            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
+            If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned.
+            If :attr:`reduction` is ``'none'``, the unreduced loss is returned.
+            Default is ``'mean'``.
+        ignore_index (int64, optional): Specifies a target value that is ignored
+            and does not contribute to the input gradient. Default is ``-100``.
+
+    Returns:
+        The tensor variable storing the cross_entropy_loss of input and label.
+
+    Return type: Tensor.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+            paddle.disable_static()
+            input_data = np.random.random([5, 100]).astype("float64")
+            label_data = np.random.randint(0, 100, size=(5)).astype(np.int64)
+            weight_data = np.random.random([100]).astype("float64")
+            input = paddle.to_tensor(input_data)
+            label = paddle.to_tensor(label_data)
+            weight = paddle.to_tensor(weight_data)
+            loss = paddle.nn.functional.cross_entropy(input=input, label=label, weight=weight)
+            print(loss.numpy())
+
+    """
+    if not in_dygraph_mode():
+        fluid.data_feeder.check_variable_and_dtype(
+            input, 'input', ['float32', 'float64'], 'cross_entropy_loss')
+        fluid.data_feeder.check_variable_and_dtype(label, 'label', ['int64'],
+                                                   'cross_entropy_loss')
+
+    if reduction not in ['sum', 'mean', 'none']:
+        raise ValueError(
+            "The value of 'reduction' in cross_entropy_loss should be 'sum', 'mean' or"
+            " 'none', but received %s, which is not allowed." % reduction)
+
+    #step 1. log_softmax
+    log_softmax_out = paddle.nn.functional.log_softmax(input)
+    if weight is not None and not isinstance(weight, Variable):
+        raise ValueError(
+            "The 'weight' is not a Variable, please convert it to Variable.")
+
+    #step 2.
nll_loss + input = log_softmax_out + helper = LayerHelper('nll_loss', **locals()) + dtype = helper.input_dtype(input) + + if not in_dygraph_mode(): + fluid.data_feeder.check_variable_and_dtype( + input, 'input', ['float32', 'float64'], 'nll_loss') + fluid.data_feeder.check_variable_and_dtype(label, 'label', ['int64'], + 'nll_loss') + + x_shape = list(input.shape) + n = x_shape[0] + c = x_shape[1] + x_dims = len(x_shape) + if x_dims < 2: + raise ValueError('Expected 2 or more dimensions (got {})'.format( + x_dims)) + if x_dims != 2 and x_dims != 4: + input = reshape(input, shape=[n, c, 1, -1]) + label = reshape(label, shape=[n, 1, -1]) + out_shape = [n] + x_shape[2:] + + if not in_dygraph_mode(): + fluid.data_feeder.check_variable_and_dtype( + input, 'input', ['float32', 'float64'], 'nll_loss') + fluid.data_feeder.check_variable_and_dtype(label, 'label', ['int64'], + 'nll_loss') + inputs = {'X': input, 'Label': label} + attrs = {'reduction': reduction, 'ignore_index': ignore_index} + if weight is not None: + if isinstance(weight, Variable): + inputs['Weight'] = weight + + out = helper.create_variable_for_type_inference(dtype=input.dtype) + total_weight = helper.create_variable_for_type_inference(dtype=input.dtype) + outputs = {'Out': out, 'Total_weight': total_weight} + + helper.append_op( + type='nll_loss', inputs=inputs, outputs=outputs, attrs=attrs) + if x_dims != 2 and x_dims != 4 and reduction == 'none': + out = reshape(out, shape=out_shape) + + return out diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index 04b031b91ce387c1d8266d53725090d23b592f8c..0b007041b4ab336ae355f5d338a0d7dca9b5380e 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -13,6 +13,11 @@ # limitations under the License. # TODO: define normalization api +import paddle +import paddle.fluid as fluid +from ...fluid.data_feeder import check_variable_and_dtype, check_type +from ...fluid.layer_helper import LayerHelper +from ...fluid.framework import in_dygraph_mode, core from ...fluid.layers import l2_normalize #DEFINE_ALIAS from ...fluid.layers import lrn #DEFINE_ALIAS @@ -24,5 +29,84 @@ __all__ = [ 'l2_normalize', # 'layer_norm', 'lrn', + 'normalize', # 'spectral_norm' ] + + +def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): + """ + This op normalizes ``x`` along dimension ``axis`` using :math:`L_p` norm. This layer computes + + .. math:: + + y = \frac{x}{ \max\left( \lvert \lvert x \rvert \rvert_p, epsilon\right) } + + .. math:: + \lvert \lvert x \rvert \rvert_p = \left(\sum_i {\lvert x_i\rvert^p} \right)^{1/p} + + where, :math:`\sum_i{\lvert x_i\rvert^p}` is calculated along the ``axis`` dimension. + + + Args: + x (Tensor): The input tensor could be N-D tensor, and the input data type could be float32 or float64. + p (float|int, optional): The exponent value in the norm formulation. Default: 2 + axis (int, optional): The axis on which to apply normalization. If ``x`` is 1-D tensor, ``axis`` is fixed to 0. If `axis < 0`, \ + the dimension to normalization is `x.ndim + axis`. -1 is the last dimension. + epsilon (float, optional): Small float added to denominator to avoid dividing by zero. Default is 1e-12. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor, the output has the same shape and data type with ``x``. + + Examples: + + .. 
code-block:: python + + import numpy as np + import paddle + import paddle.nn.functional as F + + paddle.disable_static() + x = np.arange(6, dtype=np.float32).reshape(2,3) + x = paddle.to_variable(x) + y = F.normalize(x) + print(y.numpy()) + # [[0. 0.4472136 0.8944272 ] + # [0.42426404 0.5656854 0.7071067 ]] + + y = F.normalize(x, p=1.5) + print(y.numpy()) + # [[0. 0.40862012 0.81724024] + # [0.35684016 0.4757869 0.5947336 ]] + + y = F.normalize(x, axis=0) + print(y.numpy()) + # [[0. 0.24253564 0.37139067] + # [1. 0.97014254 0.9284767 ]] + """ + if len(x.shape) == 1: + axis = 0 + if in_dygraph_mode(): + eps = fluid.dygraph.base.to_variable([epsilon], dtype=x.dtype) + out = core.ops.p_norm(x, 'axis', axis, 'porder', + float(p), 'keepdim', True, 'epsilon', epsilon) + return x / core.ops.elementwise_max(out, eps) + + check_type(p, 'p', (float, int), 'normalize') + check_type(axis, 'axis', (int), 'normalize') + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'normalize') + + attrs = { + 'axis': axis, + 'porder': float(p), + 'keepdim': True, + 'epsilon': epsilon, + } + helper = LayerHelper('p_norm', **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='p_norm', inputs={'X': x}, outputs={'Out': out}, attrs=attrs) + eps = out.block.create_var(dtype=out.dtype) + paddle.fill_constant([1], out.dtype, epsilon, out=eps) + return paddle.elementwise_div(x, paddle.maximum(out, eps), name=name) diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 618145fb1fad47e2105edf6186bb4606494d57c9..96d361e7ecf318df02417fdf6ed011a3f5ff328a 100644 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -17,5 +17,1449 @@ from ...fluid.layers import pool2d #DEFINE_ALIAS from ...fluid.layers import pool3d #DEFINE_ALIAS from ...fluid.layers import adaptive_pool2d #DEFINE_ALIAS from ...fluid.layers import adaptive_pool3d #DEFINE_ALIAS +from ...fluid import core +from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_ +from ...fluid.layers import utils, LayerHelper +from ...fluid.data_feeder import check_type, check_variable_and_dtype, check_type, check_dtype, convert_dtype +from ...fluid.layers import unsqueeze, squeeze -__all__ = ['pool2d', 'pool3d', 'adaptive_pool2d', 'adaptive_pool3d'] +__all__ = [ + 'pool2d', + 'pool3d', + 'avg_pool1d', + 'max_pool1d', + 'adaptive_avg_pool1d', + 'adaptive_max_pool1d', + 'adaptive_avg_pool2d', + 'adaptive_avg_pool3d', + 'adaptive_pool2d', + 'adaptive_pool3d', + 'max_pool2d', + 'avg_pool2d', + 'max_pool3d', + 'avg_pool3d', +] + + +def check_input(x, dimension): + if len(x.shape) != dimension: + raise ValueError("Excepted Input X is 3-D tensor, but received {}-D {}". + format(len(x.shape), type(x))) + + +def check_instance(x, x_name, types=(int, float)): + + if not isinstance(x, types): + raise ValueError("Excepted {} type for {} but received type: {}. ". + format(types, x_name, type(x))) + + +def update_padding1d(padding, pool_type='avg'): + def is_list_or_tuple(ele): + if isinstance(ele, list) or isinstance(ele, tuple): + return True + return False + + if is_list_or_tuple(padding): + if padding.__len__() == 1 and not is_list_or_tuple(padding[0]): + return [0, padding[0]] + else: + raise ValueError( + "{}_pool1d() argument 'padding' should contain one int (got {})". 
+ format(pool_type, padding.__len__())) + else: + padding = [0, padding] + + return padding + + +def update_padding2d(padding, data_format): + def is_list_or_tuple(ele): + if isinstance(ele, list) or isinstance(ele, tuple): + return True + return False + + if is_list_or_tuple(padding) and len(padding) == 4: + if is_list_or_tuple(padding[0]) and (data_format == "NCHW"): + if not (padding[0] == [0, 0] and padding[1] == [0, 0]): + raise ValueError( + "Non-zero pool_padding(%s) in the batch or channel dimensions " + "is not supported." % str(padding)) + padding = padding[2:4] + padding = [ele for a_list in padding for ele in a_list] + elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"): + if not (padding[0] == [0, 0] and padding[3] == [0, 0]): + raise ValueError( + "Non-zero pool_padding(%s) in the batch or channel dimensions " + "is not supported." % str(padding)) + padding = padding[1:3] + padding = [ele for a_list in padding for ele in a_list] + padding = utils.convert_to_list(padding, 4, 'padding') + + if utils._is_symmetric_padding(padding, 2): + padding = [padding[0], padding[2]] + else: + padding = utils.convert_to_list(padding, 2, 'padding') + + return padding + + +def update_padding3d(padding, data_format): + def is_list_or_tuple(ele): + if isinstance(ele, (list, tuple)): + return True + return False + + if is_list_or_tuple(padding) and len(padding) == 5: + if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"): + if not (padding[0] == [0, 0] and padding[1] == [0, 0]): + raise ValueError( + "Non-zero pool_padding(%s) in the batch or channel dimensions " + "is not supported." % str(padding)) + padding = padding[2:5] + padding = [ele for a_list in padding for ele in a_list] + elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"): + if not (padding[0] == [0, 0] and padding[4] == [0, 0]): + raise ValueError( + "Non-zero pool_padding(%s) in the batch or channel dimensions " + "is not supported." % str(padding)) + padding = padding[1:4] + padding = [ele for a_list in padding for ele in a_list] + padding = utils.convert_to_list(padding, 6, 'padding') + if utils._is_symmetric_padding(padding, 3): + padding = [padding[0], padding[2], padding[4]] + + elif is_list_or_tuple(padding) and len(padding) == 6: + padding = utils.convert_to_list(padding, 6, 'padding') + if utils._is_symmetric_padding(padding, 3): + padding = [padding[0], padding[2], padding[4]] + else: + padding = utils.convert_to_list(padding, 3, 'padding') + + return padding + + +def avg_pool1d(x, + kernel_size, + stride=None, + padding=0, + count_include_pad=True, + ceil_mode=False, + name=None): + """ + + This operation applies a 1D average pooling over an input signal composed + of several input planes, based on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + The output tensor shape will be [N, C, output_size]. + + The output value of the layer with input size (N, C, L), + output (N, C, L_{out}) and kernel_size k can be precisely described as + For average pool1d: + + .. math:: + + Output(N_i, C_i, l) &= mean(Input[N_i, C_i, stride \times l:stride \times l+k]) + + + Args: + x (Tensor): The input tensor of pooling operator which is a 3-D tensor with + shape [N, C, L]. where `N` is batch size, `C` is the number of channels, + `L` is the length of the feature. The data type if float32 or float64. + kernel_size (int|list|tuple): The pool kernel size. 
If pool kernel size is a tuple or list, + it must contain one integers. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain one integers. + padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be the following forms: `[pad_left, pad_right]`. If padding is non-zero, + then the input is implicitly zero-padded on both sides for padding number of points. + count_include_pad (bool): Whether to exclude padding points in average pooling + mode, default is `true`. + ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width. + If it is set to False, the floor function will be used. Default False + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: The output tensor of pooling result. The data type is same as input tensor. + + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ValueError: If `padding` is a list or tuple but its length greater than 1. + ShapeError: If the input is not a 3-D. + ShapeError: If the output's shape calculated is not greater than 0. + + + Examples: + + .. code-block:: python + + import paddle + import paddle.nn.functional as F + paddle.disable_static() + + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0) + # pool_out shape: [1, 3, 16] + + """ + """NCL to NCHW""" + data_format = "NCHW" + check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'avg_pool1d') + check_input(x, 3) + x = unsqueeze(x, [2]) + kernel_size = utils.convert_to_list(kernel_size, 1, 'pool_size') + kernel_size = [1] + kernel_size + if stride is None: + stride = kernel_size + else: + stride = utils.convert_to_list(stride, 1, 'pool_stride') + stride = [1] + stride + + padding_algorithm = "EXPLICIT" + if isinstance(padding, str): + padding = padding.upper() + if padding not in ["SAME", "VALID"]: + raise ValueError( + "Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'." + % str(padding)) + if padding == "VALID": + padding_algorithm = "VALID" + padding = [0] + if ceil_mode != False: + raise ValueError( + "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. 
" + "Received ceil_mode: True.") + elif padding == "SAME": + padding_algorithm = "SAME" + padding = [0] + + padding = update_padding1d(padding, "avg") + + if in_dygraph_mode(): + output = core.ops.pool2d( + x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', + False, 'strides', stride, 'paddings', padding, 'padding_algorithm', + padding_algorithm, 'use_cudnn', not count_include_pad, 'ceil_mode', + ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', + data_format) + return squeeze(output, [2]) + + op_type = 'pool2d' + helper = LayerHelper(op_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + helper.append_op( + type=op_type, + inputs={"X": x}, + outputs={"Out": pool_out}, + attrs={ + "pooling_type": 'avg', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": not count_include_pad, + "data_format": data_format, + }) + + return squeeze(pool_out, [2]) + + +def max_pool1d(x, + kernel_size, + stride=None, + padding=0, + return_indices=False, + ceil_mode=False, + name=None): + """ + + Applies a 1D max pooling over an input signal composed of several input planes based + on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + + The output value of the layer with input size (N, C, L), + output (N, C, L_{out}) and kernel_size k can be precisely described as + For average pool1d: + + .. math:: + + Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])} + + Args: + x (Tensor): The input tensor of pooling operator which is a 3-D tensor with + shape [N, C, L], where `N` is batch size, `C` is the number of channels, + `L` is the length of the feature. The data type if float32 or float64. + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one integers. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain one integers. + padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be the following forms: `[pad_left, pad_right]`. + return_indices (bool): Whether return the max indices along with the outputs. default is `False`. + ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. + If it is set to False, the floor function will be used. Default False. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: The output tensor of pooling result. The data type is same as input tensor. + + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ValueError: If `padding` is a list or tuple but its length greater than 1. + ShapeError: If the input is not a 3-D. + ShapeError: If the output's shape calculated is not greater than 0. + + + Examples: + + .. 
code-block:: python + + import paddle + import paddle.nn.functional as F + paddle.disable_static() + + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) + # pool_out shape: [1, 3, 16] + + pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True) + # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] + + """ + """NCL to NCHW""" + data_format = "NCHW" + check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'max_pool1d') + check_input(x, 3) + x = unsqueeze(x, [2]) + kernel_size = [1] + utils.convert_to_list(kernel_size, 1, 'pool_size') + if stride is None: + stride = kernel_size + else: + stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride') + + padding_algorithm = "EXPLICIT" + if isinstance(padding, str): + padding = padding.upper() + if padding not in ["SAME", "VALID"]: + raise ValueError( + "Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'." + % str(padding)) + if padding == "VALID": + padding_algorithm = "VALID" + padding = [0] + if ceil_mode != False: + raise ValueError( + "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. " + "Received ceil_mode: True.") + elif padding == "SAME": + padding_algorithm = "SAME" + padding = [0] + + padding = update_padding1d(padding, 'max') + + if in_dygraph_mode(): + pool_out = core.ops.max_pool2d_with_index( + x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, + 'paddings', padding, 'padding_algorithm', padding_algorithm, + 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, + 'exclusive', True, 'data_format', data_format) + return (squeeze(pool_out[0], [2]), squeeze( + pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + + op_type = 'max_pool2d_with_index' + helper = LayerHelper(op_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=op_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": True, + "data_format": data_format, + }) + + return (squeeze(pool_out, [2]), + squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) + + +def adaptive_avg_pool1d(x, output_size, name=None): + """ + + This operation applies a 1D adaptive average pooling over an input signal composed + of several input planes, based on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + The output tensor shape will be [N, C, output_size]. + + For average adaptive pool1d: + + .. math:: + + lstart &= floor(i * L_{in} / L_{out}) + + lend &= ceil((i + 1) * L_{in} / L_{out}) + + Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)} + + Args: + x (Tensor): The input tensor of pooling operator, which is a 3-D tensor + with shape [N, C, L]. The format of input tensor is NCL, + where N is batch size, C is the number of channels, L is the + length of the feature. The data type is float32 or float64. + output_size (int|list|tuple): The pool kernel size. 
If pool kernel size is a tuple or list, + it must contain one int. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: The output tensor of adaptive average pooling result. The data type is same + as input tensor. + + Raises: + ValueError: 'output_size' should be a integer or list or tuple with length as 1. + + Examples: + .. code-block:: python + + # average adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output. + # adaptive max pool performs calculations as follow: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend) + # + import paddle + import paddle.nn.functional as F + paddle.disable_static() + + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.adaptive_average_pool1d(data, output_size=16) + # pool_out shape: [1, 3, 16]) + """ + pool_type = 'avg' + check_variable_and_dtype(x, 'input', ['float32', 'float64'], + 'adaptive_pool2d') + check_input(x, 3) + check_type(output_size, 'pool_size', (int), 'adaptive_pool1d') + + pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') + + l_type = "pool2d" + x = unsqueeze(x, [2]) + if in_dygraph_mode(): + pool_out = core.ops.pool2d(x, 'pooling_type', pool_type, 'ksize', + pool_size, 'adaptive', True) + return squeeze(pool_out, [2]) + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + outputs = {"Out": pool_out} + helper.append_op( + type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, + }) + + return squeeze(pool_out, [2]) + + +def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): + """ + This operation applies a 1D adaptive max pooling over an input signal composed + of several input planes, based on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + The output tensor shape will be [N, C, output_size]. + + For max adaptive pool1d: + + .. math:: + + lstart &= floor(i * L_{in} / L_{out}) + + lend &= ceil((i + 1) * L_{in} / L_{out}) + + Output(i) &= max(Input[lstart:lend])} + + Args: + x (Tensor): The input tensor of pooling operator, which is a 3-D tensor + with shape [N, C, L]. The format of input tensor is NCL, + where N is batch size, C is the number of channels, L is the + length of the feature. The data type is float32 or float64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one int. + return_indices (bool): If true, the index of max pooling point will be returned along + with outputs. It cannot be set in average pooling type. Default False. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: The output tensor of adaptive pooling result. The data type is same + as input tensor. + + Raises: + ValueError: 'output_size' should be a integer or list or tuple with length as 1. 
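+
+    Note:
+        As an illustrative consequence of the lstart/lend formulas above, when L is evenly
+        divisible by output_size this is equivalent to ``max_pool1d`` with ``kernel_size``
+        and ``stride`` both equal to ``L // output_size``.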
+ + Examples: + .. code-block:: python + + # max adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output. + # adaptive max pool performs calculations as follow: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = max(input[:, :, lstart: lend]) + # + import paddle + import paddle.nn.functional as F + paddle.disable_static() + + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.adaptive_max_pool1d(data, output_size=16) + # pool_out shape: [1, 3, 16]) + + pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True) + # pool_out shape: [1, 3, 16] indices shape: [1, 3, 16] + + """ + pool_type = 'max' + check_variable_and_dtype(x, 'input', ['float32', 'float64'], + 'adaptive_max_pool1d') + check_input(x, 3) + check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d') + check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d') + + pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') + + l_type = 'max_pool2d_with_index' + + x = unsqueeze(x, [2]) + if in_dygraph_mode(): + pool_out = core.ops.max_pool2d_with_index( + x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True) + return (squeeze(pool_out[0], [2]), squeeze( + pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, + }) + + return (squeeze(pool_out, [2]), + squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) + + +def max_pool2d(x, + kernel_size, + stride=None, + padding=0, + return_indices=False, + ceil_mode=False, + data_format="NCHW", + name=None): + """ + This operation applies 2D max pooling over input feature based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCHW format, where N is batch size, C is the number of channels, + H is the height of the feature, and W is the width of the feature. + + Example: + Input: + X shape: $(N, C, H_{in}, W_{in})$ + Attr: + kernel_size: ksize + stride: stride + + Output: + Out shape: $(N, C, H_{out}, W_{out})$ + $$ + out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\ + & \text{input}(N_i, C_j, \text{stride[0]} \times h + m, + \text{stride[1]} \times w + n) + $$ + + Args: + x (Tensor): The input tensor of pooling operator which is a 4-D tensor with + shape [N, C, H, W]. The format of input tensor is `"NCHW"` or + `"NHWC"`, where `N` is batch size, `C` is the number of channels, + `H` is the height of the feature, and `W` is the width of the + feature. The data type if float32 or float64. + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two integers, (pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be a square of an int. + stride (int|list|tuple): The pool stride size. 
If pool stride size is a tuple or list, + it must contain two integers, (pool_stride_Height, pool_stride_Width). + Otherwise, the pool stride size will be a square of an int. + padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be in three forms: `[pad_height, pad_width]` or + `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, + `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + when `data_format` is `"NHWC"`, `pool_padding` can be in the form + `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. + Otherwise, the pool padding size will be a square of an int. + ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape + return_indices (bool): Whether to return the max indices along with the outputs. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: The output tensor of pooling result. The data type is same as input tensor. + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. + Examples: + .. code-block:: python + import paddle + import paddle.nn.functional as F + import numpy as np + paddle.disable_static() + + # max pool2d + input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) + output = F.max_pool2d(input, + kernel_size=2, + stride=2, padding=0) + # output.shape [1, 3, 16, 16] + + # for return_indices=True + output, max_indices = F.max_pool2d(input, + kernel_size=2, + stride=2, + padding=0, + return_indices=True) + # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], + """ + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d') + kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') + if stride is None: + stride = kernel_size + else: + stride = utils.convert_to_list(stride, 2, 'pool_stride') + + if data_format not in ["NCHW", "NHWC"]: + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " + "Attr(data_format): %s." % str(data_format)) + padding_algorithm = "EXPLICIT" + if isinstance(padding, str): + padding = padding.upper() + if padding not in ["SAME", "VALID"]: + raise ValueError( + "Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'." + % str(padding)) + if padding == "VALID": + padding_algorithm = "VALID" + padding = [0, 0] + if ceil_mode != False: + raise ValueError( + "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. 
" + "Received ceil_mode: True.") + elif padding == "SAME": + padding_algorithm = "SAME" + padding = [0, 0] + + padding = update_padding2d(padding, data_format) + + if in_dygraph_mode(): + output = core.ops.max_pool2d_with_index( + x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, + 'paddings', padding, 'padding_algorithm', padding_algorithm, + 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, + 'exclusive', True, 'data_format', data_format) + return output if return_indices else output[0] + + op_type = 'max_pool2d_with_index' + helper = LayerHelper(op_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=op_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": True, + "data_format": data_format, + }) + + return (pool_out, mask) if return_indices else pool_out + + +def avg_pool2d(x, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, + divisor_override=None, + data_format="NCHW", + name=None): + """ + This operation applies 2D average pooling over input features based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCHW format, where N is batch size, C is the number of channels, + H is the height of the feature, and W is the width of the feature. + + Example: + Input: + X shape: $(N, C, H_{in}, W_{in})$ + Attr: + kernel_size: ksize + + Output: + Out shape: $(N, C, H_{out}, W_{out})$ + $$ + out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1} + input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) + $$ + + Args: + x (Tensor): The input tensor of pooling operator which is a 4-D tensor with + shape [N, C, H, W]. The format of input tensor is `"NCHW"` or + `"NHWC"`, where `N` is batch size, `C` is the number of channels, + `H` is the height of the feature, and `W` is the width of the + feature. The data type if float32 or float64. + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two integers, (pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be a square of an int. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain two integers, (pool_stride_Height, pool_stride_Width). + Otherwise, the pool stride size will be a square of an int. + padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be in three forms: `[pad_height, pad_width]` or + `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, + `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + when `data_format` is `"NHWC"`, `pool_padding` can be in the form + `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. + Otherwise, the pool padding size will be a square of an int. 
+ ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape + count_include_pad (bool): Whether to exclude padding points in average pooling + mode, default is `true`. + divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: The output tensor of pooling result. The data type is same as input tensor. + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. + Examples: + .. code-block:: python + import paddle + import paddle.nn.functional as F + import numpy as np + paddle.disable_static() + + # avg pool2d + input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) + output = F.avg_pool2d(input, + kernel_size=2, + stride=2, padding=0) + # output.shape [1, 3, 16, 16] + + """ + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d') + kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') + if stride is None: + stride = kernel_size + else: + stride = utils.convert_to_list(stride, 2, 'pool_stride') + + padding_algorithm = "EXPLICIT" + if isinstance(padding, str): + padding = padding.upper() + if padding not in ["SAME", "VALID"]: + raise ValueError( + "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." + % str(padding)) + if padding == "VALID": + padding_algorithm = "VALID" + padding = [0, 0] + if ceil_mode != False: + raise ValueError( + "When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. " + "Received ceil_mode: True.") + elif padding == "SAME": + padding_algorithm = "SAME" + padding = [0, 0] + + if data_format not in ["NCHW", "NHWC"]: + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " + "Attr(data_format): %s." 
% str(data_format)) + pool_padding = update_padding2d(padding, data_format) + + if in_dygraph_mode(): + output = core.ops.pool2d( + x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', + False, 'padding_algorithm', padding_algorithm, 'strides', stride, + 'paddings', pool_padding, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'data_format', data_format) + if divisor_override is None: + return output + else: + check_instance(divisor_override, "divisor_override") + return output * (kernel_size[0] * kernel_size[1]) / divisor_override + + op_type = 'pool2d' + helper = LayerHelper(op_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + helper.append_op( + type=op_type, + inputs={"X": x}, + outputs={"Out": pool_out}, + attrs={ + "pooling_type": "avg", + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": pool_padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": not count_include_pad, + "data_format": data_format, + }) + + if divisor_override is None: + return pool_out + else: + check_instance(divisor_override, "divisor_override") + return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override + + +def max_pool3d(x, + kernel_size, + stride=None, + padding=0, + return_indices=False, + ceil_mode=False, + data_format="NCDHW", + name=None): + """ + This operation applies 3D max pooling over input features based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCDHW format, where N is batch size, C is the number of channels, + H is the height of the feature, D is the depth of the feature, and W is the width of the feature. + + Example: + Input: + X shape: $(N, C, D_{in}, H_{in}, W_{in})$ + Attr: + kernel_size: ksize + + Output: + Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ + $$ + \text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, ksize[0]-1} \max_{m=0, \ldots, ksize[1]-1} \max_{n=0, \ldots, ksize[2]-1} \\ + & \text{input}(N_i, C_j, \text{stride[0]} \times d + k, + \text{stride[1]} \times h + m, \text{stride[2]} \times w + n) + $$ + + Args: + x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with + shape [N, C, D, H, W]. The format of + input tensor is `"NCDHW"` or `"NDHWC"`, where `N` is batch size, `C` is + the number of channels, `D` is the depth of the feature, + `H` is the height of the feature, and `W` is the width + of the feature. + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + is a tuple or list, it must contain three integers, + (pool_size_Depth, pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be the cube of an int. + stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, + it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. + Otherwise, the pool stride size will be a cube of an int. + padding (int|list|tuple): The pool padding size. 
If pool padding size is a tuple or list,
+            it could be in three forms: `[pad_depth, pad_height, pad_width]` or
+            `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
+            and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
+            `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
+            when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
+            `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+        ceil_mode (bool): ${ceil_mode_comment}
+        return_indices (bool): Whether to return the max indices along with the outputs.
+        data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
+            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
+            `[batch_size, input_channels, input_depth, input_height, input_width]`.
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name does not need to be set and
+            is None by default.
+
+    Returns:
+        Tensor: The output tensor of pooling result. The data type is the same as the input tensor.
+    Raises:
+        ValueError: If `padding` is a string, but not "SAME" or "VALID".
+        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
+        ShapeError: If the output's shape calculated is not greater than 0.
+    Examples:
+        .. code-block:: python
+          import paddle
+          import paddle.nn.functional as F
+          import numpy as np
+          paddle.disable_static()
+
+          # max pool3d
+          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
+          output = F.max_pool3d(input,
+                                kernel_size=2,
+                                stride=2, padding=0)
+          # output.shape [1, 3, 16, 16, 16]
+
+          # for return_indices=True
+          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
+          output, max_indices = paddle.nn.functional.max_pool3d(input,
+                                                                kernel_size=2,
+                                                                stride=2,
+                                                                padding=0,
+                                                                return_indices=True)
+          # output.shape [1, 3, 16, 16, 16], max_indices.shape [1, 3, 16, 16, 16]
+
+    """
+    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d')
+    kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size')
+    if stride is None:
+        stride = kernel_size
+    else:
+        stride = utils.convert_to_list(stride, 3, 'pool_stride')
+
+    padding_algorithm = "EXPLICIT"
+    if isinstance(padding, str):
+        padding = padding.upper()
+        if padding not in ["SAME", "VALID"]:
+            raise ValueError(
+                "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
+                % str(padding))
+        if padding == "VALID":
+            padding_algorithm = "VALID"
+            padding = [0, 0, 0]
+            if ceil_mode != False:
+                raise ValueError(
+                    "When Attr(pool_padding) is \"VALID\", ceil_mode must be False. "
+                    "Received ceil_mode: True.")
+        elif padding == "SAME":
+            padding_algorithm = "SAME"
+            padding = [0, 0, 0]
+
+    if data_format not in ["NCDHW", "NDHWC"]:
+        raise ValueError(
+            "Attr(data_format) should be 'NCDHW' or 'NDHWC'. 
Received " + "Attr(data_format): %s" % str(data_format)) + padding = update_padding3d(padding, data_format) + + if in_dygraph_mode(): + output = core.ops.max_pool3d_with_index( + x, 'pooling_type', 'max', 'ksize', kernel_size, 'strides', stride, + 'paddings', padding, 'global_pooling', False, 'padding_algorithm', + padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) + return output if return_indices else output[0] + + op_type = "max_pool3d_with_index" + helper = LayerHelper(op_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=op_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": False, + "data_format": data_format, + }) + + return (pool_out, mask) if return_indices else pool_out + + +def avg_pool3d(x, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=False, + divisor_override=None, + data_format="NCDHW", + name=None): + """ + This operation applies 3D max pooling over input features based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCDHW format, where N is batch size, C is the number of channels, + H is the height of the feature, D is the depth of the feature, and W is the width of the feature. + + Args: + input (Tensor): The input tensor of pooling operator, which is a 5-D tensor with + shape [N, C, D, H, W], where `N` is batch size, `C` is + the number of channels, `D` is the depth of the feature, + `H` is the height of the feature, and `W` is the width + of the feature. + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + is a tuple or list, it must contain three integers, + (pool_size_Depth, pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be the cube of an int. + stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, + it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. + Otherwise, the pool stride size will be a cube of an int. + padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, + it could be in three forms: `[pad_depth, pad_height, pad_width]` or + `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, + and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form + `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + when `data_format` is `"NDHWC"`, `pool_padding` can be in the form + `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. + ceil_mode (bool): ${ceil_mode_comment} + count_include_pad (bool): Whether to exclude padding points in average pooling + mode, default is True. + divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. 
+ data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. + The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_depth, input_height, input_width]`. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + + Returns: + Tensor: The output tensor of pooling result. The data type is same as input tensor. + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. + Examples: + .. code-block:: python + import paddle.fluid as fluid + import paddle + input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) + # avg pool3d + pool3d = paddle.nn.functional.avg_pool3d( + input, + kernel_size = 2, + stride = 2, + padding=0) + # pool3d.shape: [1, 3, 16, 16, 16] + """ + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d') + kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size') + if stride is None: + stride = kernel_size + else: + stride = utils.convert_to_list(stride, 3, 'pool_stride') + + padding_algorithm = "EXPLICIT" + if isinstance(padding, str): + padding = padding.upper() + if padding not in ["SAME", "VALID"]: + raise ValueError( + "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." + % str(padding)) + if padding == "VALID": + padding_algorithm = "VALID" + padding = [0, 0, 0] + if ceil_mode != False: + raise ValueError( + "When Attr(pool_padding) is \"VALID\", ceil_mode must be False. " + "Received ceil_mode: True.") + elif padding == "SAME": + padding_algorithm = "SAME" + padding = [0, 0, 0] + + if data_format not in ["NCDHW", "NDHWC"]: + raise ValueError( + "Attr(data_format) should be 'NCDHW' or 'NDHWC'. 
Received " + "Attr(data_format): %s" % str(data_format)) + padding = update_padding3d(padding, data_format) + + if in_dygraph_mode(): + output = core.ops.pool3d( + x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride, + 'paddings', padding, 'global_pooling', False, 'padding_algorithm', + padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'data_format', data_format) + if divisor_override is None: + return output + else: + check_instance(divisor_override, "divisor_override") + return output * (kernel_size[0] * kernel_size[1] * + kernel_size[2]) / divisor_override + + op_type = "pool3d" + helper = LayerHelper(op_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out} + + helper.append_op( + type=op_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'avg', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": not count_include_pad, + "data_format": data_format, + }) + + if divisor_override is None: + return pool_out + else: + check_instance(divisor_override, "divisor_override") + return pool_out * (kernel_size[0] * kernel_size[1] * + kernel_size[2]) / divisor_override + + +def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): + """ + + This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions + of the output tensor are determined by the parameter output_size. + See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool2d` . + + For avg adaptive pool2d: + + .. math:: + + hstart &= floor(i * H_{in} / H_{out}) + + hend &= ceil((i + 1) * H_{in} / H_{out}) + + wstart &= floor(j * W_{in} / W_{out}) + + wend &= ceil((j + 1) * W_{in} / W_{out}) + + Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} + + Args: + x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. + The data type can be float16, float32, float64, int32 or int64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two element, (H, W). H and W can be either a int, or None which means + the size will be the same as that of the input. + data_format (str): The data format of the input and output data. An optional string + from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in + the order of: [batch_size, input_channels, input_height, input_width]. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: The output tensor of avg adaptive pool2d result. The data type is same as input tensor. + + Raises: + ValueError: If `data_format` is not "NCHW" or "NHWC". + + Examples: + .. code-block:: python + + # adaptive avg pool2d + # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], + # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + # of input data into m * n grids averagely and performs poolings in each + # grid to get output. 
+ # adaptive avg pool performs calculations as follow: + # + # for i in range(m): + # for j in range(n): + # hstart = floor(i * H / m) + # hend = ceil((i + 1) * H / m) + # wstart = floor(i * W / n) + # wend = ceil((i + 1) * W / n) + # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) + # + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 32, 32] + pool_out = paddle.nn.functional.adaptive_avg_pool2d( + x = x, + output_size=[3, 3]) + # pool_out.shape is [2, 3, 3, 3] + """ + if not in_dygraph_mode(): + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64', 'int32', 'int64'], + 'adaptive_avg_pool2d') + check_type(data_format, 'data_format', str, 'adaptive_avg_pool2d') + + if data_format not in ["NCHW", "NHWC"]: + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " + "Attr(data_format): %s." % str(data_format)) + + if data_format == "NCHW": + in_h, in_w = x.shape[2:4] + else: + in_h, in_w = x.shape[1:3] + + if isinstance(output_size, int): + output_size = utils.convert_to_list(output_size, 2, 'output_size') + else: + if output_size[0] == None: + output_size[0] = in_h + if output_size[1] == None: + output_size[1] = in_w + + if in_dygraph_mode(): + output = core.ops.pool2d(x, 'pooling_type', 'avg', 'ksize', output_size, + 'global_pooling', False, 'adaptive', True, + 'data_format', data_format) + return output + + l_type = 'pool2d' + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + outputs = {"Out": pool_out} + + helper.append_op( + type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": "avg", + "ksize": output_size, + "adaptive": True, + "data_format": data_format, + }) + + return pool_out + + +def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): + """ + + This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions + of the output tensor are determined by the parameter output_size. + See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool3d` . + + For avg adaptive pool3d: + + .. math:: + + dstart &= floor(i * D_{in} / D_{out}) + + dend &= ceil((i + 1) * D_{in} / D_{out}) + + hstart &= floor(j * H_{in} / H_{out}) + + hend &= ceil((j + 1) * H_{in} / H_{out}) + + wstart &= floor(k * W_{in} / W_{out}) + + wend &= ceil((k + 1) * W_{in} / W_{out}) + + Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} + + Args: + x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. + The data type can be float16, float32, float64, int32 or int64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means + the size will be the same as that of the input. + data_format (str): The data format of the input and output data. An optional string + from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in + the order of: [batch_size, input_channels, input_depth, input_height, input_width]. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: The output tensor of avg adaptive pool3d result. 
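The 2-D adaptive average pool follows the same floor/ceil grid as the 1-D case, applied independently to the output rows and columns. A NumPy reference sketch (illustration only, NCHW layout assumed):

import numpy as np

def ref_adaptive_avg_pool2d(x, out_hw):
    # x: [N, C, H, W] -> [N, C, m, n], mirroring the hstart/hend, wstart/wend grid.
    N, C, H, W = x.shape
    m, n = out_hw
    out = np.zeros((N, C, m, n), dtype=x.dtype)
    for i in range(m):
        hs, he = int(np.floor(i * H / m)), int(np.ceil((i + 1) * H / m))
        for j in range(n):
            ws, we = int(np.floor(j * W / n)), int(np.ceil((j + 1) * W / n))
            out[:, :, i, j] = x[:, :, hs:he, ws:we].mean(axis=(-2, -1))
    return out

x = np.random.rand(2, 3, 32, 32).astype('float32')
print(ref_adaptive_avg_pool2d(x, (3, 3)).shape)  # (2, 3, 3, 3)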
The data type is same as input tensor. + + Raises: + ValueError: If `data_format` is not "NCDHW" or "NDHWC". + + Examples: + .. code-block:: python + + # adaptive avg pool3d + # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], + # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + # of input data into l * m * n grids averagely and performs poolings in each + # grid to get output. + # adaptive avg pool performs calculations as follow: + # + # for i in range(l): + # for j in range(m): + # for k in range(n): + # dstart = floor(i * D / l) + # dend = ceil((i + 1) * D / l) + # hstart = floor(j * H / m) + # hend = ceil((j + 1) * H / m) + # wstart = floor(k * W / n) + # wend = ceil((k + 1) * W / n) + # output[:, :, i, j, k] = + # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 8, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 8, 32, 32] + pool_out = paddle.nn.functional.adaptive_avg_pool3d( + x = x, + output_size=[3, 3, 3]) + # pool_out.shape is [2, 3, 3, 3, 3] + """ + if not in_dygraph_mode(): + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64', 'int32', 'int64'], + 'adaptive_avg_pool3d') + check_type(data_format, 'data_format', str, 'adaptive_avg_pool3d') + + if data_format not in ["NCDHW", "NDHWC"]: + raise ValueError( + "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " + "Attr(data_format): %s." % str(data_format)) + + if data_format == "NCDHW": + in_l, in_h, in_w = x.shape[2:5] + else: + in_l, in_h, in_w = x.shape[1:4] + + if isinstance(output_size, int): + output_size = utils.convert_to_list(output_size, 3, 'output_size') + else: + if output_size[0] == None: + output_size[0] = in_l + if output_size[1] == None: + output_size[1] = in_h + if output_size[2] == None: + output_size[2] = in_w + + if in_dygraph_mode(): + output = core.ops.pool3d(x, 'pooling_type', 'avg', 'ksize', output_size, + 'global_pooling', False, 'adaptive', True, + 'data_format', data_format) + return output + + l_type = 'pool3d' + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out} + + helper.append_op( + type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": "avg", + "ksize": output_size, + "adaptive": True, + "data_format": data_format, + }) + + return pool_out diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index a2cc8fde5ad7147b7af4765de834508f1f3cc825..1dfdac26e990851ac5f192742acd47fb92633d0d 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -12,9 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
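A quick sanity check that follows from the grid formulas: with `output_size=1` the adaptive average pools reduce to a global mean over the spatial dimensions, so a plain NumPy mean should agree with `adaptive_avg_pool3d` up to float error (a sketch, not part of the patch):

import numpy as np
import paddle

paddle.disable_static()
x_np = np.random.rand(2, 3, 8, 32, 32).astype('float32')
y = paddle.nn.functional.adaptive_avg_pool3d(paddle.to_tensor(x_np), output_size=1)
print(np.allclose(y.numpy().reshape(2, 3), x_np.mean(axis=(2, 3, 4)), atol=1e-5))  # expected True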
+from ...device import get_cudnn_version +from ...fluid.framework import core, in_dygraph_mode, Variable +from ...fluid.layer_helper import LayerHelper +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid import dygraph_utils +import numpy as np + # TODO: define specitial functions used in computer vision task from ...fluid.layers import affine_channel #DEFINE_ALIAS -from ...fluid.layers import affine_grid #DEFINE_ALIAS from ...fluid.layers import anchor_generator #DEFINE_ALIAS from ...fluid.layers import bipartite_match #DEFINE_ALIAS from ...fluid.layers import box_clip #DEFINE_ALIAS @@ -44,7 +50,7 @@ from ...fluid.layers import yolov3_loss #DEFINE_ALIAS from ...fluid.layers import fsp_matrix #DEFINE_ALIAS from ...fluid.layers import image_resize_short #DEFINE_ALIAS -from ...fluid.layers import pixel_shuffle #DEFINE_ALIAS +# from ...fluid.layers import pixel_shuffle #DEFINE_ALIAS from ...fluid.layers import retinanet_detection_output #DEFINE_ALIAS from ...fluid.layers import retinanet_target_assign #DEFINE_ALIAS from ...fluid.layers import roi_perspective_transform #DEFINE_ALIAS @@ -89,3 +95,313 @@ __all__ = [ 'yolo_box', 'yolov3_loss' ] + + +def affine_grid(theta, out_shape, align_corners=True, name=None): + """ + It generates a grid of (x,y) coordinates using the parameters of + the affine transformation that correspond to a set of points where + the input feature map should be sampled to produce the transformed + output feature map. + + Args: + theta (Tensor) - A tensor with shape [N, 2, 3]. It contains a batch of affine transform parameters. + The data type can be float32 or float64. + out_shape (Tensor | list | tuple): The shape of target output with format [batch_size, channel, height, width]. + ``out_shape`` can be a Tensor or a list or tuple. The data + type must be int32. + align_corners(bool): Whether to align corners of target feature map and source feature map. Default: True. + name(str|None): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor, A Tensor with shape [batch_size, H, W, 2] while 'H' and 'W' are the height and width of feature map in affine transformation. The data type is the same as `theta`. + + Raises: + ValueError: If the type of arguments is not supported. + + Examples: + + .. 
code-block:: python + + import paddle + import paddle.nn.functional as F + import numpy as np + paddle.disable_static() + # theta shape = [1, 2, 3] + theta = np.array([[[-0.7, -0.4, 0.3], + [ 0.6, 0.5, 1.5]]]).astype("float32") + theta_t = paddle.to_tensor(theta) + y_t = F.affine_grid( + theta_t, + [1, 2, 3, 3], + align_corners=False) + print(y_t.numpy()) + + #[[[[ 1.0333333 0.76666665] + # [ 0.76666665 1.0999999 ] + # [ 0.5 1.4333333 ]] + # + # [[ 0.5666667 1.1666666 ] + # [ 0.3 1.5 ] + # [ 0.03333333 1.8333334 ]] + # + # [[ 0.10000002 1.5666667 ] + # [-0.16666666 1.9000001 ] + # [-0.43333334 2.2333333 ]]]] + """ + helper = LayerHelper('affine_grid') + + if not isinstance(theta, Variable): + raise ValueError("The theta should be a Tensor.") + check_variable_and_dtype(theta, 'theta', ['float32', 'float64'], + 'affine_grid') + cudnn_version = get_cudnn_version() + if cudnn_version is not None and cudnn_version >= 6000 and align_corners: + use_cudnn = True + else: + use_cudnn = False + + if not (isinstance(out_shape, list) or isinstance(out_shape, tuple) or \ + isinstance(out_shape, Variable)): + raise ValueError("The out_shape should be a list, tuple or Tensor.") + + if in_dygraph_mode(): + _out_shape = out_shape.numpy().tolist() if isinstance( + out_shape, Variable) else out_shape + return core.ops.affine_grid(theta, "output_shape", _out_shape, + "align_corners", align_corners, "use_cudnn", + use_cudnn) + + out = helper.create_variable_for_type_inference(theta.dtype) + ipts = {'Theta': theta} + attrs = {"align_corners": align_corners, "use_cudnn": use_cudnn} + if isinstance(out_shape, Variable): + ipts['OutputShape'] = out_shape + check_variable_and_dtype(out_shape, 'out_shape', ['int32'], + 'affine_grid') + else: + attrs['output_shape'] = out_shape + + helper.append_op( + type='affine_grid', + inputs=ipts, + outputs={'Output': out}, + attrs=None if len(attrs) == 0 else attrs) + return out + + +def grid_sample(x, + grid, + mode='bilinear', + padding_mode='zeros', + align_corners=True, + name=None): + """ + This operation samples input X by using bilinear interpolation or + nearest interpolation based on flow field grid, which is usually + generated by :code:`affine_grid` . The grid of shape [N, H, W, 2] + is the concatenation of (x, y) coordinates with shape [N, H, W] each, + where x is indexing the 4th dimension (in width dimension) of input + data x and y is indexing the 3rd dimension (in height dimension), + finally results is the bilinear interpolation or nearest value of 4 nearest corner + points. The output tensor shape will be [N, C, H, W]. + .. code-block:: text + Step 1: + Get (x, y) grid coordinates and scale to [0, H-1/W-1]. + .. code-block:: text + grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1) + grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1) + Step 2: + Indices input data X with grid (x, y) in each [H, W] area, and bilinear + interpolate point value by 4 nearest points or nearest interpolate point value + by nearest point. 
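The grid produced by `affine_grid` can be reproduced with a few lines of NumPy: build the normalized base grid (with a half-pixel offset when `align_corners=False`), then apply `theta` to homogeneous (x, y, 1) coordinates. This is a sketch of that convention rather than Paddle's kernel; the first point it prints matches the docstring example above.

import numpy as np

def ref_affine_grid(theta, size, align_corners=True):
    # theta: [N, 2, 3]; size: [N, C, H, W]; returns [N, H, W, 2] of (x, y) sample points.
    N, _, H, W = size
    if align_corners:
        xs, ys = np.linspace(-1, 1, W), np.linspace(-1, 1, H)
    else:  # shift to pixel centers
        xs = (np.arange(W) + 0.5) * 2 / W - 1
        ys = (np.arange(H) + 0.5) * 2 / H - 1
    gx, gy = np.meshgrid(xs, ys)                     # [H, W]
    base = np.stack([gx, gy, np.ones_like(gx)], -1)  # [H, W, 3] homogeneous coords
    return np.einsum('hwk,njk->nhwj', base, theta)

theta = np.array([[[-0.7, -0.4, 0.3], [0.6, 0.5, 1.5]]], dtype=np.float32)
print(ref_affine_grid(theta, [1, 2, 3, 3], align_corners=False)[0, 0, 0])  # ~[1.0333, 0.7667]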
+ wn ------- y_n ------- en + | | | + | d_n | + | | | + x_w --d_w-- grid--d_e-- x_e + | | | + | d_s | + | | | + ws ------- y_s ------- wn + For bilinear interpolation: + x_w = floor(x) // west side x coord + x_e = x_w + 1 // east side x coord + y_n = floor(y) // north side y coord + y_s = y_s + 1 // south side y coord + d_w = grid_x - x_w // distance to west side + d_e = x_e - grid_x // distance to east side + d_n = grid_y - y_n // distance to north side + d_s = y_s - grid_y // distance to south side + wn = X[:, :, y_n, x_w] // north-west point value + en = X[:, :, y_n, x_e] // north-east point value + ws = X[:, :, y_s, x_w] // south-east point value + es = X[:, :, y_s, x_w] // north-east point value + output = wn * d_e * d_s + en * d_w * d_s + + ws * d_e * d_n + es * d_w * d_n + Args: + x(Tensor): The input tensor, which is a 4-d tensor with shape + [N, C, H, W], N is the batch size, C is the channel + number, H and W is the feature height and width. + The data type is float32 or float64. + grid(Tensor): Input grid tensor of shape [N, grid_H, grid_W, 2]. The + data type is float32 or float64. + mode(str, optional): The interpolation method which can be 'bilinear' or 'nearest'. + Default: 'bilinear'. + padding_mode(str, optional) The padding method used when source index + is out of input images. It can be 'zeros', 'reflect' and 'border'. + Default: zeros. + align_corners(bool, optional): If `align_corners` is true, it will projects + -1 and 1 to the centers of the corner pixels. Otherwise, it will + projects -1 and 1 to the image edges. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid and `grid_W` is the width of grid. The data type is same as input tensor. + + Examples: + .. code-block:: python + import paddle + import paddle.nn.functional as F + import numpy as np + + # shape=[1, 1, 3, 3] + x = np.array([[[[-0.6, 0.8, -0.5], + [-0.5, 0.2, 1.2], + [ 1.4, 0.3, -0.2]]]]).astype("float64") + + # grid shape = [1, 3, 4, 2] + grid = np.array( + [[[[ 0.2, 0.3], + [-0.4, -0.3], + [-0.9, 0.3], + [-0.9, -0.6]], + [[ 0.4, 0.1], + [ 0.9, -0.8], + [ 0.4, 0.5], + [ 0.5, -0.2]], + [[ 0.1, -0.8], + [-0.3, -1. ], + [ 0.7, 0.4], + [ 0.2, 0.8]]]]).astype("float64") + + paddle.disable_static() + x = paddle.to_tensor(x) + grid = paddle.to_tensor(grid) + y_t = F.grid_sample( + x, + grid, + mode='bilinear', + padding_mode='border', + align_corners=True) + print(y_t.numpy()) + + # output shape = [1, 1, 3, 4] + # [[[[ 0.34 0.016 0.086 -0.448] + # [ 0.55 -0.076 0.35 0.59 ] + # [ 0.596 0.38 0.52 0.24 ]]]] + """ + helper = LayerHelper("grid_sample", **locals()) + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'grid_sampler') + check_variable_and_dtype(grid, 'grid', ['float32', 'float64'], + 'grid_sampler') + if not isinstance(x, Variable): + raise ValueError("The x should be a Variable") + if not isinstance(grid, Variable): + raise ValueError("The grid should be a Variable") + _modes = ['bilinear', 'nearest'] + _padding_modes = ['zeros', 'reflect', 'border'] + if mode not in _modes: + raise ValueError( + "The mode of grid sample function should be in {}, but got: {}". + format(_modes, mode)) + if padding_mode not in _padding_modes: + raise ValueError( + "The padding mode of grid sample function should be in {}, but got: {}". 
+ format(_padding_modes, padding_mode)) + + if not isinstance(align_corners, bool): + raise ValueError("The align corners should be bool, but got: {}".format( + align_corners)) + + cudnn_version = get_cudnn_version() + use_cudnn = False + if (cudnn_version is not None + ) and align_corners and mode == 'bilinear' and padding_mode == 'zeros': + use_cudnn = True + ipts = {'X': x, 'Grid': grid} + attrs = { + 'mode': mode, + 'padding_mode': padding_mode, + 'align_corners': align_corners, + 'use_cudnn': use_cudnn + } + + if in_dygraph_mode(): + attrs = ('mode', mode, 'padding_mode', padding_mode, 'align_corners', + align_corners, 'use_cudnn', use_cudnn) + out = getattr(core.ops, 'grid_sampler')(x, grid, *attrs) + else: + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='grid_sampler', + inputs=ipts, + attrs=attrs, + outputs={'Output': out}) + return out + + +def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None): + """ + This API implements pixel shuffle operation. + See more details in :ref:`api_nn_vision_PixelShuffle` . + Parameters: + x(Tensor): 4-D tensor, the data type should be float32 or float64. + upscale_factor(int): factor to increase spatial resolution. + data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width]. + name (str, optional): The default value is None. Normally there is no need for user to set this property. + Returns: + Out(tensor): Reshaped tensor according to the new dimension. + Raises: + ValueError: If the square of upscale_factor cannot divide the channels of input. + Examples: + .. code-block:: python + import paddle + import paddle.nn.functional as F + import numpy as np + x = np.random.randn(2, 9, 4, 4).astype(np.float32) + paddle.disable_static() + x_var = paddle.to_tensor(x) + out_var = F.pixel_shuffle(x_var, 3) + out = out_var.numpy() + print(out.shape) + # (2, 1, 12, 12) + """ + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], + 'pixel_shuffle') + + if not isinstance(upscale_factor, int): + raise TypeError("upscale factor must be int type") + + if data_format not in ["NCHW", "NHWC"]: + raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'." + "But recevie Attr(data_format): {} ".format( + data_format)) + + if in_dygraph_mode(): + return core.ops.pixel_shuffle(x, "upscale_factor", upscale_factor, + "data_format", data_format) + + helper = LayerHelper("pixel_shuffle", **locals()) + + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type="pixel_shuffle", + inputs={"X": x}, + outputs={"Out": out}, + attrs={"upscale_factor": upscale_factor, + "data_format": data_format}) + return out diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index 680885ac26a52eaf8599ce5f152d3615bf5af8aa..b25350be601dd9e56d8268859b52a12d3745c44d 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -20,7 +20,9 @@ from . import conv from . import extension from . import activation from . import norm +from . import vision from . import distance +from . 
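`pixel_shuffle` is a pure reshape/transpose rearrangement from `[N, C*r*r, H, W]` to `[N, C, H*r, W*r]` in the NCHW case. The NumPy sketch below mirrors the `(2, 9, 4, 4) -> (2, 1, 12, 12)` example above; it is an illustration of the layout, assuming the usual sub-pixel ordering.

import numpy as np

def ref_pixel_shuffle_nchw(x, r):
    # [N, C*r*r, H, W] -> [N, C, H*r, W*r]
    N, Cr2, H, W = x.shape
    C = Cr2 // (r * r)
    x = x.reshape(N, C, r, r, H, W)
    x = x.transpose(0, 1, 4, 2, 5, 3)   # N, C, H, r, W, r
    return x.reshape(N, C, H * r, W * r)

x = np.random.randn(2, 9, 4, 4).astype(np.float32)
print(ref_pixel_shuffle_nchw(x, 3).shape)  # (2, 1, 12, 12)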
import transformer from .activation import * from .loss import * @@ -28,6 +30,9 @@ from .conv import * from .extension import * from .activation import * from .norm import * +from .vision import * + +from .transformer import * # from .activation import PReLU #DEFINE_ALIAS from .activation import ReLU #DEFINE_ALIAS from .activation import LeakyReLU #DEFINE_ALIAS @@ -38,14 +43,40 @@ from .activation import HSigmoid #DEFINE_ALIAS from .common import BilinearTensorProduct #DEFINE_ALIAS from .common import Pool2D #DEFINE_ALIAS from .common import Pad2D #DEFINE_ALIAS +from .common import ReflectionPad1d #DEFINE_ALIAS +from .common import ReplicationPad1d #DEFINE_ALIAS +from .common import ConstantPad1d #DEFINE_ALIAS +from .common import ReflectionPad2d #DEFINE_ALIAS +from .common import ReplicationPad2d #DEFINE_ALIAS +from .common import ConstantPad2d #DEFINE_ALIAS +from .common import ZeroPad2d #DEFINE_ALIAS +from .common import ReplicationPad3d #DEFINE_ALIAS +from .common import ConstantPad3d #DEFINE_ALIAS +from .common import CosineSimilarity #DEFINE_ALIAS from .common import Embedding #DEFINE_ALIAS from .common import Linear #DEFINE_ALIAS from .common import Flatten #DEFINE_ALIAS from .common import UpSample #DEFINE_ALIAS -from .conv import Conv2D #DEFINE_ALIAS -from .conv import Conv2DTranspose #DEFINE_ALIAS -from .conv import Conv3D #DEFINE_ALIAS -from .conv import Conv3DTranspose #DEFINE_ALIAS +from .common import Dropout #DEFINE_ALIAS +from .common import Dropout2D #DEFINE_ALIAS +from .common import Dropout3D #DEFINE_ALIAS +from .common import AlphaDropout #DEFINE_ALIAS +from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS +from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS +from .pooling import AvgPool1d #DEFINE_ALIAS +from .pooling import MaxPool1d #DEFINE_ALIAS +from .pooling import AdaptiveAvgPool1d #DEFINE_ALIAS +from .pooling import AdaptiveMaxPool1d #DEFINE_ALIAS +from .pooling import AvgPool2d #DEFINE_ALIAS +from .pooling import MaxPool2d #DEFINE_ALIAS +from .pooling import AvgPool3d #DEFINE_ALIAS +from .pooling import MaxPool3d #DEFINE_ALIAS +from .conv import Conv1d #DEFINE_ALIAS +from .conv import Conv2d #DEFINE_ALIAS +from .conv import Conv3d #DEFINE_ALIAS +from .conv import ConvTranspose1d #DEFINE_ALIAS +from .conv import ConvTranspose2d #DEFINE_ALIAS +from .conv import ConvTranspose3d #DEFINE_ALIAS # from .conv import TreeConv #DEFINE_ALIAS # from .conv import Conv1D #DEFINE_ALIAS from .extension import RowConv #DEFINE_ALIAS @@ -57,13 +88,18 @@ from .extension import RowConv #DEFINE_ALIAS # from .learning_rate import PiecewiseDecay #DEFINE_ALIAS # from .learning_rate import PolynomialDecay #DEFINE_ALIAS # from .loss import NCELoss #DEFINE_ALIAS +from .loss import BCEWithLogitsLoss #DEFINE_ALIAS from .loss import CrossEntropyLoss #DEFINE_ALIAS from .loss import MSELoss #DEFINE_ALIAS from .loss import L1Loss #DEFINE_ALIAS from .loss import NLLLoss #DEFINE_ALIAS from .loss import BCELoss #DEFINE_ALIAS +from .loss import KLDivLoss #DEFINE_ALIAS from .loss import MarginRankingLoss #DEFINE_ALIAS +from .loss import CTCLoss #DEFINE_ALIAS +from .loss import SmoothL1Loss #DEFINE_ALIAS from .norm import BatchNorm #DEFINE_ALIAS +from .norm import SyncBatchNorm #DEFINE_ALIAS from .norm import GroupNorm #DEFINE_ALIAS from .norm import LayerNorm #DEFINE_ALIAS from .norm import SpectralNorm #DEFINE_ALIAS @@ -71,4 +107,6 @@ from .norm import InstanceNorm #DEFINE_ALIAS # from .rnn import RNNCell #DEFINE_ALIAS # from .rnn import GRUCell #DEFINE_ALIAS # from .rnn import LSTMCell 
#DEFINE_ALIAS + +from .vision import PixelShuffle #DEFINE_ALIAS from .distance import PairwiseDistance #DEFINE_ALIAS diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index d13f36a31854acbd990e4f9f26d71c046fc8848d..bb0bd5f70f1f9006285a3fb200537d35b4cf6c30 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -15,19 +15,256 @@ # TODO: define activation functions of neural network __all__ = [ - # 'PReLU', + 'ELU', + 'GELU', + 'Hardshrink', + 'Tanh', + 'Hardtanh', + 'PReLU', 'ReLU', + 'ReLU6', + 'SELU', 'LeakyReLU', 'Sigmoid', - # 'Softmax', + 'Softmax', + 'Softplus', + 'Softshrink', + 'Softsign', + 'Tanhshrink', + 'LogSigmoid', 'LogSoftmax', - 'HSigmoid' + 'HSigmoid', ] from ...fluid.dygraph import layers from ...fluid import core from ...fluid.framework import in_dygraph_mode -from .. import functional +from ...fluid.param_attr import ParamAttr +from ...fluid.initializer import Constant +from .. import functional as F + + +class ELU(layers.Layer): + """ + ELU Activation. + + .. math:: + + ELU(x) = max(0, x) + min(0, \\alpha * (e^{x}-1)) + + Parameters: + alpha (float, optional): The 'alpha' value of the ELU formulation. Default is 1.0. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([[-1,6],[1,15.6]])) + m = paddle.nn.ELU(0.2) + out = m(x) + # [[-0.12642411 6. ] + # [ 1. 15.6 ]] + """ + + def __init__(self, alpha=1.0, name=None): + super(ELU, self).__init__() + self._alpha = alpha + self._name = name + + def forward(self, x): + return F.elu(x, self._alpha, self._name) + + +class GELU(layers.Layer): + """ + GELU Activation. + + If approximate is True + + .. math:: + + GELU(x) = 0.5 * x * (1 + tanh(\\sqrt{\\frac{2}{\\pi}} * (x + 0.044715x^{3}))) + + else + + .. math:: + + GELU(x) = 0.5 * x * (1 + erf(\\frac{x}{\\sqrt{2}})) + + Parameters: + approximate (bool, optional): Wether to enable approximation. Default is False. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([[-1, 0.5],[1, 1.5]])) + + m = paddle.nn.GELU() + out = m(x) # [-0.158655 0.345731 0.841345 1.39979] + + m = paddle.nn.GELU(True) + out = m(x) # [-0.158808 0.345714 0.841192 1.39957] + """ + + def __init__(self, approximate=False, name=None): + super(GELU, self).__init__() + self._approximate = approximate + self._name = name + + def forward(self, x): + return F.gelu(x, self._approximate, self._name) + + +class Hardshrink(layers.Layer): + """ + Hardshrink Activation + + .. math:: + + hardshrink(x)= + \left\{ + \begin{aligned} + &x, & & if \ x > threshold \\ + &x, & & if \ x < -threshold \\ + &0, & & if \ others + \end{aligned} + \right. + + Parameters: + threshold (float, optional): The value of threshold for hardthrink. Default is 0.5 + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. 
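The two GELU forms quoted above (the exact erf form and the tanh approximation) track each other closely; a small check using `math.erf`, consistent with the `-0.158655` vs `-0.158808` values in the class example:

import math

def gelu_exact(x):
    return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))

def gelu_tanh(x):
    return 0.5 * x * (1.0 + math.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

for v in (-1.0, 0.5, 1.0, 1.5):
    print(round(gelu_exact(v), 6), round(gelu_tanh(v), 6))
# first row: -0.158655 vs -0.158808, as in the GELU example above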
+ + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-1, 0.3, 2.5])) + m = paddle.nn.Hardshrink() + out = m(x) # [-1., 0., 2.5] + """ + + def __init__(self, threshold=0.5, name=None): + super(Hardshrink, self).__init__() + self._threshold = threshold + self._name = name + + def forward(self, x): + return F.hardshrink(x, self._threshold, self._name) + + +class Tanh(layers.Layer): + """ + Tanh Activation. + + .. math:: + Tanh(x) = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} + + Parameters: + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + m = paddle.nn.Tanh() + out = m(x) + print(out.numpy()) + # [-0.37994896 -0.19737532 0.09966799 0.29131261] + """ + + def __init__(self, name=None): + super(Tanh, self).__init__() + self._name = name + + def forward(self, x): + return F.tanh(x, self._name) + + +class Hardtanh(layers.Layer): + """ + Hardtanh Activation + + .. math:: + + Hardtanh(x)= \\begin{cases} + max, \\text{if } x > max \\\\ + min, \\text{if } x < min \\\\ + x, \\text{otherwise} + \\end{cases} + + Parameters: + min (float, optional): The value of min for Hardtanh. Default is -1. + max (float, optional): The value of max for Hardtanh. Default is 1. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-1.5, 0.3, 2.5])) + m = paddle.nn.Hardtanh() + out = m(x) # # [-1., 0.3, 1.] + """ + + def __init__(self, min=-1.0, max=1.0, name=None): + super(Hardtanh, self).__init__() + self._min = min + self._max = max + self._name = name + + def forward(self, x): + return F.hardtanh(x, self._min, self._max, self._name) class HSigmoid(layers.Layer): @@ -154,7 +391,7 @@ class HSigmoid(layers.Layer): [C, 1], attr=self._bias_attr, is_bias=True, dtype=self._dtype) def forward(self, input, label, path_table=None, path_code=None): - out = functional.hsigmoid( + out = F.hsigmoid( input, label, self.weight, @@ -166,46 +403,189 @@ class HSigmoid(layers.Layer): return out -class ReLU(layers.Layer): +class PReLU(layers.Layer): """ - :alias_main: paddle.nn.ReLU - :alias: paddle.nn.ReLU,paddle.nn.layer.ReLU,paddle.nn.layer.activation.ReLU + PReLU Activation. + + .. math:: + + PReLU(x) = max(0, x) + weight * min(0, x) + + Parameters: + num_parameters (int, optional): Number of `weight` to learn. The supported values are: + 1 - a single parameter `alpha` is used for all input channels; + Number of channels - a seperate `alpha` is used for each input channel. + Default is 1. + init (float, optional): Init value of learnable `weight`. Default is 0.25. + weight_attr(ParamAttr, optional): The parameter attribute for the learnable `weight`. + Default is None. For more information, please refer to :ref:`api_fluid_ParamAttr`. 
+ name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + data = np.array([[[[-2.0, 3.0, -4.0, 5.0], + [ 3.0, -4.0, 5.0, -6.0], + [-7.0, -8.0, 8.0, 9.0]], + [[ 1.0, -2.0, -3.0, 4.0], + [-5.0, 6.0, 7.0, -8.0], + [ 6.0, 7.0, 8.0, 9.0]]]], 'float32') + x = paddle.to_tensor(data) + m = paddle.nn.PReLU(1, 0.25) + out = m(x) + # [[[[-0.5 , 3. , -1. , 5. ], + # [ 3. , -1. , 5. , -1.5 ], + # [-1.75, -2. , 8. , 9. ]], + # [[ 1. , -0.5 , -0.75, 4. ], + # [-1.25, 6. , 7. , -2. ], + # [ 6. , 7. , 8. , 9. ]]]] + """ + + def __init__(self, num_parameters=1, init=0.25, weight_attr=None, + name=None): + super(PReLU, self).__init__() + self._num_parameters = num_parameters + self._init = init + self._weight_attr = weight_attr + self._name = name + + self._weight = self.create_parameter( + attr=self._weight_attr, + shape=[num_parameters], + dtype='float32', + is_bias=False, + default_initializer=Constant(init)) + + def forward(self, x): + return F.prelu(x, self._weight) + +class ReLU(layers.Layer): + """ ReLU Activation. - .. math: + .. math:: - out = max(x, 0) + ReLU(x) = max(x, 0) Parameters: - inplace (bool, optional): If inplace is True, the input and output of - ``ReLU`` are the same variable. Otherwise, the input and output of - ``ReLU`` are different variables. Default False. Note that if x is - more than one OPs' input, inplace must be False. - - Returns: - None + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. Examples: .. code-block:: python - import paddle.fluid as fluid - import paddle.nn as nn - import numpy as np + import paddle + import numpy as np - data = np.array([-2, 0, 1]).astype('float32') - my_relu = nn.ReLU() - with fluid.dygraph.guard(): - data = fluid.dygraph.to_variable(data) - res = my_relu(data) # [0, 0, 1] + paddle.disable_static() + + x = paddle.to_tensor(np.array([-2, 0, 1]).astype('float32')) + m = paddle.nn.ReLU() + out = m(x) # [0., 0., 1.] """ - def __init__(self, inplace=False): + def __init__(self, name=None): super(ReLU, self).__init__() - self._inplace = inplace + self._name = name + + def forward(self, x): + return F.relu(x, self._name) - def forward(self, input): - return functional.relu(input, self._inplace) + +class ReLU6(layers.Layer): + """ + ReLU6 Activation + + .. math:: + + ReLU6(x) = min(max(0,x), 6) + + Parameters: + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-1, 0.3, 6.5])) + m = paddle.nn.ReLU6() + out = m(x) # [0, 0.3, 6] + """ + + def __init__(self, name=None): + super(ReLU6, self).__init__() + self._name = name + + def forward(self, x): + return F.relu6(x, self._name) + + +class SELU(layers.Layer): + """ + SELU Activation + + .. math:: + + SELU(x) = scale * (max(0,x) + min(0, alpha * (e^{x} - 1))) + + Parameters: + scale (float, optional): The value of scale for SELU. 
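The PReLU definition above, `max(0, x) + weight * min(0, x)`, can be verified directly in NumPy against the first row of the class example (weight 0.25); this is just a numeric illustration of the formula.

import numpy as np

def ref_prelu(x, weight=0.25):
    return np.maximum(0, x) + weight * np.minimum(0, x)

print(ref_prelu(np.array([-2.0, 3.0, -4.0, 5.0])))  # [-0.5  3.  -1.   5. ]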
Default is 1.0507009873554804934193349852946 + alpha (float, optional): The value of alpha for SELU. Default is 1.6732632423543772848170429916717 + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([[0.0, 1.0],[2.0, 3.0]])) + m = paddle.nn.SELU() + out = m(x) # [[0, 1.050701],[2.101402, 3.152103]] + """ + + def __init__(self, + scale=1.0507009873554804934193349852946, + alpha=1.6732632423543772848170429916717, + name=None): + super(SELU, self).__init__() + self._scale = scale + self._alpha = alpha + self._name = name + + def forward(self, x): + return F.selu(x, self._scale, self._alpha, self._name) class LeakyReLU(layers.Layer): @@ -214,11 +594,17 @@ class LeakyReLU(layers.Layer): .. math: - out = max(x, alpha * x) + LeakyReLU(x)= + \left\{ + \begin{aligned} + &x, & & if \ x >= 0 \\ + &negative\_slope * x, & & otherwise \\ + \end{aligned} + \right. \\ Parameters: - alpha (float, optional): Slope of the activation function at :math:`x < 0` . - Default: 0.01. + negative_slope (float, optional): Slope of the activation function at + :math:`x < 0` . Default is 0.01. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. @@ -229,121 +615,443 @@ class LeakyReLU(layers.Layer): Examples: .. code-block:: python - import paddle - import numpy as np + import paddle + import numpy as np - paddle.disable_static() + paddle.disable_static() - lrelu = paddle.nn.LeakyReLU() - x = paddle.to_variable(np.array([-2, 0, 1], 'float32')) - out = lrelu(x) # [-0.02, 0., 1.] + m = paddle.nn.LeakyReLU() + x = paddle.to_tensor(np.array([-2, 0, 1], 'float32')) + out = m(x) # [-0.02, 0., 1.] """ - def __init__(self, alpha=1e-2, name=None): + def __init__(self, negative_slope=0.01, name=None): super(LeakyReLU, self).__init__() - self._alpha = alpha + self._negative_slope = negative_slope self._name = name def forward(self, x): - return functional.leaky_relu(x, self._alpha, self._name) + return F.leaky_relu(x, self._negative_slope, self._name) class Sigmoid(layers.Layer): """ - :alias_main: paddle.nn.Sigmoid - :alias: paddle.nn.Sigmoid,paddle.nn.layer.Sigmoid,paddle.nn.layer.activation.Sigmoid + this interface is used to construct a callable object of the ``Sigmoid`` class. This layer calcluate the `sigmoid` of input x. + + .. math:: - Sigmoid Activation. + Sigmoid(x) = \frac{1}{1 + e^{-x}} - .. math: + Parameters: + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - output = \frac{1}{1 + e^{-input}} + Shape: + x: N-D tensor, available dtype is float16, float32, float64. - Parameters: - inplace (bool, optional): If inplace is True, the input and output - are the same variable. Otherwise, the input and output - are different variables. Default False. Note that if x is - more than one OPs' input, inplace must be False. - Returns: - None + A callable object of Sigmoid. Examples: + .. 
code-block:: python - import paddle.fluid as fluid - import paddle.nn as nn import numpy as np - input = fluid.data(name="input", shape=[None, 4]) - output = nn.Sigmoid()(input) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) + import paddle + + paddle.disable_static() input_data = np.array([1.0, 2.0, 3.0, 4.0]).astype('float32') - output_data = exe.run(feed={"input": input_data}, - fetch_list=[output]) - print(output_data) # [0.7310586, 0.880797, 0.95257413, 0.98201376] + m = paddle.nn.Sigmoid() + x = paddle.to_tensor(input_data) + output = m(x) + print(output.numpy()) # [0.7310586, 0.880797, 0.95257413, 0.98201376] """ - def __init__(self, inplace=False): + def __init__(self, name=None): super(Sigmoid, self).__init__() - self._inplace = inplace + self.name = name - def forward(self, input): - return functional.sigmoid(input, self._inplace) + def forward(self, x): + return F.sigmoid(x, self.name) -class LogSoftmax(layers.Layer): +class Softplus(layers.Layer): + """ + Softplus Activation + + .. math:: + + Softplus(x) = \\frac{1}{beta} * \\log(1 + e^{beta * x}) \\\\ + \\text{For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.} + + Parameters: + beta (float, optional): The value of beta for Softplus. Default is 1 + threshold (float, optional): The value of threshold for Softplus. Default is 20 + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + m = paddle.nn.Softplus() + out = m(x) # [0.513015, 0.598139, 0.744397, 0.854355] + """ + + def __init__(self, beta=1, threshold=20, name=None): + super(Softplus, self).__init__() + self._beta = beta + self._threshold = threshold + self._name = name + + def forward(self, x): + return F.softplus(x, self._beta, self._threshold, self._name) + + +class Softshrink(layers.Layer): + """ + Softshrink Activation + + .. math:: + + Softshrink(x)= \\begin{cases} + x - threshold, \\text{if } x > threshold \\\\ + x + threshold, \\text{if } x < -threshold \\\\ + 0, \\text{otherwise} + \\end{cases} + + Parameters: + threshold (float, optional): The value of threshold(must be no less than zero) for softplus. Default is 0.5 + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-0.9, -0.2, 0.1, 0.8])) + m = paddle.nn.Softshrink() + out = m(x) # [-0.4, 0, 0, 0.3] + """ + + def __init__(self, threshold=0.5, name=None): + super(Softshrink, self).__init__() + self._threshold = threshold + self._name = name + + def forward(self, x): + return F.softshrink(x, self._threshold, self._name) + + +class Softsign(layers.Layer): + """ + Softsign Activation + + .. math:: + + Softsign(x) = \\frac{x}{1 + |x|} + + Parameters: + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. 
+ - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + m = paddle.nn.Softsign() + out = m(x) # [-0.285714, -0.166667, 0.0909091, 0.230769] + """ + + def __init__(self, name=None): + super(Softsign, self).__init__() + self._name = name + + def forward(self, x): + return F.softsign(x, self._name) + + +class Tanhshrink(layers.Layer): + """ + Tanhshrink Activation + + .. math:: + + Tanhshrink(x) = x - tanh(x) + + Parameters: + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + m = paddle.nn.Tanhshrink() + out = m(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739] + """ + + def __init__(self, name=None): + super(Tanhshrink, self).__init__() + self._name = name + + def forward(self, x): + return F.tanhshrink(x, self._name) + + +class LogSigmoid(layers.Layer): """ - :alias_main: paddle.nn.LogSoftmax - :alias: paddle.nn.LogSoftmax,paddle.nn.layer.LogSoftmax,paddle.nn.layer.activation.LogSoftmax + LogSigmoid Activation. + + .. math:: + + LogSigmoid(x) = log \\frac{1}{1 + e^{-x}} + + Parameters: + x (Tensor): The input Tensor with data type float32, or float64. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.to_tensor(np.array([1.0, 2.0, 3.0, 4.0])) + m = paddle.nn.LogSigmoid() + out = m(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499] + """ + + def __init__(self, name=None): + super(LogSigmoid, self).__init__() + self._name = name + + def forward(self, x): + return F.logsigmoid(x, self._name) + + +class Softmax(layers.Layer): + """ + Softmax Activation. + + This operator implements the softmax layer. The calculation process is as follows: + + 1. The dimension :attr:`axis` of ``x`` will be permuted to the last. + 2. Then ``x`` will be logically flattened to a 2-D matrix. The matrix's second + dimension(row length) is the same as the dimension :attr:`axis` of ``x``, + and the first dimension(column length) is the product of all other dimensions + of ``x``. For each row of the matrix, the softmax operator squashes the + K-dimensional(K is the width of the matrix, which is also the size of ``x``'s + dimension :attr:`axis`) vector of arbitrary real values to a K-dimensional + vector of real values in the range [0, 1] that add up to 1. + + 3. After the softmax operation is completed, the inverse operations of steps 1 and 2 + are performed to restore the two-dimensional matrix to the same dimension as the ``x`` . + + It computes the exponential of the given dimension and the sum of exponential + values of all the other dimensions in the K-dimensional vector input. + Then the ratio of the exponential of the given dimension and the sum of + exponential values of all the other dimensions is the output of the softmax + operator. 
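(Editor's illustration, not part of the patch: the axis-permute / flatten / normalize / restore procedure described above can be mirrored in plain NumPy. ``reference_softmax`` below is a hypothetical helper used only to make the steps concrete; the formula and worked examples of the docstring continue below.)

    .. code-block:: python

        import numpy as np

        def reference_softmax(x, axis=-1):
            # Step 1: permute `axis` to the last position.
            x = np.moveaxis(x, axis, -1)
            shape = x.shape
            # Step 2: flatten to a 2-D matrix and normalize each row.
            x2d = x.reshape(-1, shape[-1])
            e = np.exp(x2d - x2d.max(axis=1, keepdims=True))  # shift by row max for stability
            out2d = e / e.sum(axis=1, keepdims=True)
            # Step 3: restore the original dimensions and axis order.
            return np.moveaxis(out2d.reshape(shape), -1, axis)

        x = np.random.rand(2, 3, 4).astype('float32')
        print(reference_softmax(x, axis=1).sum(axis=1))  # every entry is ~1.0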
+ + For each row :math:`i` and each column :math:`j` in the matrix, we have: + + .. math:: + + Softmax[i, j] = \\frac{\\exp(x[i, j])}{\\sum_j(exp(x[i, j])} + + Example: + + .. code-block:: text + + Case 1: + Input: + x.shape = [2, 3, 4] + x.data = [[[2.0, 3.0, 4.0, 5.0], + [3.0, 4.0, 5.0, 6.0], + [7.0, 8.0, 8.0, 9.0]], + [[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [6.0, 7.0, 8.0, 9.0]]] + + Attrs: + axis = -1 + + Output: + out.shape = [2, 3, 4] + out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], + [0.0320586 , 0.08714432, 0.23688282, 0.64391426], + [0.07232949, 0.19661193, 0.19661193, 0.53444665]], + [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], + [0.0320586 , 0.08714432, 0.23688282, 0.64391426], + [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] + + Case 2: + Input: + x.shape = [2, 3, 4] + x.data = [[[2.0, 3.0, 4.0, 5.0], + [3.0, 4.0, 5.0, 6.0], + [7.0, 8.0, 8.0, 9.0]], + [[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [6.0, 7.0, 8.0, 9.0]]] + Attrs: + axis = 1 + + Output: + out.shape = [2, 3, 4] + out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783], + [0.01786798, 0.01786798, 0.04661262, 0.04661262], + [0.97555875, 0.97555875, 0.93623955, 0.93623955]], + [[0.00490169, 0.00490169, 0.00490169, 0.00490169], + [0.26762315, 0.26762315, 0.26762315, 0.26762315], + [0.72747516, 0.72747516, 0.72747516, 0.72747516]]] + + Parameters: + axis (int, optional): The axis along which to perform log_softmax + calculations. It should be in range [-D, D), where D is the + dimensions of ``x`` . If ``axis`` < 0, it works the same way as + :math:`axis + D` . Default is -1. + dtype (str|np.dtype|core.VarDesc.VarType, optional): The desired data + type of the output tensor. If dtype is specified, ``x`` is casted + to ``dtype`` before the operation is performed. This is useful for + preventing data type overflows. Supported dtype: float32, float64. + If ``dtype`` is None, the output Tensor has the same dtype as x. + Default is None. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = np.array([[[2.0, 3.0, 4.0, 5.0], + [3.0, 4.0, 5.0, 6.0], + [7.0, 8.0, 8.0, 9.0]], + [[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [6.0, 7.0, 8.0, 9.0]]], 'float32') + x = paddle.to_tensor(x) + m = paddle.nn.Softmax() + out = m(x) + # [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], + # [0.0320586 , 0.08714432, 0.23688282, 0.64391426], + # [0.07232949, 0.19661193, 0.19661193, 0.53444665]], + # [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], + # [0.0320586 , 0.08714432, 0.23688282, 0.64391426], + # [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] + """ + + def __init__(self, axis=-1, name=None): + super(Softmax, self).__init__() + self._axis = axis + self._dtype = None + self._name = name + + def forward(self, x): + return F.softmax(x, self._axis, self._dtype, self._name) + + +class LogSoftmax(layers.Layer): + """ This operator implements the log_softmax layer. The calculation process is as follows: .. 
math:: Out[i, j] = log(softmax(x)) - = log(\\frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}) + = log(\frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}) Parameters: - axis (int, optional): The index of dimension to perform softmax calculations, it should be in - range :math:`[-1, rank-1]`, while :math:`rank` is the rank of input variable. Default: None. - None and -1 means the last dimension. - dtype (np.dtype|core.VarDesc.VarType|str): The desired data type of returned tensor. If specified, - the input tensor is casted to dtype before the operation is performed. This is useful for - preventing data type overflows. Default: None. Supported dtype: float32 or float64 + axis (int, optional): The axis along which to perform log_softmax + calculations. It should be in range [-D, D), where D is the + dimensions of the input Tensor . If ``axis`` < 0, it works the + same way as :math:`axis + D` . Default is -1. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. - Returns: - None + Shape: + - input: Tensor with any shape. + - output: Tensor with the same shape as input. Examples: .. code-block:: python - import paddle.fluid as fluid - import paddle.nn as nn - import numpy as np + import paddle + import numpy as np - data = np.array([[[-2.0, 3.0, -4.0, 5.0], - [3.0, -4.0, 5.0, -6.0], - [-7.0, -8.0, 8.0, 9.0]], - [[1.0, -2.0, -3.0, 4.0], - [-5.0, 6.0, 7.0, -8.0], - [6.0, 7.0, 8.0, 9.0]]]).astype('float32') - my_log_softnmax = nn.LogSoftmax() - with fluid.dygraph.guard(): - data = fluid.dygraph.to_variable(data) - res = my_log_softnmax(data) - # [[[ -7.1278396 -2.1278396 -9.127839 -0.12783948] - # [ -2.1270514 -9.127051 -0.12705144 -11.127051 ] - # [-16.313261 -17.313261 -1.3132617 -0.31326184]] - # [[ -3.0518122 -6.051812 -7.051812 -0.051812 ] - # [-12.313267 -1.3132664 -0.3132665 -15.313267 ] - # [ -3.4401896 -2.4401896 -1.4401896 -0.44018966]]] - """ - - def __init__(self, axis=None): + paddle.disable_static() + + x = np.array([[[-2.0, 3.0, -4.0, 5.0], + [3.0, -4.0, 5.0, -6.0], + [-7.0, -8.0, 8.0, 9.0]], + [[1.0, -2.0, -3.0, 4.0], + [-5.0, 6.0, 7.0, -8.0], + [6.0, 7.0, 8.0, 9.0]]]) + m = paddle.nn.LogSoftmax() + x = paddle.to_tensor(x) + out = m(x) + # [[[ -7.1278396 -2.1278396 -9.127839 -0.12783948] + # [ -2.1270514 -9.127051 -0.12705144 -11.127051 ] + # [-16.313261 -17.313261 -1.3132617 -0.31326184]] + # [[ -3.0518122 -6.051812 -7.051812 -0.051812 ] + # [-12.313267 -1.3132664 -0.3132665 -15.313267 ] + # [ -3.4401896 -2.4401896 -1.4401896 -0.44018966]]] + """ + + def __init__(self, axis=-1, name=None): super(LogSoftmax, self).__init__() self._axis = axis + self._name = name - def forward(self, input): - return functional.log_softmax(input, self._axis) + def forward(self, x): + return F.log_softmax(x, self._axis) diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 45259bea49d42eb07e0e593531a1680359f81a68..8a73cfb8ccda15505d8668eff6776aac387c134f 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
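(Editor's note on the activation.py changes that end just above, before the common.py hunk resumes: the reworked ``LogSoftmax`` is, numerically, ``x - logsumexp(x)`` along ``axis``. A minimal sketch of that identity, written in the 2.0-beta dygraph style used by the new docstrings; it is an illustration, not part of the patch.)

    .. code-block:: python

        import numpy as np
        import paddle

        paddle.disable_static()

        x = np.random.rand(2, 3, 4).astype('float32')
        axis = -1

        # NumPy reference: log_softmax(x) = x - log(sum(exp(x))) along `axis`.
        ref = x - np.log(np.exp(x).sum(axis=axis, keepdims=True))

        out = paddle.nn.LogSoftmax(axis=axis)(paddle.to_tensor(x))
        print(np.allclose(out.numpy(), ref, atol=1e-6))  # expected: True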
-# TODO: define the common classes to build a neural network +# TODO: define the common classes to build a neural network from ...fluid.dygraph import BilinearTensorProduct #DEFINE_ALIAS from ...fluid.dygraph import Pool2D #DEFINE_ALIAS from ...fluid.dygraph import Embedding #DEFINE_ALIAS @@ -20,10 +20,30 @@ from ...fluid.dygraph import Linear #DEFINE_ALIAS from ...fluid.dygraph import Flatten #DEFINE_ALIAS from ...fluid.dygraph import layers from .. import functional as F +from ...fluid.framework import _dygraph_tracer __all__ = [ - 'BilinearTensorProduct', 'Pool2D', 'Embedding', 'Linear', 'UpSample', - 'Pad2D' + 'BilinearTensorProduct', + 'Pool2D', + 'Embedding', + 'Linear', + 'UpSample', + 'Pad2D', + 'ReflectionPad1d', + 'ReplicationPad1d', + 'ConstantPad1d', + 'ReflectionPad2d', + 'ReplicationPad2d', + 'ConstantPad2d', + 'ZeroPad2d', + 'ConstantPad3d', + 'ReplicationPad3d', + 'CosineSimilarity', + 'Dropout', + 'Dropout2D', + 'Dropout3D', + 'Bilinear', + 'AlphaDropout', ] @@ -258,12 +278,10 @@ class Pad2D(layers.Layer): """ :alias_main: paddle.nn.Pad2D :alias: paddle.nn.Pad2D,paddle.nn.layer.Pad2D,paddle.nn.layer.common.Pad2D - This interface is used to construct a callable object of the ``Pad2D`` class. The Pad2D layer pads the input tensor boundaries according to 'paddings' and 'mode'. If mode is 'reflect', paddings[0] and paddings[1] must be no greater than height-1. And the width dimension has the same condition. - Parameters: paddings (int | List[int32]): The padding size. If padding is a int, uses the same padding in all boundaries, if padding is a List, it must contain four integers, @@ -278,16 +296,12 @@ class Pad2D(layers.Layer): data_format (str): An string from: "NHWC", "NCHW". Specify the data format of the input data. Default is "NCHW" - Returns: None - Examples: .. code-block:: text - Input = [[[[1., 2., 3.], [4., 5., 6.]]]] - Case 0: paddings = [0, 1, 2, 3], mode = 'constant' @@ -295,24 +309,20 @@ class Pad2D(layers.Layer): Out = [[[[0., 0., 1., 2., 3., 0., 0., 0.], [0., 0., 4., 5., 6., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0.]]]] - Case 1: paddings = [0, 1, 2, 1], mode = 'reflect' Out = [[[[3., 2., 1., 2., 3., 2.], [6., 5., 4., 5., 6., 5.], [3., 2., 1., 2., 3., 2.]]]] - Case 2: paddings = [0, 1, 2, 1], mode = 'edge' Out = [[[[1., 1., 1., 2., 3., 3.], [4., 4., 4., 5., 6., 6.], [4., 4., 4., 5., 6., 6.]]]] - Code Examples: .. code-block:: python - import paddle.fluid as fluid import paddle.nn as nn import numpy as np @@ -342,3 +352,942 @@ class Pad2D(layers.Layer): mode=self._mode, pad_value=self._pad_value, data_format=self._data_format) + + +class Bilinear(layers.Layer): + """ + + This layer performs bilinear on two inputs. + + .. math:: + out_{i} = x1 * W_{i} * {x2^\mathrm{T}}, i=0,1,...,size-1 + out = out + b + + In this formula: + - :math:`x1`: the first input contains in1_features elements, shape is [batch_size, in1_features]. + - :math:`x2`: the second input contains in2_features elements, shape is [batch_size, in2_features]. + - :math:`W_{i}`: the i-th learned weight, shape is [in1_features, in2_features], and learned weight's shape is [out_features, in1_features, in2_features]. + - :math:`out_{i}`: the i-th element of out, shape is [batch_size, out_features]. + - :math:`b`: the learned bias, shape is [1, out_features]. + - :math:`x2^\mathrm{T}`: the transpose of :math:`x2`. + + Parameters: + in1_features (int): The dimension of each first input(`x1`). + in2_features (int): The dimension of each second input(`x2`). 
+ out_features (int): The dimension of output of this layer. + weight_attr (ParamAttr, optional): The parameter attribute for the learnable w, parameters/weights of + this layer. The default value is None. + bias_attr (ParamAttr, optional): The parameter attribute for the bias + of this layer. If it is set to False, no bias will be added to the output units. + If it is set to None, the bias is initialized zero. The default value is None. + name (str, optional): The default value is None. Normally there is no need for user + to set this property. For more information, please refer to :ref:`api_guide_Name`. Default: None. + + Attribute: + **weight** (Parameter): the learnable weights of this layer. + + **bias** (Parameter): the learnable bias of this layer. + + Returns: + Variable: A 2-D Tensor of shape [batch_size, out_features]. + + Examples: + .. code-block:: python + + import paddle + import numpy + + paddle.disable_static() + layer1 = numpy.random.random((5, 5)).astype('float32') + layer2 = numpy.random.random((5, 4)).astype('float32') + bilinear = paddle.nn.Bilinear( + in1_features=5, in2_features=4, out_features=1000) + result = bilinear(paddle.to_tensor(layer1), + paddle.to_tensor(layer2)) # result shape [5, 1000] + + """ + + def __init__(self, + in1_features, + in2_features, + out_features, + weight_attr=None, + bias_attr=None, + name=None): + super(Bilinear, self).__init__() + self._weight_attr = weight_attr + self._bias_attr = bias_attr + self._name = name + self._in1_features = in1_features + self._in2_features = in2_features + self._out_features = out_features + self._dtype = self._helper.get_default_dtype() + + weight_shape = [ + self._out_features, self._in1_features, self._in2_features + ] + self.weight = self.create_parameter( + attr=self._weight_attr, + shape=weight_shape, + dtype=self._dtype, + is_bias=False) + bias_shape = [1, self._out_features] + self.bias = self.create_parameter( + attr=self._bias_attr, + shape=bias_shape, + dtype=self._dtype, + is_bias=True) + + def forward(self, x1, x2): + return F.bilinear(x1, x2, self.weight, self.bias, self._name) + + +class Dropout(layers.Layer): + """ + Dropout is a regularization technique for reducing overfitting by preventing + neuron co-adaption during training as described in the paper: + `Improving neural networks by preventing co-adaptation of feature detectors `_ + The dropout operator randomly sets the outputs of some units to zero, while upscale others + according to the given dropout probability. + + See ``paddle.nn.functional.dropout`` for more details. + + In dygraph mode, please use ``eval()`` to switch to evaluation mode, where dropout is disabled. + + Parameters: + p (float | int): Probability of setting units to zero. Default: 0.5 + axis (int | list): The axis along which the dropout is performed. Default None. + mode(str, optional): ['upscale_in_train'(default) | 'downscale_in_infer'] + + 1. upscale_in_train(default), upscale the output at training time + + - train: out = input * mask / ( 1.0 - p ) + - inference: out = input + + 2. downscale_in_infer, downscale the output at inference + + - train: out = input * mask + - inference: out = input * (1.0 - p) + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: N-D tensor. + - output: N-D tensor, the same shape as input. + + Examples: + .. 
code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + x = np.array([[1,2,3], [4,5,6]]).astype('float32') + x = paddle.to_tensor(x) + m = paddle.nn.Dropout(p=0.5) + y_train = m(x) + m.eval() # switch the model to test phase + y_test = m(x) + print(x.numpy()) + print(y_train.numpy()) + print(y_test.numpy()) + """ + + def __init__(self, p=0.5, axis=None, mode="upscale_in_train", name=None): + super(Dropout, self).__init__() + + self.p = p + self.axis = axis + self.mode = mode + self.name = name + + def forward(self, input): + out = F.dropout( + input, + p=self.p, + axis=self.axis, + training=self.training, + mode=self.mode, + name=self.name) + return out + + +class Dropout2D(layers.Layer): + """ + Randomly zero out entire channels (in the batched input 4d tensor with the shape `NCHW` , + a channel is a 2D feature map with the shape `HW`). Each channel will be zeroed out independently + on every forward call with probability `p` using samples from a Bernoulli distribution. + Dropout2d will help promote independence between feature maps as described in the paper: + `Efficient Object Localization Using Convolutional Networks `_ + + See ``paddle.nn.functional.dropout2d`` for more details. + + In dygraph mode, please use ``eval()`` to switch to evaluation mode, where dropout is disabled. + + Parameters: + p (float, optional): Probability of setting units to zero. Default: 0.5 + data_format (str, optional): Specify the data format of the input, and the data format of the output + will be consistent with that of the input. An optional string from: + `NCHW`, `NHWC`. The default is `NCHW`. When it is `NCHW`, the data is + stored in the order of: [batch_size, input_channels, input_height, input_width]. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: 4-D tensor. + - output: 4-D tensor, the same shape as input. + + Examples: + .. code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + x = np.random.random(size=(2, 3, 4, 5)).astype('float32') + x = paddle.to_tensor(x) + m = paddle.nn.Dropout2D(p=0.5) + y_train = m(x) + m.eval() # switch the model to test phase + y_test = m(x) + print(x.numpy()) + print(y_train.numpy()) + print(y_test.numpy()) + """ + + def __init__(self, p=0.5, data_format='NCHW', name=None): + super(Dropout2D, self).__init__() + + self.p = p + self.data_format = data_format + self.name = name + + def forward(self, input): + out = F.dropout2d( + input, + p=self.p, + training=self.training, + data_format=self.data_format, + name=self.name) + return out + + +class Dropout3D(layers.Layer): + """ + Randomly zero out entire channels (in the batched input 5d tensor with the shape `NCDHW` , + a channel is a 3D feature map with the shape `DHW` ). Each channel will be zeroed out independently + on every forward call with probability `p` using samples from a Bernoulli distribution. + Dropout3d will help promote independence between feature maps as described in the paper: + `Efficient Object Localization Using Convolutional Networks `_ + + See ``paddle.nn.functional.dropout3d`` for more details. + + In dygraph mode, please use ``eval()`` to switch to evaluation mode, where dropout is disabled. + + Parameters: + p (float | int): Probability of setting units to zero. Default: 0.5 + data_format (str, optional): Specify the data format of the input, and the data format of the output + will be consistent with that of the input. 
An optional string from: + `NCDHW`, `NDHWC`. The default is `NCDHW`. When it is `NCDHW`, the data is + stored in the order of: [batch_size, input_channels, input_depth, input_height, input_width]. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: 5-D tensor. + - output: 5-D tensor, the same shape as input. + + Examples: + .. code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + x = np.random.random(size=(2, 3, 4, 5, 6)).astype('float32') + x = paddle.to_tensor(x) + m = paddle.nn.Dropout3D(p=0.5) + y_train = m(x) + m.eval() # switch the model to test phase + y_test = m(x) + print(x.numpy()) + print(y_train.numpy()) + print(y_test.numpy()) + """ + + def __init__(self, p=0.5, data_format='NCDHW', name=None): + super(Dropout3D, self).__init__() + + self.p = p + self.data_format = data_format + self.name = name + + def forward(self, input): + out = F.dropout3d( + input, + p=self.p, + training=self.training, + data_format=self.data_format, + name=self.name) + return out + + +class AlphaDropout(layers.Layer): + """ + Alpha Dropout is a type of Dropout that maintains the self-normalizing property. For an input with + zero mean and unit standard deviation, the output of Alpha Dropout maintains the original mean and + standard deviation of the input. Alpha Dropout fits well to SELU activate function by randomly setting + activations to the negative saturation value. + + For more information, please refer to: + `Self-Normalizing Neural Networks `_ + + In dygraph mode, please use ``eval()`` to switch to evaluation mode, where dropout is disabled. + + Parameters: + p (float | int): Probability of setting units to zero. Default: 0.5 + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: N-D tensor. + - output: N-D tensor, the same shape as input. + + Examples: + .. code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + x = np.array([[-1, 1], [-1, 1]]).astype('float32') + x = paddle.to_tensor(x) + m = paddle.nn.AlphaDropout(p=0.5) + y_train = m(x) + m.eval() # switch the model to test phase + y_test = m(x) + print(x.numpy()) + print(y_train.numpy()) + # [[-0.10721093, 1.6655989 ], [-0.7791938, -0.7791938]] (randomly) + print(y_test.numpy()) + """ + + def __init__(self, p=0.5, name=None): + super(AlphaDropout, self).__init__() + self.p = p + self.name = name + + def forward(self, input): + out = F.alpha_dropout( + input, p=self.p, training=self.training, name=self.name) + return out + + +class ReflectionPad1d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ReflectionPad1d`` class. + Uses reflection of the input boundaries to pad the input tensor. + + Parameters: + padding (Tensor | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. The pad has the form (pad_left, pad_right). + data_format (str): An string from: "NCL", "NLC". Specify the data format of the input data. + Default is "NCL" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + None + + Examples: + .. code-block:: text + + x = [[[1., 2., 3.], + [4., 5., 6.]]] + padding = [1, 2], + Out = [[[2. 1. 2. 3. 2. 1.] + [5. 4. 5. 6. 5. 
4.]]] + + Code Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + input_shape = (1, 2, 3) + pad = [1, 2] + data = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) + 1 + my_pad = nn.ReflectionPad1d(padding=pad) + data = paddle.to_tensor(data) + result = my_pad(data) + print(result.numpy()) + # [[[2. 1. 2. 3. 2. 1.] + # [5. 4. 5. 6. 5. 4.]]] + """ + + def __init__(self, padding, data_format="NCL", name=None): + super(ReflectionPad1d, self).__init__() + self._mode = "reflect" + self._data_format = data_format + self._pad = padding + self._name = name + + def forward(self, x): + return F.pad(x, + pad=self._pad, + mode=self._mode, + data_format=self._data_format, + name=self._name) + + +class ReplicationPad1d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ReplicationPad1d`` class. + Uses input boundaries to pad the input tensor. + + Parameters: + padding (Tensor | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. The pad has the form (pad_left, pad_right). + data_format (str): An string from: "NCL", "NLC". Specify the data format of the input data. + Default is "NCL" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + None + + Examples: + .. code-block:: text + + x = [[[1., 2., 3.], + [4., 5., 6.]]] + padding = [1, 2], + Out = [[[2. 1. 2. 3. 2. 1.] + [5. 4. 5. 6. 5. 4.]]] + + Code Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + input_shape = (1, 2, 3) + pad = [1, 2] + data = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) + 1 + my_pad = nn.ReplicationPad1d(padding=pad) + data = paddle.to_tensor(data) + result = my_pad(data) + print(result.numpy()) + # [[[1. 1. 2. 3. 3. 3.] + # [1. 4. 5. 6. 6. 6.]]] + """ + + def __init__(self, padding, data_format="NCL", name=None): + super(ReplicationPad1d, self).__init__() + self._mode = "replicate" + self._data_format = data_format + self._pad = padding + self._name = name + + def forward(self, x): + return F.pad(x, + pad=self._pad, + mode=self._mode, + data_format=self._data_format, + name=self._name) + + +class ConstantPad1d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ConstantPad1d`` class. + Uses a constant value to pad the input tensor. + + Parameters: + padding (Tensor | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. The pad has the form (pad_left, pad_right). + value (float32): The value to fill the padded areas. Default is 0.0 + data_format (str): An string from: "NCL", "NLC". Specify the data format of the input data. + Default is "NCL" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + None + + Examples: + .. code-block:: text + + x = [[[1., 2., 3.], + [4., 5., 6.]]] + padding = [1, 2], + value = 0.0 + Out = [[[0. 1. 2. 3. 0. 0.] + [0. 4. 5. 6. 0. 0.]]] + + Code Examples: + .. 
code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + input_shape = (1, 2, 3) + pad = [1, 2] + data = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) + 1 + my_pad = nn.ConstantPad1d(padding=pad) + data = paddle.to_tensor(data) + result = my_pad(data) + print(result.numpy()) + # [[[0. 1. 2. 3. 0. 0.] + # [0. 4. 5. 6. 0. 0.]]] + """ + + def __init__(self, padding, value=0.0, data_format="NCL", name=None): + super(ConstantPad1d, self).__init__() + self._mode = "constant" + self._data_format = data_format + self._pad = padding + self._value = value + self._name = name + + def forward(self, x): + return F.pad(x, + pad=self._pad, + mode=self._mode, + value=self._value, + data_format=self._data_format, + name=self._name) + + +class ConstantPad2d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ConstantPad2d`` class. + Uses a constant value to pad the input tensor. + + Parameters: + padding (Tensor | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. The pad has the form (pad_left, pad_right, pad_top, pad_bottom). + value (float32): The value to fill the padded areas. Default is 0.0 + data_format (str): An string from: "NCHW", "NHWC". Specify the data format of the input data. + Default is "NCHW" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + None + + Examples: + .. code-block:: text + + x = [[[[1., 2., 3.], + [4., 5., 6.]]]] + padding = [1, 1, 0, 0] + value = 0.0 + Out = [[[[0. 1. 2. 3. 0.] + [0. 4. 5. 6. 0.]]]] + + Code Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + input_shape = (1, 1, 2, 3) + pad = [1, 0, 1, 2] + data = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) + 1 + my_pad = nn.ConstantPad2d(padding=pad) + data = paddle.to_tensor(data) + result = my_pad(data) + print(result.numpy()) + # [[[[0. 0. 0. 0.] + # [0. 1. 2. 3.] + # [0. 4. 5. 6.] + # [0. 0. 0. 0.] + # [0. 0. 0. 0.]]]] + """ + + def __init__(self, padding, value=0.0, data_format="NCHW", name=None): + super(ConstantPad2d, self).__init__() + self._mode = "constant" + self._data_format = data_format + self._pad = padding + self._value = value + self._name = name + + def forward(self, x): + return F.pad(x, + pad=self._pad, + mode=self._mode, + value=self._value, + data_format=self._data_format, + name=self._name) + + +class ZeroPad2d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ZeroPad2d`` class. + Uses 0 to pad the input tensor. + + Parameters: + padding (Variable | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. The pad has the form (pad_left, pad_right, pad_top, pad_bottom). + data_format (str): An string from: "NCHW", "NHWC". Specify the data format of the input data. + Default is "NCHW" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + None + + Examples: + .. code-block:: text + + x = [[[[1., 2., 3.], + [4., 5., 6.]]]] + padding = [1, 1, 0, 0] + Out = [[[[0. 1. 2. 3. 0.] + [0. 4. 5. 6. 0.]]]] + + Code Examples: + .. 
code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + input_shape = (1, 1, 2, 3) + pad = [1, 0, 1, 2] + data = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) + 1 + my_pad = nn.ZeroPad2d(padding=pad) + data = paddle.to_tensor(data) + result = my_pad(data) + print(result.numpy()) + # [[[[0. 0. 0. 0.] + # [0. 1. 2. 3.] + # [0. 4. 5. 6.] + # [0. 0. 0. 0.] + # [0. 0. 0. 0.]]]] + """ + + def __init__(self, padding, data_format="NCHW", name=None): + super(ZeroPad2d, self).__init__() + self._mode = "constant" + self._data_format = data_format + self._pad = padding + self._name = name + + def forward(self, x): + return F.pad(x, + pad=self._pad, + mode=self._mode, + data_format=self._data_format, + name=self._name) + + +class ReplicationPad2d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ReplicationPad2d`` class. + Uses input boundaries to pad the input tensor. + + Parameters: + padding (Tensor | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. The pad has the form (pad_left, pad_right, pad_top, pad_bottom). + data_format (str): An string from: "NCHW", "NHWC". Specify the data format of the input data. + Default is "NCHW" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + None + + Examples: + .. code-block:: text + + x = [[[[1., 2., 3.], + [4., 5., 6.]]]] + padding = [1, 1, 0, 0] + Out = [[[[1. 1. 2. 3. 3.] + [4. 4. 5. 6. 6.]]]] + + Code Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + input_shape = (1, 1, 2, 3) + pad = [1, 0, 1, 2] + data = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) + 1 + my_pad = nn.ReplicationPad2d(padding=pad) + data = paddle.to_tensor(data) + result = my_pad(data) + print(result.numpy()) + # [[[[1. 1. 2. 3.] + # [1. 1. 2. 3.] + # [4. 4. 5. 6.] + # [4. 4. 5. 6.] + # [4. 4. 5. 6.]]]] + """ + + def __init__(self, padding, data_format="NCHW", name=None): + super(ReplicationPad2d, self).__init__() + self._mode = "replicate" + self._data_format = data_format + self._pad = padding + self._name = name + + def forward(self, x): + return F.pad(x, + pad=self._pad, + mode=self._mode, + data_format=self._data_format, + name=self._name) + + +class ReflectionPad2d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ReflectionPad2d`` class. + Uses reflection of the input boundaries to pad the input tensor. + + Parameters: + padding (Variable | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. The pad has the form (pad_left, pad_right, pad_top, pad_bottom). + data_format (str): An string from: "NCHW", "NHWC". Specify the data format of the input data. + Default is "NCHW" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + None + + Examples: + .. code-block:: text + + x = [[[[1., 2., 3.], + [4., 5., 6.]]]] + padding = [1, 1, 0, 0] + Out = [[[[2. 1. 2. 3. 2.] + [5. 4. 5. 6. 5.]]]] + + Code Examples: + .. 
code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + input_shape = (1, 1, 4, 3) + pad = [1, 0, 1, 2] + data = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) + 1 + my_pad = nn.ReflectionPad2d(padding=pad) + data = paddle.to_tensor(data) + result = my_pad(data) + print(result.numpy()) + # [[[[ 5. 4. 5. 6.] + # [ 2. 1. 2. 3.] + # [ 5. 4. 5. 6.] + # [ 8. 7. 8. 9.] + # [11. 10. 11. 12.] + # [ 8. 7. 8. 9.] + # [ 5. 4. 5. 6.]]]] + """ + + def __init__(self, padding, data_format="NCHW", name=None): + super(ReflectionPad2d, self).__init__() + self._mode = "reflect" + self._data_format = data_format + self._pad = padding + self._name = name + + def forward(self, x): + return F.pad(x, + pad=self._pad, + mode=self._mode, + data_format=self._data_format, + name=self._name) + + +class ConstantPad3d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ConstantPad3d`` class. + Uses a constant value to pad the input tensor. + + Parameters: + padding (Tensor | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. The pad has the form (pad_left, pad_right, pad_top, pad_bottom, pad_front, pad_back). + value (float32): The value to fill the padded areas. Default is 0.0 + data_format (str): An string from: "NCDHW", "NDHWC". Specify the data format of the input data. + Default is "NCDHW" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + None + + Examples: + .. code-block:: text + + x = [[[[[1., 2., 3.], + [4., 5., 6.]]]]] + padding = [1, 2, 0, 0, 0, 0] + value = 0.0 + Out = [[[[[0. 1. 2. 3. 0. 0.] + [0. 4. 5. 6. 0. 0.]]]]] + + Code Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + input_shape = (1, 1, 1, 2, 3) + pad = [1, 0, 1, 2, 0, 0] + data = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) + 1 + my_pad = nn.ConstantPad3d(padding=pad) + data = paddle.to_tensor(data) + result = my_pad(data) + print(result.numpy()) + # [[[[[0. 0. 0. 0.] + # [0. 1. 2. 3.] + # [0. 4. 5. 6.] + # [0. 0. 0. 0.] + # [0. 0. 0. 0.]]]]] + """ + + def __init__(self, padding, value=0.0, data_format="NCDHW", name=None): + super(ConstantPad3d, self).__init__() + self._mode = "constant" + self._data_format = data_format + self._pad = padding + self._value = value + self._name = name + + def forward(self, x): + return F.pad(x, + pad=self._pad, + mode=self._mode, + value=self._value, + data_format=self._data_format, + name=self._name) + + +class ReplicationPad3d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ReplicationPad3d`` class. + Uses input boundaries to pad the input tensor. + + Parameters: + padding (Tensor | List[int32]): The padding size with data type int32. [len(padding)/2] dimensions + of input will be padded. The pad has the form (pad_left, pad_right, pad_top, pad_bottom, pad_front, pad_back). + data_format (str): An string from: "NCDHW", "NDHWC". Specify the data format of the input data. + Default is "NCDHW" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + None + + Examples: + .. 
code-block:: text + + x = [[[[[1., 2., 3.], + [4., 5., 6.]]]]] + padding = [1, 2, 0, 0, 0, 0] + Out = [[[[[1. 1. 2. 3. 3. 3.] + [4. 4. 5. 6. 6. 6.]]]]] + + Code Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + input_shape = (1, 1, 1, 2, 3) + pad = [1, 0, 1, 2, 0, 0] + data = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) + 1 + my_pad = nn.ReplicationPad3d(padding=pad) + data = paddle.to_tensor(data) + result = my_pad(data) + print(result.numpy()) + # [[[[[1. 1. 2. 3.] + # [1. 1. 2. 3.] + # [4. 4. 5. 6.] + # [4. 4. 5. 6.] + # [4. 4. 5. 6.]]]]] + """ + + def __init__(self, padding, data_format="NCDHW", name=None): + super(ReplicationPad3d, self).__init__() + self._mode = "replicate" + self._data_format = data_format + self._pad = padding + self._name = name + + def forward(self, x): + return F.pad(x, + pad=self._pad, + mode=self._mode, + data_format=self._data_format, + name=self._name) + + +class CosineSimilarity(layers.Layer): + """ + This interface is used to compute cosine similarity between x1 and x2 along axis. + + Parameters: + axis (int): Dimension of vectors to compute cosine similarity. Default is 1. + eps(float): Small value to avoid division by zero. Default is 1e-8. + Returns: + None + + Examples: + .. code-block:: text + + Case 0: + x1 = [[0.8024077 0.9927354 0.27238318 0.8344984 ] + [0.48949873 0.5797396 0.65444374 0.66510963] + [0.1031398 0.9614342 0.08365563 0.6796464 ] + [0.10760343 0.7461209 0.7726148 0.5801006 ]] + x2 = [[0.62913156 0.1536727 0.9847992 0.04591406] + [0.9098952 0.15715368 0.8671125 0.3156102 ] + [0.4427798 0.54136837 0.5276275 0.32394758] + [0.3769419 0.8535014 0.48041078 0.9256797 ]] + axis = 1 + eps = 1e-8 + Out: [0.5275037 0.8368967 0.75037485 0.9245899] + + Code Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + np.random.seed(0) + x1 = np.random.rand(2,3) + x2 = np.random.rand(2,3) + x1 = paddle.to_tensor(x1) + x2 = paddle.to_tensor(x2) + + cos_sim_func = nn.CosineSimilarity(axis=0) + result = cos_sim_func(x1, x2) + print(result.numpy()) + # [0.99806249 0.9817672 0.94987036] + """ + + def __init__(self, axis=1, eps=1e-8): + super(CosineSimilarity, self).__init__() + self._axis = axis + self._eps = eps + + def forward(self, x1, x2): + return F.cosine_similarity(x1, x2, axis=self._axis, eps=self._eps) diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 9fb6c9ebc2e404ab477630aae99a6b43d683b20b..7d0e59fb7575c9d15d28e88a462aed4ddba47fb9 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -15,12 +15,12 @@ # TODO: define classes of convolutional neural network __all__ = [ - 'Conv2D', - 'Conv2DTranspose', - 'Conv3D', - 'Conv3DTranspose', - # 'TreeConv', - # 'Conv1D' + 'Conv1d', + 'Conv2d', + 'Conv3d', + 'ConvTranspose1d', + 'ConvTranspose2d', + 'ConvTranspose3d', ] import numpy as np @@ -38,12 +38,270 @@ def _get_default_param_initializer(num_channels, filter_size): return Normal(0.0, std, 0) -class Conv2D(layers.Layer): +def _reverse_repeat_list(t, n): + """Reverse the order of `t` and repeat each element for `n` times. + This can be used to translate padding arg used by Conv and Pooling modules + to the ones used by `F.pad`. 
""" - :alias_main: paddle.nn.Conv2D - :alias: paddle.nn.Conv2D,paddle.nn.layer.Conv2D,paddle.nn.layer.conv.Conv2D + return list(x for x in reversed(t) for _ in range(n)) - This interface is used to construct a callable object of the ``Conv2D`` class. + +class _ConvNd(layers.Layer): + def __init__(self, + in_channels, + out_channels, + kernel_size, + transposed, + dims, + stride=1, + padding=0, + padding_mode='zeros', + output_padding=0, + dilation=1, + groups=1, + weight_attr=None, + bias_attr=None, + data_format="NCHW"): + super(_ConvNd, self).__init__() + assert weight_attr is not False, "weight_attr should not be False in Conv." + self._param_attr = weight_attr + self._bias_attr = bias_attr + self._groups = groups + self._in_channels = in_channels + self._out_channels = out_channels + self._data_format = data_format + + valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'} + if padding_mode not in valid_padding_modes: + raise ValueError( + "padding_mode must be one of {}, but got padding_mode='{}'". + format(valid_padding_modes, padding_mode)) + + if padding_mode in {'reflect', 'replicate', 'circular' + } and not isinstance(padding, np.int): + raise TypeError( + "when padding_mode in ['reflect', 'replicate', 'circular'], type of padding must be int" + ) + + self._stride = utils.convert_to_list(stride, dims, 'stride') + self._dilation = utils.convert_to_list(dilation, dims, 'dilation') + self._kernel_size = utils.convert_to_list(kernel_size, dims, + 'kernel_size') + self._padding = padding + self._padding_mode = padding_mode + self.output_padding = output_padding + + if transposed: + filter_shape = [self._in_channels, out_channels // groups + ] + self._kernel_size + else: + if in_channels % groups != 0: + raise ValueError("in_channels must be divisible by groups.") + + if padding_mode in {'reflect', 'replicate', 'circular'}: + _paired_padding = utils.convert_to_list(padding, 2, 'padding') + self._reversed_padding_repeated_twice = _reverse_repeat_list( + _paired_padding, 2) + + filter_shape = [out_channels, in_channels // groups + ] + self._kernel_size + + self.weight = self.create_parameter( + shape=filter_shape, attr=self._param_attr) + self.bias = self.create_parameter( + attr=self._bias_attr, shape=[self._out_channels], is_bias=True) + + +class Conv1d(layers.Layer): + """ + This interface is used to construct a callable object of the ``Conv1d`` class. + For more details, refer to code examples. + The convolution1D layer calculates the output based on the input, filter + and stride, padding, dilation, groups parameters. Input and + Output are in NCL format or NLC format, where N is batch size, C is the number of + the feature map, L is the length of the feature map. + Filter's shape is [MCK] , where M is the number of output feature map, + C is the number of input feature map, K is the size of the kernel. + If the groups is greater than 1, C will equal the number of input feature map divided by the groups. + If bias attribution and activation type are provided, bias is added to the + output of the convolution, and the corresponding activation function is + applied to the final result. + For each input :math:`X`, the equation is: + .. math:: + Out = \\sigma (W \\ast X + b) + Where: + * :math:`X`: Input value, a ``Tensor`` with 'NCL' format or 'NLC' format. + * :math:`W`: Filter value, a ``Tensor`` with shape [MCK] . + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1]. + * :math:`\\sigma`: Activation function. 
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + Example: + - Input: + Input shape: :math:`(N, C_{in}, L_{in})` + Kernel shape: :math:`(C_{out}, C_{in}, K)` + - Output: + Output shape: :math:`(N, C_{out}, L_{out})` + Where + .. math:: + L_{out}&= \\frac{(L_{in} + 2 * padding - (dilation * (L_f - 1) + 1))}{stride} + 1 + Parameters: + in_channels(int): The number of channels in the input image. + out_channels(int): The number of filter. It is as same as the output + feature map. + kernel_size (int|tuple|list): The filter size. If kernel_size is a tuple, + it must contain one integer, (kernel_size). + stride (int|tuple|list, optional): The stride size. If stride is a tuple, it must + contain one integer, (stride_size). Default: 1. + padding(int|str|tuple|list, optional): The size of zeros to be padded. It must be in one of the following forms. + 1. a string in ['valid', 'same']. + 2. an int, which means the feature map is zero paded by size of `padding` on both sides. + 3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero paded by size of `padding[0]` on both sides. + The default value is 0. + dilation (int|tuple|list, optional): The dilation size. If dilation is a tuple, it must + contain one integer, (dilation_size). Default: 1. + groups (int, optional): The groups number of the conv2d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: 1. + padding_mode(str, optional): Four modes: 'zeros', 'reflect', 'replicate', 'circular'. + When in 'zeros' mode, this op uses zeros to pad the input tensor. + When in 'reflect' mode, uses reflection of the input boundaries to pad the input tensor. + When in 'replicate' mode, uses input boundaries to pad the input tensor. + When in 'circular' mode, uses circular input to pad the input tensor. + Default is 'zeros'. + bias(bool, optional): Whether to use bias. Default: True. + param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) + of conv1d. If it is set to None or one attribute of ParamAttr, conv1d + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with :math:`Normal(0.0, std)`, + and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None. + bias_attr (ParamAttr or bool, optional): The attribute for the bias of conv1d. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv1d + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + Attribute: + **weight** (Parameter): the learnable weights of filter of this layer. + **bias** (Parameter or None): the learnable bias of this layer. + Shape: + - x: 3-D tensor with shape: (batch, in_channels, length) or (batch, length, in_channels). + - output: 3-D tensor with same shape as input x. + + Raises: + None + Examples: + .. 
code-block:: python + import paddle + from paddle.nn import Conv1d + import numpy as np + x = np.array([[[4, 8, 1, 9], + [7, 2, 0, 9], + [6, 9, 2, 6]]]).astype(np.float32) + w=np.array( + [[[9, 3, 4], + [0, 0, 7], + [2, 5, 6]], + [[0, 3, 4], + [2, 9, 7], + [5, 6, 8]]]).astype(np.float32) + paddle.disable_static() + x_t = paddle.to_tensor(x) + conv = Conv1d(3, 2, 3) + conv.weight.set_value(w) + y_t = conv(x_t) + y_np = y_t.numpy() + print(y_np) + # [[[133. 238.] + # [160. 211.]]] + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + padding_mode='zeros', + bias=True, + weight_attr=None, + bias_attr=None, + data_format="NCL", + name=None): + super(Conv1d, self).__init__() + assert weight_attr is not False, "param_attr should not be False here." + self._in_channels = in_channels + self._out_channels = out_channels + self._groups = groups + if in_channels % groups != 0: + raise ValueError("in_channels must be divisible by groups.") + self._kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size') + self._stride = utils.convert_to_list(stride, 1, 'stride') + self._dilation = utils.convert_to_list(dilation, 1, 'dilation') + self._padding = padding # leave it to F.conv1d + self._weight_attr = weight_attr + self._bias_attr = bias_attr + self._data_format = data_format + self._name = name + + self._padding_mode = padding_mode + + valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'} + if padding_mode not in valid_padding_modes: + raise ValueError( + "padding_mode must be one of {}, but got padding_mode='{}'". + format(valid_padding_modes, padding_mode)) + + if padding_mode in {'reflect', 'replicate', 'circular' + } and not isinstance(padding, np.int): + raise ValueError( + "when padding_mode in ['reflect', 'replicate', 'circular'], type of padding must be int" + ) + if not isinstance(padding, str): + self._padding = utils.convert_to_list(padding, 1, 'padding') * 2 + + num_filter_channels = in_channels // groups + filter_shape = [self._out_channels, num_filter_channels + ] + self._kernel_size + + self.weight = self.create_parameter( + attr=self._weight_attr, + shape=filter_shape, + default_initializer=_get_default_param_initializer( + self._in_channels, filter_shape)) + self.bias = self.create_parameter( + attr=self._bias_attr, shape=[self._out_channels], + is_bias=True) if bias else None + + def forward(self, x): + padding = 0 + if self._padding_mode != "zeros": + x = F.pad(x, + self._padding, + mode=self._padding_mode, + data_format=self._data_format) + else: + padding = self._padding + + out = F.conv1d( + x, + self.weight, + bias=self.bias, + padding=padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format, + name=self._name) + return out + + +class Conv2d(_ConvNd): + """ + This interface is used to construct a callable object of the ``Conv2d`` class. For more details, refer to code examples. The convolution2D layer calculates the output based on the input, filter and strides, paddings, dilations, groups parameters. Input and @@ -59,48 +317,23 @@ class Conv2D(layers.Layer): If bias attribution and activation type are provided, bias is added to the output of the convolution, and the corresponding activation function is applied to the final result. - For each input :math:`X`, the equation is: - .. math:: - Out = \\sigma (W \\ast X + b) - Where: - * :math:`X`: Input value, a ``Tensor`` with NCHW format. 
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] . * :math:`\\ast`: Convolution operation. * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1]. * :math:`\\sigma`: Activation function. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. - - Example: - - - Input: - - Input shape: :math:`(N, C_{in}, H_{in}, W_{in})` - - Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)` - - - Output: - - Output shape: :math:`(N, C_{out}, H_{out}, W_{out})` - - Where - - .. math:: - - H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ - W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 - Parameters: - num_channels(int): The number of channels in the input image. - num_filters(int): The number of filter. It is as same as the output - feature map. - filter_size (int or tuple): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. + in_channels(int): The number of channels in the input image. + out_channels(int): The number of channels produced by convolution. + kernel_size (int|list|tuple): The size of convolution kernel. + stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. 1. a string in ['valid', 'same']. 2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding`on both sides @@ -108,10 +341,8 @@ class Conv2D(layers.Layer): 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - stride (int or tuple, optional): The stride size. If stride is a tuple, it must - contain two integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. Default: 1. - dilation (int or tuple, optional): The dilation size. If dilation is a tuple, it must + padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` . + dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. Default: 1. groups (int, optional): The groups number of the Conv2d Layer. According to grouped @@ -119,129 +350,287 @@ class Conv2D(layers.Layer): the first half of the filters is only connected to the first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. Default: 1. - param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) + weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) of conv2d. If it is set to None or one attribute of ParamAttr, conv2d will create ParamAttr as param_attr. 
If the Initializer of the param_attr is not set, the parameter is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None. - bias_attr (ParamAttr or bool, optional): The attribute for the bias of conv2d. + bias_attr (ParamAttr|bool, optional): The attribute for the bias of conv2d. If it is set to False, no bias will be added to the output units. If it is set to None or one attribute of ParamAttr, conv2d will create ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias is initialized zero. Default: None. - use_cudnn (bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True. - act (str, optional): Activation type, if it is set to None, activation is not appended. - Default: None. data_format (str, optional): Data format that specifies the layout of input. It can be "NCHW" or "NHWC". Default: "NCHW". - dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32". - Attribute: **weight** (Parameter): the learnable weights of filter of this layer. - **bias** (Parameter or None): the learnable bias of this layer. - - Returns: - None - - Raises: - ValueError: if ``use_cudnn`` is not a bool value. - + Shape: + - x: :math:`(N, C_{in}, H_{in}, W_{in})` + - output: :math:`(N, C_{out}, H_{out}, W_{out})` + Where + .. math:: + H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel_size[0] - 1) + 1))}{strides[0]} + 1 \\\\ + W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel_size[1] - 1) + 1))}{strides[1]} + 1 Examples: .. code-block:: python - import numpy as np - from paddle import fluid - import paddle.fluid.dygraph as dg - from paddle import nn - + import paddle + import paddle.nn as nn x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32') - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - conv = nn.Conv2D(4, 6, (3, 3)) - y_var = conv(x_var) - y_np = y_var.numpy() - print(y_np.shape) + + paddle.disable_static() + x_var = paddle.to_tensor(x) + conv = nn.Conv2d(4, 6, (3, 3)) + y_var = conv(x_var) + y_np = y_var.numpy() + print(y_np.shape) # (2, 6, 6, 6) """ def __init__(self, - num_channels, - num_filters, - filter_size, - padding=0, + in_channels, + out_channels, + kernel_size, stride=1, + padding=0, dilation=1, groups=1, - param_attr=None, + padding_mode='zeros', + weight_attr=None, + bias_attr=None, + data_format="NCHW"): + super(Conv2d, self).__init__( + in_channels, + out_channels, + kernel_size, + False, + 2, + stride=stride, + padding=padding, + padding_mode=padding_mode, + dilation=dilation, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) + + def forward(self, x): + if self._padding_mode != 'zeros': + x = F.pad(x, + self._reversed_padding_repeated_twice, + mode=self._padding_mode, + data_format=self._data_format) + return F.conv2d( + x, + self.weight, + bias=self.bias, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) + + out = F.conv2d( + x, + self.weight, + bias=self.bias, + padding=self._padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) + return out + + +class ConvTranspose1d(layers.Layer): + """ + This interface is used to construct a callable object of the ``ConvTranspose1d`` class. + For more details, refer to code examples. 
+ The 1-D convolution transpose layer calculates the output based on the input, + filter, and dilation, stride, padding. Input(Input) and output(Output) + are in 'NCL' format or 'NLC' where N is batch size, C is the number of channels, + L is the length of the feature. The details of convolution transpose + layer, please refer to the following explanation and references + `therein `_. + If bias attribution and activation type are provided, bias is added to + the output of the convolution, and the corresponding activation function + is applied to the final result. + + For each input :math:`X`, the equation is: + + .. math:: + + Out = \sigma (W \\ast X + b) + + Where: + + * :math:`X`: Input value, a 3-D Tensor with 'NCL' format or 'NLC' format. + * :math:`W`: Kernel value, a 3-D Tensor with 'MCK' format. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, a 3-D Tensor with data format 'NCL' of 'NLC', the shape of :math:`Out` and :math:`X` may be different. + + Example: + + - Input: + + Input shape: :math:`(N, C_{in}, L_{in})` + + Filter shape: :math:`(C_{in}, C_{out}, L_f)` + + - Output: + + Output shape: :math:`(N, C_{out}, L_{out})` + + Where + + .. math:: + + L^\prime_{out} &= (L_{in} - 1) * stride - pad_top - pad_bottom + dilation * (L_f - 1) + 1 \\\\ + L_{out} &\in [ L^\prime_{out}, L^\prime_{out} + stride ] + + Note: + The conv1d_transpose can be seen as the backward of the conv1d. For conv1d, + when stride > 1, conv1d maps multiple input shape to the same output shape, + so for conv1d_transpose, when stride > 1, input shape maps multiple output shape. + If output_size is None, :math:`L_{out} = L^\prime_{out}`; + else, the :math:`L_{out}` of the output size must between :math:`L^\prime_{out}` + and :math:`L^\prime_{out} + stride`. conv1d_transpose can compute the kernel size automatically. + + Args: + in_channels(int): The number of channels in the input image. + out_channels(int): The number of the filter. It is as same as the output + feature map. + kernel_size(int|tuple|list, optional): The filter size. If kernel_size is a tuple, + it must contain one integers, (kernel_size). None if + use output size to calculate kernel_size. Default: None. kernel_size and + output_size should not be None at the same time. + stride(int|tuple|list, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain one integer, (stride_size). + Default: stride = 1. + padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds + `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a + string, either 'VALID' or 'SAME' supported, which is the padding algorithm. + If `padding` is a tuple or list, it could be in two forms: + `[pad]` or `[pad_left, pad_right]`. Default: padding = 0. + output_padding(int|list|tuple, optional): The count of zeros to be added to tail of each dimension. + If it is a tuple, it must contain one integer. Default: 0. + groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by + grouped convolution in Alex Krizhevsky's Deep CNN paper, in which + when group=2, the first half of the filters is only connected to the + first half of the input channels, while the second half of the + filters is only connected to the second half of the input channels. + Default: groups = 1. + bias(bool, optional): Whether to use bias. 
Default: True. + dilation(int|tuple|list, optional): The dilation size. It means the spacing between the kernel points. + If dilation is a tuple, it must contain one integer, (dilation_size). + Default: dilation = 1. + weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights + of conv1d_transpose. If it is set to None or one attribute of ParamAttr, conv1d_transpose + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with Xavier. Default: None. + bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of conv1d_transpose. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv1d_transpose + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + + Attribute: + **weight** (Parameter): the learnable weights of filters of this layer. + **bias** (Parameter or None): the learnable bias of this layer. + + Shape: + - x(Tensor): 3-D tensor with shape (batch, in_channels, length) when data_format is + "NCL" or shape (batch, length, in_channels) when data_format is "NLC". + - output_size(int|tuple|list, optional): The output image size. If output size is a + tuple, it must contain one integer, (feature_length). None if use + kernel_size, padding, output_padding and stride to calculate output_size. + If output_size and kernel_size are specified at the same time, They + should follow the formula above. Default: None. output_size and kernel_size + should not be None at the same time. + - output(Tensor): 3-D tensor with same shape as input x. + + Examples: + .. code-block:: python + + import paddle + from paddle.nn import ConvTranspose1d + import numpy as np + + paddle.disable_static() + # shape: (1, 2, 4) + x=np.array([[[4, 0, 9, 7], + [8, 0, 9, 2]]]).astype(np.float32) + # shape: (2, 1, 2) + y=np.array([[[7, 0]], + [[4, 2]]]).astype(np.float32) + x_t = paddle.to_tensor(x) + conv = ConvTranspose1d(2, 1, 2) + conv.weight.set_value(y) + y_t = conv(x_t) + y_np = y_t.numpy() + print y_np + + # [[[60. 16. 99. 75. 4.]]] + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + output_padding=0, + groups=1, + bias=True, + dilation=1, + weight_attr=None, bias_attr=None, - use_cudnn=True, - act=None, - data_format="NCHW", - dtype='float32'): - super(Conv2D, self).__init__() - assert param_attr is not False, "param_attr should not be False here." - self._num_channels = num_channels - self._num_filters = num_filters + data_format="NCL"): + super(ConvTranspose1d, self).__init__() + assert weight_attr is not False, "param_attr should not be False in ConvTranspose1d." 
+ self._param_attr = weight_attr + self._bias_attr = bias_attr self._groups = groups - if num_channels % groups != 0: - raise ValueError("num_channels must be divisible by groups.") - self._act = act + self._in_channels = in_channels + self._out_channels = out_channels + self._output_padding = output_padding self._data_format = data_format - self._dtype = dtype - if not isinstance(use_cudnn, bool): - raise ValueError("use_cudnn should be True or False") - self._use_cudnn = use_cudnn - - self._filter_size = utils.convert_to_list(filter_size, 2, 'filter_size') - self._stride = utils.convert_to_list(stride, 2, 'stride') - self._dilation = utils.convert_to_list(dilation, 2, 'dilation') - channel_last = (data_format == "NHWC") - self._padding = padding # leave it to F.conv2d - - self._param_attr = param_attr - self._bias_attr = bias_attr + self._bias = bias - num_filter_channels = num_channels // groups - filter_shape = [self._num_filters, num_filter_channels - ] + self._filter_size + self._stride = utils.convert_to_list(stride, 1, 'stride') + self._dilation = utils.convert_to_list(dilation, 1, 'dilation') + self._kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size') + self._padding = padding + filter_shape = [self._in_channels, out_channels // groups + ] + self._kernel_size self.weight = self.create_parameter( - attr=self._param_attr, - shape=filter_shape, - dtype=self._dtype, - default_initializer=_get_default_param_initializer( - self._num_channels, filter_shape)) + shape=filter_shape, attr=self._param_attr) self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) + attr=self._bias_attr, shape=[self._out_channels], + is_bias=True) if self._bias else None - def forward(self, input): - out = F.conv2d( - input, + def forward(self, x, output_size=None): + out = F.conv_transpose1d( + x, self.weight, bias=self.bias, + output_size=output_size, + output_padding=self._output_padding, padding=self._padding, stride=self._stride, dilation=self._dilation, groups=self._groups, - use_cudnn=self._use_cudnn, - act=self._act, data_format=self._data_format) return out -class Conv2DTranspose(layers.Layer): +class ConvTranspose2d(_ConvNd): """ - :alias_main: paddle.nn.Conv2DTranspose - :alias: paddle.nn.Conv2DTranspose,paddle.nn.layer.Conv2DTranspose,paddle.nn.layer.conv.Conv2DTranspose - - This interface is used to construct a callable object of the ``Conv2DTranspose`` class. + This interface is used to construct a callable object of the ``ConvTranspose2d`` class. For more details, refer to code examples. The convolution2D transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input and output @@ -256,55 +645,36 @@ class Conv2DTranspose(layers.Layer): is applied to the final result. The details of convolution transpose layer, please refer to the following explanation and references `conv2dtranspose `_ . - For each input :math:`X`, the equation is: - .. math:: - Out = \sigma (W \\ast X + b) - Where: - * :math:`X`: Input value, a ``Tensor`` with NCHW format. * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] . * :math:`\\ast`: Convolution operation. * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1]. * :math:`\\sigma`: Activation function. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. 
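The expected output quoted in the ConvTranspose1d example earlier in this diff, [[[60. 16. 99. 75. 4.]]], can be reproduced by hand from the transposed-convolution definition (note that the example's `print y_np` is Python 2 syntax and should read `print(y_np)`). A minimal NumPy check of that arithmetic, assuming only the stride-1, zero-padding defaults stated in the docstring:

import numpy as np

# Data taken from the ConvTranspose1d docstring example above.
x = np.array([[[4, 0, 9, 7],
               [8, 0, 9, 2]]], dtype=np.float32)   # (N=1, C_in=2, L_in=4)
w = np.array([[[7, 0]],
              [[4, 2]]], dtype=np.float32)         # (C_in=2, C_out=1, K=2)

N, C_in, L_in = x.shape
_, C_out, K = w.shape
L_out = (L_in - 1) * 1 + 1 * (K - 1) + 1           # stride=1, padding=0, dilation=1

# A transposed convolution scatters each input element through the kernel.
out = np.zeros((N, C_out, L_out), dtype=np.float32)
for n in range(N):
    for oc in range(C_out):
        for ic in range(C_in):
            for i in range(L_in):
                out[n, oc, i:i + K] += x[n, ic, i] * w[ic, oc, :]

print(out)   # [[[60. 16. 99. 75.  4.]]], matching the docstring example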
- Example: - - Input: - Input shape: :math:`(N, C_{in}, H_{in}, W_{in})` - Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)` - - Output: - Output shape: :math:`(N, C_{out}, H_{out}, W_{out})` - Where - .. math:: - H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\ W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\ H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\ W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ) - Parameters: - num_channels(int): The number of channels in the input image. - num_filters(int): The number of the filter. It is as same as the output - feature map. - filter_size(int or tuple): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. - output_size(int or tuple, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). None if use - filter_size, padding, and stride to calculate output_size. - if output_size and filter_size are specified at the same time, They - should follow the formula above. Default: None. + in_channels(int): The number of channels in the input image. + out_channels(int): The number of channels produced by the convolution. + kernel_size(int|list|uple): The kernel size. If kernel_size is a tuple, + it must contain two integers, (kernel_size_H, kernel_size_W). + Otherwise, the kernel will be a square. + output_padding(int|list|tuple, optional): Additional size added to one side + of each dimension in the output shape. Default: 0. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. 1. a string in ['valid', 'same']. 2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding` on both sides @@ -312,10 +682,10 @@ class Conv2DTranspose(layers.Layer): 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - stride(int or tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. Default: 1. - dilation(int or tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. Default: 1. groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by @@ -324,125 +694,94 @@ class Conv2DTranspose(layers.Layer): first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. Default: 1. 
- param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) + weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose will create ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr or bool, optional): The attribute for the bias of conv2d_transpose. + bias_attr (ParamAttr|bool, optional): The attribute for the bias of conv2d_transpose. If it is set to False, no bias will be added to the output units. If it is set to None or one attribute of ParamAttr, conv2d_transpose will create ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias is initialized zero. Default: None. - use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True. - act (str, optional): Activation type, if it is set to None, activation is not appended. - Default: None. data_format (str, optional): Data format that specifies the layout of input. It can be "NCHW" or "NHWC". Default: "NCHW". - dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32". - Attribute: **weight** (Parameter): the learnable weights of filters of this layer. - **bias** (Parameter or None): the learnable bias of this layer. - - Returns: - None - + Shape: + - x: :math:`(N, C_{in}, H_{in}, W_{in})` + - output: :math:`(N, C_{out}, H_{out}, W_{out})` + Where + .. math:: + H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel_size[0] - 1) + 1 \\\\ + W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel_size[1] - 1) + 1 \\\\ Examples: .. code-block:: python - import numpy as np - from paddle import fluid - import paddle.fluid.dygraph as dg - from paddle import nn - + import paddle + import paddle.nn as nn x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32') - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - conv = nn.Conv2DTranspose(4, 6, (3, 3)) - y_var = conv(x_var) - y_np = y_var.numpy() - print(y_np.shape) + paddle.disable_static() + x_var = paddle.to_tensor(x) + conv = nn.ConvTranspose2d(4, 6, (3, 3)) + y_var = conv(x_var) + y_np = y_var.numpy() + print(y_np.shape) # (2, 6, 10, 10) """ def __init__(self, - num_channels, - num_filters, - filter_size, - output_size=None, - padding=0, + in_channels, + out_channels, + kernel_size, stride=1, + padding=0, + output_padding=0, dilation=1, groups=1, - param_attr=None, + weight_attr=None, bias_attr=None, - use_cudnn=True, - act=None, - data_format="NCHW", - dtype='float32'): - super(Conv2DTranspose, self).__init__() - assert param_attr is not False, "param_attr should not be False in conv2d_transpose." 
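As a quick sanity check of the ConvTranspose2d shape formulas above, the sketch below evaluates H'_out and W'_out for the docstring example (input (2, 4, 8, 8), kernel (3, 3), default stride, padding and dilation). It assumes nothing beyond the formula as written and reproduces the documented output shape (2, 6, 10, 10):

# H'_out = (H_in - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + 1
def conv_transpose_out_dim(in_dim, kernel_size, stride=1, padding=0, dilation=1):
    return (in_dim - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + 1

h_out = conv_transpose_out_dim(8, 3)
w_out = conv_transpose_out_dim(8, 3)
print((2, 6, h_out, w_out))   # (2, 6, 10, 10), as printed by the ConvTranspose2d example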
- self._param_attr = param_attr - self._bias_attr = bias_attr - self._act = act - self._groups = groups - self._num_channels = num_channels - self._num_filters = num_filters - self._use_cudnn = use_cudnn - self._data_format = data_format - self._dtype = dtype - - self._stride = utils.convert_to_list(stride, 2, 'stride') - self._dilation = utils.convert_to_list(dilation, 2, 'dilation') - self._filter_size = utils.convert_to_list(filter_size, 2, 'filter_size') + data_format="NCHW"): + super(ConvTranspose2d, self).__init__( + in_channels, + out_channels, + kernel_size, + True, + 2, + stride=stride, + padding=padding, + dilation=dilation, + output_padding=output_padding, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) + + def forward(self, x, output_size=None): if output_size is None: - self._output_size = output_size - elif isinstance(output_size, (list, tuple, int)): - self._output_size = utils.convert_to_list(output_size, 2, - 'output_size') + output_padding = self.output_padding else: - raise ValueError( - "output_size should be int, ot list[int] or tuple[int]") - self._padding = padding + output_padding = 0 - filter_shape = [self._num_channels, num_filters // groups - ] + self._filter_size - self.weight = self.create_parameter( - dtype=self._dtype, shape=filter_shape, attr=self._param_attr) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) - - def forward(self, input): - out = F.conv2d_transpose( - input, + out = F.conv_transpose2d( + x, self.weight, bias=self.bias, - output_size=self._output_size, padding=self._padding, + output_padding=output_padding, stride=self._stride, dilation=self._dilation, groups=self._groups, - use_cudnn=self._use_cudnn, - act=self._act, + output_size=output_size, data_format=self._data_format) return out -class Conv3D(layers.Layer): +class Conv3d(_ConvNd): """ - :alias_main: paddle.nn.Conv3D - :alias: paddle.nn.Conv3D,paddle.nn.layer.Conv3D,paddle.nn.layer.conv.Conv3D - - **Convlution3D Layer** - - The convolution3D layer calculates the output based on the input, filter + **Convlution3d Layer** + The convolution3d layer calculates the output based on the input, filter and strides, paddings, dilations, groups parameters. Input(Input) and Output(Output) are multidimensional tensors with a shape of :math:`[N, C, D, H, W]` . Where N is batch size, C is the number of @@ -451,49 +790,21 @@ class Conv3D(layers.Layer): but adds one dimension(depth). If bias attribution and activation type are provided, bias is added to the output of the convolution, and the corresponding activation function is applied to the final result. - For each input :math:`X`, the equation is: - .. math:: - Out = \sigma (W \\ast X + b) - In the above equation: - * :math:`X`: Input value, a tensor with NCDHW or NDHWC format. * :math:`W`: Filter value, a tensor with MCDHW format. * :math:`\\ast`: Convolution operation. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. * :math:`\\sigma`: Activation function. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. - - Example: - - - Input: - - Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` - - Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)` - - - Output: - Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` - - Where - - .. 
math:: - - D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\ - H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\ - W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1 - Parameters: - num_channels(int): The number of channels in the input image. - num_filters(int): The number of filter. It is as same as the output image channel. - filter_size (int|tuple, optional): The filter size. If filter_size is a tuple, - it must contain three integers, (filter_size_D, filter_size_H, filter_size_W). - Otherwise, the filter will be a square, filter_size_depth = filter_size_height - = filter_size_width = filter_size. - stride (int|tuple, optional): The stride size. If stride is a tuple, it must + in_channels(int): The number of input channels in the input image. + out_channels(int): The number of output channels produced by the convolution. + kernel_size (int|list|tuple, optional): The size of the convolving kernel. + stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must contain three integers, (stride_D, stride_H, stride_W). Otherwise, the stride_D = stride_H = stride_W = stride. The default value is 1. padding (int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. @@ -503,7 +814,7 @@ class Conv3D(layers.Layer): 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - dilation (int|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. groups (int, optional): The groups number of the Conv3d Layer. According to grouped @@ -511,7 +822,8 @@ class Conv3D(layers.Layer): the first half of the filters is only connected to the first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. The default value is 1. - param_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights + padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``. + weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights of conv3d. If it is set to None or one attribute of ParamAttr, conv3d will create ParamAttr as param_attr. If it is set to None, the parameter is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is @@ -521,120 +833,97 @@ class Conv3D(layers.Layer): If it is set to None or one attribute of ParamAttr, conv3d will create ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias is initialized zero. The default value is None. - use_cudnn (bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. The default value is True. 
- act (str, optional): Activation type, if it is set to None, activation is not appended. - The default value is None. data_format (str, optional): Data format that specifies the layout of input. It can be "NCDHW" or "NDHWC". Default: "NCDHW". - dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32". - Attribute: **weight** (Parameter): the learnable weights of filters of this layer. - **bias** (Parameter): the learnable bias of this layer. - - Returns: - None. - + Shape: + - x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` + - output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` + Where + .. math:: + D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\ + H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\ + W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1 Raises: ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch. - Examples: .. code-block:: python - import numpy as np - from paddle import fluid - import paddle.fluid.dygraph as dg - from paddle import nn - + + import paddle + import paddle.nn as nn x = np.random.uniform(-1, 1, (2, 4, 8, 8, 8)).astype('float32') - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - conv = nn.Conv3D(4, 6, (3, 3, 3)) - y_var = conv(x_var) - y_np = y_var.numpy() - print(y_np.shape) + + paddle.disable_static() + x_var = dg.to_variable(x) + conv = nn.Conv3d(4, 6, (3, 3, 3)) + y_var = conv(x_var) + y_np = y_var.numpy() + print(y_np.shape) # (2, 6, 6, 6, 6) """ def __init__(self, - num_channels, - num_filters, - filter_size, + in_channels, + out_channels, + kernel_size, padding=0, stride=1, dilation=1, groups=1, - param_attr=None, + padding_mode='zeros', + weight_attr=None, bias_attr=None, - use_cudnn=True, - act=None, - data_format="NCDHW", - dtype='float32'): - super(Conv3D, self).__init__() - assert param_attr is not False, "param_attr should not be False here." 
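The new Conv3d example above still calls `dg.to_variable` and omits the NumPy import, so it does not run as written. Below is a minimal self-contained version, assuming only the imperative 2.0-style API already used by the other examples in this diff (`paddle.disable_static`, `paddle.to_tensor`, `paddle.nn.Conv3d`); the printed shape follows the Conv3d output formula above.

import numpy as np
import paddle
import paddle.nn as nn

x = np.random.uniform(-1, 1, (2, 4, 8, 8, 8)).astype('float32')

paddle.disable_static()
x_var = paddle.to_tensor(x)
conv = nn.Conv3d(4, 6, (3, 3, 3))   # in_channels=4, out_channels=6, kernel_size=(3, 3, 3)
y_var = conv(x_var)
print(y_var.numpy().shape)          # (2, 6, 6, 6, 6)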
- self._num_channels = num_channels - self._num_filters = num_filters - self._groups = groups - self._act = act - self._use_cudnn = use_cudnn - self._dtype = dtype - self._data_format = data_format - - self._stride = utils.convert_to_list(stride, 3, 'stride') - self._dilation = utils.convert_to_list(dilation, 3, 'dilation') - self._filter_size = utils.convert_to_list(filter_size, 3, 'filter_size') - channel_last = (data_format == "NDHWC") - self._padding = padding - - self._param_attr = param_attr - self._bias_attr = bias_attr - - if num_channels % groups != 0: - raise ValueError("num_channels must be divisible by groups.") - num_filter_channels = num_channels // groups - - filter_shape = [num_filters, num_filter_channels] + self._filter_size - - self.weight = self.create_parameter( - attr=self._param_attr, - shape=filter_shape, - dtype=self._dtype, - default_initializer=_get_default_param_initializer( - self._num_channels, self._filter_size)) - - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) + data_format="NCDHW"): + super(Conv3d, self).__init__( + in_channels, + out_channels, + kernel_size, + False, + 3, + stride=stride, + padding=padding, + padding_mode=padding_mode, + dilation=dilation, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) + + def forward(self, x): + if self._padding_mode != 'zeros': + x = F.pad(x, + self._reversed_padding_repeated_twice, + mode=self._padding_mode, + data_format=self._data_format) + return F.conv3d( + x, + self.weight, + bias=self.bias, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) - def forward(self, input): out = F.conv3d( - input, + x, self.weight, bias=self.bias, padding=self._padding, stride=self._stride, dilation=self._dilation, groups=self._groups, - use_cudnn=self._use_cudnn, - act=self._act, data_format=self._data_format) return out -class Conv3DTranspose(layers.Layer): +class ConvTranspose3d(_ConvNd): """ - :alias_main: paddle.nn.Conv3DTranspose - :alias: paddle.nn.Conv3DTranspose,paddle.nn.layer.Conv3DTranspose,paddle.nn.layer.conv.Conv3DTranspose - **Convlution3D transpose layer** - The convolution3D transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input(Input) and output(Output) are in NCDHW format. Where N is batch size, C is the number of channels, @@ -646,70 +935,38 @@ class Conv3DTranspose(layers.Layer): If bias attribution and activation type are provided, bias is added to the output of the convolution, and the corresponding activation function is applied to the final result. - For each input :math:`X`, the equation is: - .. math:: - Out = \sigma (W \\ast X + b) - In the above equation: - * :math:`X`: Input value, a tensor with NCDHW format. * :math:`W`: Filter value, a tensor with MCDHW format. * :math:`\\ast`: Convolution operation. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. * :math:`\\sigma`: Activation function. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. - Example: - - - Input: - - Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` - - Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)` - - - Output: - - Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` - - Where - - .. 
math:: - - D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\ - H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\ - W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 \\\\ - D_{out} &\in [ D^\prime_{out}, D^\prime_{out} + strides[0] ] \\\\ - H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[1] ] \\\\ - **Note**: - - The conv3d_transpose can be seen as the backward of the conv3d. For conv3d, + The conv_transpose3d can be seen as the backward of the conv3d. For conv3d, when stride > 1, conv3d maps multiple input shape to the same output shape, - so for conv3d_transpose, when stride > 1, input shape maps multiple output shape. + so for conv_transpose3d, when stride > 1, input shape maps multiple output shape. If output_size is None, :math:`H_{out} = H^\prime_{out}, :math:`H_{out} = \ H^\prime_{out}, W_{out} = W^\prime_{out}`; else, the :math:`D_{out}` of the output size must between :math:`D^\prime_{out}` and :math:`D^\prime_{out} + strides[0]`, the :math:`H_{out}` of the output size must between :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[1]`, and the :math:`W_{out}` of the output size must between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[2]`, - conv3d_transpose can compute the kernel size automatically. - - + conv_transpose3d can compute the kernel size automatically. Parameters: - num_channels(int): The number of channels in the input image. - num_filters(int): The number of the filter. It is as same as the output - image channel. - filter_size(int|tuple): The filter size. If filter_size is a tuple, - it must contain three integers, (filter_size_D, filter_size_H, filter_size_W). - Otherwise, the filter will be a square. - output_size(int or tuple, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). None if use - filter_size, padding, and stride to calculate output_size. - if output_size and filter_size are specified at the same time, They - should follow the formula above. Default: None. + in_channels(int): The number of channels in the input image. + out_channels(int): The number of channels produced by the convolution. + kernel_size(int|list|tuple): The kernel size. If kernel_size is a tuple, + it must contain three integers, (kernel_size_D, kernel_size_H, kernel_size_W). + Otherwise, the kernel will be a square. + stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain three integers, (stride_depth, stride_height, + stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. + The default value is 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. 1. a string in ['valid', 'same']. 2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding` @@ -717,11 +974,9 @@ class Conv3DTranspose(layers.Layer): 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. 
Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - stride(int|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain three integers, (stride_depth, stride_height, - stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. - The default value is 1. - dilation(int|tuple, optional): The dilation size. If dilation is a tuple, it must + output_padding(int|list|tuple, optional): Additional size added to one side + of each dimension in the output shape. Default: 0. + dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. groups(int, optional): The groups number of the Conv3d transpose layer. Inspired by @@ -730,7 +985,7 @@ class Conv3DTranspose(layers.Layer): first half of the input channels, while the second half of the filters is only connected to the second half of the input channels. The default value is 1. - param_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights + weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose will create ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is initialized with Xavier. The default value is None. @@ -739,109 +994,86 @@ class Conv3DTranspose(layers.Layer): If it is set to None or one attribute of ParamAttr, conv3d_transpose will create ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias is initialized zero. The default value is None. - use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. The default value is True. - act (str, optional): Activation type, if it is set to None, activation is not appended. - The default value is None. + output_size(int|list|tuple, optional): The output image size. If output size is a + tuple, it must contain two integers, (image_H, image_W). None if use + filter_size, padding, and stride to calculate output_size. + if output_size and filter_size are specified at the same time, They + should follow the formula above. Default: None. data_format (str, optional): Data format that specifies the layout of input. It can be "NCDHW" or "NDHWC". Default: "NCDHW". - Attribute: **weight** (Parameter): the learnable weights of filters of this layer. - **bias** (Parameter): the learnable bias of this layer. - - Returns: - None. - + Shape: + - x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` + - output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` + Where + .. math:: + D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel_size[0] - 1) + 1 \\\\ + H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel_size[1] - 1) + 1 \\\\ + W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (kernel_size[2] - 1) + 1 \\\\ Raises: ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch. - Examples: .. 
code-block:: python - import numpy as np - from paddle import fluid - import paddle.fluid.dygraph as dg - from paddle import nn - + import paddle + import paddle.nn as nn x = np.random.uniform(-1, 1, (2, 4, 8, 8, 8)).astype('float32') - place = fluid.CPUPlace() - with dg.guard(place): - x_var = dg.to_variable(x) - conv = nn.Conv3DTranspose(4, 6, (3, 3, 3)) - y_var = conv(x_var) - y_np = y_var.numpy() - print(y_np.shape) + + paddle.disable_static() + x_var = paddle.to_tensor(x) + conv = nn.Conv3DTranspose(4, 6, (3, 3, 3)) + y_var = conv(x_var) + y_np = y_var.numpy() + print(y_np.shape) # (2, 6, 10, 10, 10) """ def __init__(self, - num_channels, - num_filters, - filter_size, - output_size=None, - padding=0, + in_channels, + out_channels, + kernel_size, stride=1, + padding=0, + output_padding=0, dilation=1, groups=1, - param_attr=None, + weight_attr=None, bias_attr=None, - use_cudnn=True, - act=None, - data_format="NCDHW", - dtype='float32'): - super(Conv3DTranspose, self).__init__() - if not isinstance(use_cudnn, bool): - raise ValueError("use_cudnn should be True or False") - assert param_attr is not False, "param_attr should not be False in conv3d_transpose." - self._num_channels = num_channels - self._num_filters = num_filters - self._groups = groups - self._use_cudnn = use_cudnn - self._act = act - self._dtype = dtype - self._data_format = data_format - - self._stride = utils.convert_to_list(stride, 3, 'stride') - self._dilation = utils.convert_to_list(dilation, 3, 'dilation') - self._filter_size = utils.convert_to_list(filter_size, 3, 'filter_size') - channel_last = (data_format == "NDHWC") - self._padding = padding + data_format="NCDHW"): + super(ConvTranspose3d, self).__init__( + in_channels, + out_channels, + kernel_size, + True, + 3, + stride=stride, + padding=padding, + dilation=dilation, + output_padding=output_padding, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) + + def forward(self, x, output_size): if output_size is None: - self._output_size = output_size - elif isinstance(output_size, (list, tuple, int)): - self._output_size = utils.convert_to_list(output_size, 3, - 'output_size') + output_padding = self.output_padding else: - raise ValueError( - "output_size should be int, ot list[int] or tuple[int]") - - self._param_attr = param_attr - self._bias_attr = bias_attr + output_padding = 0 - filter_shape = [num_channels, num_filters // groups] + self._filter_size - self.weight = self.create_parameter( - dtype=self._dtype, shape=filter_shape, attr=self._param_attr) - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._num_filters], - dtype=self._dtype, - is_bias=True) - - def forward(self, input): - out = F.conv3d_transpose( - input, + out = F.conv_transpose3d( + x, self.weight, bias=self.bias, - output_size=self._output_size, padding=self._padding, + output_padding=output_padding, stride=self._stride, dilation=self._dilation, groups=self._groups, - use_cudnn=self._use_cudnn, - act=self._act, + output_size=output_size, data_format=self._data_format) return out diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 0cd3673288e676c465f2802ac78edeb73e860180..de10e77eb1c000e66a7a914dc94ce39a6268bb61 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -12,24 +12,133 @@ # See the License for the specific language governing permissions and # limitations under the License. 
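The ConvTranspose3d example above still instantiates `nn.Conv3DTranspose` and omits the NumPy import, so it does not match the class name added in this diff. A minimal self-contained sketch under the same assumptions as the other examples here; `output_size` is passed explicitly because the `forward` signature shown above takes it as an argument:

import numpy as np
import paddle
import paddle.nn as nn

x = np.random.uniform(-1, 1, (2, 4, 8, 8, 8)).astype('float32')

paddle.disable_static()
x_var = paddle.to_tensor(x)
conv = nn.ConvTranspose3d(4, 6, (3, 3, 3))
y_var = conv(x_var, output_size=None)
print(y_var.numpy().shape)   # (2, 6, 10, 10, 10), per the docstring example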
-# TODO: define loss functions of neural network +# TODO: define loss functions of neural network import numpy as np import paddle.fluid as fluid import paddle.fluid.core as core import paddle from .. import functional as F +from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator __all__ = [ - # 'NCELoss', + 'BCEWithLogitsLoss', 'CrossEntropyLoss', 'MSELoss', 'L1Loss', 'NLLLoss', 'BCELoss', - 'MarginRankingLoss' + 'KLDivLoss', + 'MarginRankingLoss', + 'CTCLoss', + 'SmoothL1Loss', ] +class BCEWithLogitsLoss(fluid.dygraph.Layer): + """ + This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer. + Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits`` + layer and some reduce operations. + + This measures the element-wise probability error in classification tasks + in which each class is independent. + This can be thought of as predicting labels for a data-point, where labels + are not mutually exclusive. For example, a news article can be about + politics, technology or sports at the same time or none of these. + + First this operator calculate loss function as follows: + + .. math:: + Out = -Labels * \\log(\\sigma(Logit)) - (1 - Labels) * \\log(1 - \\sigma(Logit)) + + We know that :math:`\\sigma(Logit) = \\frac{1}{1 + \\e^{-Logit}}`. By substituting this we get: + + .. math:: + Out = Logit - Logit * Labels + \\log(1 + \\e^{-Logit}) + + For stability and to prevent overflow of :math:`\\e^{-Logit}` when Logit < 0, + we reformulate the loss as follows: + + .. math:: + Out = \\max(Logit, 0) - Logit * Labels + \\log(1 + \\e^{-\|Logit\|}) + + Then, if ``weight`` or ``pos_weight`` is not None, this operator multiply the + weight tensor on the loss `Out`. The ``weight`` tensor will attach different + weight on every items in the batch. The ``pos_weight`` will attach different + weight on the positive label of each class. + + Finally, this operator applies reduce operation on the loss. + If :attr:`reduction` set to ``'none'``, the operator will return the original loss `Out`. + If :attr:`reduction` set to ``'mean'``, the reduced mean loss is :math:`Out = MEAN(Out)`. + If :attr:`reduction` set to ``'sum'``, the reduced sum loss is :math:`Out = SUM(Out)`. + + Note that the target labels ``label`` should be numbers between 0 and 1. + + Args: + weight (Tensor, optional): A manual rescaling weight given to the loss of each + batch element. If given, it has to be a 1D Tensor whose size is `[N, ]`, + The data type is float32, float64. Default is ``'None'``. + reduction (str, optional): Indicate how to average the loss by batch_size, + the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. + If :attr:`reduction` is ``'none'``, the unreduced loss is returned; + If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; + If :attr:`reduction` is ``'sum'``, the summed loss is returned. + Default is ``'mean'``. + pos_weight (Tensor, optional): A weight of positive examples. Must be a vector + with length equal to the number of classes. The data type is float32, float64. + Default is ``'None'``. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shapes: + logit (Tensor): The input predications tensor. 2-D tensor with shape: [N, *], + N is batch_size, `*` means number of additional dimensions. The ``logit`` + is usually the output of Linear layer. Available dtype is float32, float64. + label (Tensor): The target labels tensor. 
2-D tensor with the same shape as + ``logit``. The target labels which values should be numbers between 0 and 1. + Available dtype is float32, float64. + output (Tensor): If ``reduction`` is ``'none'``, the shape of output is + same as ``logit`` , else the shape of output is scalar. + + Returns: + A callable object of BCEWithLogitsLoss. + + Examples: + + .. code-block:: python + import paddle + paddle.disable_static() + logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32") + label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32") + bce_logit_loss = paddle.nn.BCEWithLogitsLoss() + output = bce_logit_loss(logit, label) + print(output.numpy()) # [0.45618808] + + """ + + def __init__(self, + weight=None, + reduction='mean', + pos_weight=None, + name=None): + if reduction not in ['sum', 'mean', 'none']: + raise ValueError( + "The value of 'reduction' in BCEWithLogitsLoss should be 'sum', 'mean' or 'none', but " + "received %s, which is not allowed." % reduction) + + super(BCEWithLogitsLoss, self).__init__() + self.weight = weight + self.reduction = reduction + self.pos_weight = pos_weight + self.name = name + + def forward(self, logit, label): + out = paddle.nn.functional.binary_cross_entropy_with_logits( + logit, label, self.weight, self.reduction, self.pos_weight, + self.name) + return out + + class CrossEntropyLoss(fluid.dygraph.Layer): """ :alias_main: paddle.nn.CrossEntropyLoss @@ -59,8 +168,8 @@ class CrossEntropyLoss(fluid.dygraph.Layer): Parameters: input (Variable): Input tensor, the data type is float32, float64. Shape is (N, C), where C is number of classes, and if shape is more than 2D, this - is (N, C, D1, D2,..., Dk), k >= 1. - label (Variable): Label tensor, the data type is int64. Shape is (N), where each + is (N, C, D1, D2,..., Dk), k >= 1. + label (Variable): Label tensor, the data type is int64. Shape is (N), where each value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is (N, D1, D2,..., Dk), k >= 1. weight (Variable, optional): Weight tensor, a manual rescaling weight given @@ -116,7 +225,7 @@ class CrossEntropyLoss(fluid.dygraph.Layer): print(output.numpy()) """ - def __init__(self, weight=None, reduction='mean', ignore_index=-100): + def __init__(self, weight=None, ignore_index=-100, reduction='mean'): super(CrossEntropyLoss, self).__init__() self.weight = weight self.reduction = reduction @@ -134,25 +243,16 @@ class CrossEntropyLoss(fluid.dygraph.Layer): " 'none', but received %s, which is not allowed." % self.reduction) - log_softmax = paddle.nn.LogSoftmax() - log_softmax_out = log_softmax(input) - if self.weight is not None and not isinstance(self.weight, - fluid.framework.Variable): - raise ValueError( - "The weight' is not a Variable, please convert to Variable.") - nll_loss = paddle.nn.loss.NLLLoss( + return paddle.nn.functional.cross_entropy( + input, + label, weight=self.weight, - reduction=self.reduction, - ignore_index=self.ignore_index) - - return nll_loss(log_softmax_out, label) + ignore_index=self.ignore_index, + reduction=self.reduction) class MSELoss(fluid.dygraph.layers.Layer): """ - :alias_main: paddle.nn.MSELoss - :alias: paddle.nn.MSELoss,paddle.nn.layer.MSELoss,paddle.nn.layer.loss.MSELoss - **Mean Square Error Loss** Computes the mean square error (squared L2 norm) of given input and label. @@ -174,55 +274,34 @@ class MSELoss(fluid.dygraph.layers.Layer): where `input` and `label` are `float32` tensors of same shape. 
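The MSELoss definition above reduces to a mean of squared differences under the default `'mean'` reduction. A plain NumPy check of the value quoted in the example below, [0.04000002] for input 1.5 and label 1.7, with no paddle call involved:

import numpy as np

input_data = np.array([1.5], dtype=np.float32)
label_data = np.array([1.7], dtype=np.float32)

# Out = MEAN((input - label)^2) for reduction='mean'
mse = np.mean(np.square(input_data - label_data))
print(mse)   # ~0.04000002 (float32 rounding), matching the MSELoss example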
Parameters: - input (Variable): Input tensor, the data type is float32, - label (Variable): Label tensor, the data type is float32, reduction (string, optional): The reduction method for the output, could be 'none' | 'mean' | 'sum'. - If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned. - If :attr:`size_average` is ``'sum'``, the reduced sum loss is returned. - If :attr:`reduction` is ``'none'``, the unreduced loss is returned. + If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned. + If :attr:`size_average` is ``'sum'``, the reduced sum loss is returned. + If :attr:`reduction` is ``'none'``, the unreduced loss is returned. Default is ``'mean'``. - Returns: - The tensor variable storing the MSE loss of input and label. - - Return type: - Variable. + Shape: + input (Tensor): Input tensor, the data type is float32 or float64 + label (Tensor): Label tensor, the data type is float32 or float64 + output (Tensor): output tensor storing the MSE loss of input and label, the data type is same as input. Examples: .. code-block:: python import numpy as np import paddle - from paddle import fluid - import paddle.fluid.dygraph as dg - mse_loss = paddle.nn.loss.MSELoss() - input = fluid.data(name="input", shape=[1]) - label = fluid.data(name="label", shape=[1]) - place = fluid.CPUPlace() input_data = np.array([1.5]).astype("float32") label_data = np.array([1.7]).astype("float32") - # declarative mode - output = mse_loss(input,label) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - output_data = exe.run( - fluid.default_main_program(), - feed={"input":input_data, "label":label_data}, - fetch_list=[output], - return_numpy=True) - print(output_data) - # [array([0.04000002], dtype=float32)] - - # imperative mode - with dg.guard(place) as g: - input = dg.to_variable(input_data) - label = dg.to_variable(label_data) - output = mse_loss(input, label) - print(output.numpy()) - # [0.04000002] + paddle.disable_static() + mse_loss = paddle.nn.loss.MSELoss() + input = paddle.to_tensor(input_data) + label = paddle.to_tensor(label_data) + output = mse_loss(input, label) + print(output.numpy()) + # [0.04000002] """ def __init__(self, reduction='mean'): @@ -235,10 +314,10 @@ class MSELoss(fluid.dygraph.layers.Layer): def forward(self, input, label): if not fluid.framework.in_dygraph_mode(): - fluid.data_feeder.check_variable_and_dtype(input, 'input', - ['float32'], 'MSELoss') - fluid.data_feeder.check_variable_and_dtype(label, 'label', - ['float32'], 'MSELoss') + fluid.data_feeder.check_variable_and_dtype( + input, 'input', ['float32', 'float64'], 'MSELoss') + fluid.data_feeder.check_variable_and_dtype( + label, 'label', ['float32', 'float64'], 'MSELoss') square_out = fluid.layers.square( fluid.layers.elementwise_sub(input, label)) @@ -255,64 +334,64 @@ class MSELoss(fluid.dygraph.layers.Layer): class L1Loss(fluid.dygraph.Layer): """ This interface is used to construct a callable object of the ``L1Loss`` class. - The L1Loss layer calculates the L1 Loss of ``x`` and ``label`` as follows. + The L1Loss layer calculates the L1 Loss of ``input`` and ``label`` as follows. - If :attr:`reduction` set to ``'none'``, the loss is: + If `reduction` set to ``'none'``, the loss is: .. math:: - Out = \lvert x - label\rvert + Out = \lvert input - label\rvert - If :attr:`reduction` set to ``'mean'``, the loss is: + If `reduction` set to ``'mean'``, the loss is: .. 
math:: - Out = MEAN(\lvert x - label\rvert) + Out = MEAN(\lvert input - label\rvert) - If :attr:`reduction` set to ``'sum'``, the loss is: + If `reduction` set to ``'sum'``, the loss is: .. math:: - Out = SUM(\lvert x - label\rvert) + Out = SUM(\lvert input - label\rvert) + - Parameters: - reduction (str, optional): Indicate the reduction to apply to the loss, + reduction (str, optional): Indicate the reduction to apply to the loss, the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. - If :attr:`reduction` is ``'none'``, the unreduced loss is returned; - If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned. - If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned. + If `reduction` is ``'none'``, the unreduced loss is returned; + If `reduction` is ``'mean'``, the reduced mean loss is returned. + If `reduction` is ``'sum'``, the reduced sum loss is returned. Default is ``'mean'``. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Shape: - x (Tensor): The input tensor. The shapes is [N, *], where N is batch size and `*` means any number of additional dimensions. It's data type should be float32, float64, int32, int64. - label (Tensor): label. The shapes is [N, *], same shape as ``x`` . It's data type should be float32, float64, int32, int64. - output (Tensor): The L1 Loss of ``x`` and ``label``. - If :attr:`reduction` is ``'none'``, the shape of output loss is [N, *], the same as ``x`` . - If :attr:`reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1], which means the output is a scalar. - + input (Tensor): The input tensor. The shapes is [N, *], where N is batch size and `*` means any number of additional dimensions. It's data type should be float32, float64, int32, int64. + label (Tensor): label. The shapes is [N, *], same shape as ``input`` . It's data type should be float32, float64, int32, int64. + output (Tensor): The L1 Loss of ``input`` and ``label``. + If `reduction` is ``'none'``, the shape of output loss is [N, *], the same as ``input`` . + If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. + Examples: .. 
code-block:: python import paddle import numpy as np paddle.disable_static() - x_data = np.array([[1.5, 0.8], [0.2, 1.3]]).astype("float32") + input_data = np.array([[1.5, 0.8], [0.2, 1.3]]).astype("float32") label_data = np.array([[1.7, 1], [0.4, 0.5]]).astype("float32") - x = paddle.to_variable(x_data) + input = paddle.to_variable(input_data) label = paddle.to_variable(label_data) l1_loss = paddle.nn.loss.L1Loss() - output = l1_loss(x, label) - print(output.numpy()) + output = l1_loss(input, label) + print(output.numpy()) # [0.35] l1_loss = paddle.nn.loss.L1Loss(reduction='sum') - output = l1_loss(x, label) - print(output.numpy()) + output = l1_loss(input, label) + print(output.numpy()) # [1.4] l1_loss = paddle.nn.loss.L1Loss(reduction='none') - output = l1_loss(x, label) - print(output.numpy()) + output = l1_loss(input, label) + print(output.numpy()) # [[0.20000005 0.19999999] # [0.2 0.79999995]] """ @@ -326,97 +405,87 @@ class L1Loss(fluid.dygraph.Layer): self.reduction = reduction self.name = name - def forward(self, x, label): + def forward(self, input, label): return paddle.nn.functional.l1_loss( - x, label, self.reduction, name=self.name) + input, label, self.reduction, name=self.name) class BCELoss(fluid.dygraph.Layer): """ - :alias_main: paddle.nn.BCELoss - :alias: paddle.nn.BCELoss,paddle.nn.layer.BCELoss,paddle.nn.layer.loss.BCELoss - This interface is used to construct a callable object of the ``BCELoss`` class. - The BCELoss layer measures the binary_cross_entropy loss between input predictions - and target labels. The binary_cross_entropy loss can be described as: + The BCELoss layer measures the binary_cross_entropy loss between input predictions ``input`` + and target labels ``label`` . The binary_cross_entropy loss can be described as: If :attr:`weight` is set, the loss is: .. math:: Out = -1 * weight * (label * log(input) + (1 - label) * log(1 - input)) + If :attr:`weight` is None, the loss is: .. math:: Out = -1 * (label * log(input) + (1 - label) * log(1 - input)) - If :attr:`reduction` set to ``'none'``, the unreduced loss is: + If :attr:`reduction` set to ``'none'``, the interface will return the original loss `Out`. - .. math:: - Out = Out If :attr:`reduction` set to ``'mean'``, the reduced mean loss is: .. math:: Out = MEAN(Out) + If :attr:`reduction` set to ``'sum'``, the reduced sum loss is: .. math:: Out = SUM(Out) - Note that the input predictions always be the output of sigmoid, and the target labels + Note that the input predictions ``input`` always be the output of sigmoid, and the target labels ``label`` should be numbers between 0 and 1. - The shape of input predictions and target labels are [N, *], where N is batch_size and `*` - means any number of additional dimensions. If ``reduction`` is ``'none'``, the shape of - output is scalar, else the shape of output is same as input. - Parameters: - weight (Variable, optional): A manual rescaling weight given to the loss of each - batch element. If given, has to be a Variable of size nbatch and the data type + weight (Tensor, optional): A manual rescaling weight given to the loss of each + batch element. If given, has to be a Tensor of size nbatch and the data type is float32, float64. Default is ``'None'``. - reduction (str, optional): Indicate how to average the loss by batch_size, + reduction (str, optional): Indicate how to average the loss by batch_size, the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. 
If :attr:`reduction` is ``'none'``, the unreduced loss is returned; - If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; + If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; If :attr:`reduction` is ``'sum'``, the summed loss is returned. Default is ``'mean'``. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + input (Tensor): 2-D tensor with shape: (N, *), N is batch_size, `*` means + number of additional dimensions. The input ``input`` should always + be the output of sigmod. Available dtype is float32, float64. + label (Tensor): 2-D tensor with the same shape as ``input``. The target + labels which values should be numbers between 0 and 1. Available + dtype is float32, float64. + output (Tensor): If ``reduction`` is ``'none'``, the shape of output is + same as ``input`` , else the shape of output is scalar. - Returns: + Returns: A callable object of BCELoss. Examples: .. code-block:: python - # declarative mode - import paddle.fluid as fluid import numpy as np import paddle - input = fluid.data(name="input", shape=[3, 1], dtype='float32') - label = fluid.data(name="label", shape=[3, 1], dtype='float32') - bce_loss = paddle.nn.loss.BCELoss() - output = bce_loss(input, label) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - input_data = np.array([0.5, 0.6, 0.7]).astype("float32") label_data = np.array([1.0, 0.0, 1.0]).astype("float32") - output_data = exe.run(fluid.default_main_program(), - feed={"input":input_data, "label":label_data}, - fetch_list=[output], - return_numpy=True) - - print(output_data) # [array([0.65537095], dtype=float32)] - - # imperative mode - import paddle.fluid.dygraph as dg - with dg.guard(place) as g: - input = dg.to_variable(input_data) - label = dg.to_variable(label_data) - output = bce_loss(input, label) - print(output.numpy()) # [0.65537095] + + paddle.disable_static() + input = paddle.to_variable(input_data) + label = paddle.to_variable(label_data) + bce_loss = paddle.nn.loss.BCELoss() + output = bce_loss(input, label) + print(output.numpy()) # [0.65537095] + paddle.enable_static() + """ - def __init__(self, weight=None, reduction='mean'): + def __init__(self, weight=None, reduction='mean', name=None): if reduction not in ['sum', 'mean', 'none']: raise ValueError( "The value of 'reduction' in bce_loss should be 'sum', 'mean' or 'none', but " @@ -425,38 +494,12 @@ class BCELoss(fluid.dygraph.Layer): super(BCELoss, self).__init__() self.weight = weight self.reduction = reduction + self.name = name def forward(self, input, label): - dtype = self._helper.input_dtype(input) - - fluid.data_feeder.check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'bce_loss') - fluid.data_feeder.check_variable_and_dtype( - label, 'label', ['float32', 'float64'], 'bce_loss') - - out = self._helper.create_variable_for_type_inference(dtype=input.dtype) - self._helper.append_op( - type='bce_loss', - inputs={ - 'X': [input], - 'Label': [label], - }, - outputs={'Out': [out]}) - - if self.weight is not None: - if isinstance(self.weight, fluid.framework.Variable): - w = self.weight - out = fluid.layers.elementwise_mul(out, w, axis=-1) - else: - raise ValueError( - "The weight is not a Variable, please convert to Variable.") - - if self.reduction == 'sum': - return fluid.layers.reduce_sum(out) - elif self.reduction == 'mean': - return fluid.layers.reduce_mean(out) - else: - return out 
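The rewritten ``forward`` below delegates the whole computation to ``paddle.nn.functional.binary_cross_entropy``. As a quick reference for what that call computes, here is a minimal NumPy sketch of the docstring formula above (``bce_reference`` is a hypothetical helper for illustration, not part of this patch):

    import numpy as np

    def bce_reference(input, label, weight=None, reduction='mean'):
        # Out = -1 * (label * log(input) + (1 - label) * log(1 - input))
        out = -(label * np.log(input) + (1 - label) * np.log(1 - input))
        if weight is not None:  # optional rescaling weight per batch element
            out = out * weight
        if reduction == 'mean':
            return out.mean()
        if reduction == 'sum':
            return out.sum()
        return out  # 'none'

    # same data as the docstring example; expected value is about 0.65537095
    print(bce_reference(np.array([0.5, 0.6, 0.7]), np.array([1.0, 0.0, 1.0])))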
+ out = paddle.nn.functional.binary_cross_entropy( + input, label, self.weight, self.reduction, self.name) + return out class NLLLoss(fluid.dygraph.Layer): @@ -466,18 +509,18 @@ class NLLLoss(fluid.dygraph.Layer): This class accepts input and target label and returns negative log likelihood cross error. It is useful to train a classification problem with C classes. - + The input for the loss is epected to contain log-probabilities of each classes. It has to be a Tensor of size either (batch_size, C) or (batch_size, C, d1, d2, ..., dK) with K >= 1 for the K-dimensional case. The label for the loss should be a class index in the range [0, C-1] where C is the number of classes. If ignore_index is specified, the specified target value does not contribute to the input gradient. - + If the optional argument `weight` is provided, it should be a 1D Tensor assigning weight to each of the classed. This is particularly useful when you have an unbalanced training set. - + The loss is calculated as follows. The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as: @@ -500,11 +543,11 @@ class NLLLoss(fluid.dygraph.Layer): Parameters: weight (Tensor, optional): Weight tensor, a manual rescaling weight given to each class. If given, it has to be a 1D Tensor whose size is `[C, ]`. Otherwise, - it treated as if having all ones. the data type is + it treated as if having all ones. the data type is float32, float64, Default is ``'None'``. ignore_index (int64, optional): Specifies a target value that is ignored and does not contribute to the input gradient. - reduction (str, optional): Indicate how to average the loss, + reduction (str, optional): Indicate how to average the loss, the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. If `reduction` is ``'mean'``, the reduced mean loss is returned; if `reduction` is ``'sum'``, the reduced sum loss is returned; @@ -574,15 +617,84 @@ class NLLLoss(fluid.dygraph.Layer): name=self._name) +class KLDivLoss(fluid.dygraph.Layer): + """ + This interface calculates the Kullback-Leibler divergence loss + between Input(X) and Input(Target). Notes that Input(X) is the + log-probability and Input(Target) is the probability. + + KL divergence loss is calculated as follows: + + $$l(x, y) = y * (\log(y) - x)$$ + + Parameters: + reduction (str, optional): Indicate how to average the loss, + the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. + If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; + Default is ``'mean'``. + + Shape: + - input: (N, *) where * means, any number of additional dimensions. + - label: (N, *), same shape as input + - output: tensor with shape: (1) by default. + + + Examples: + .. 
code-block:: python + + import paddle + import numpy as np + import paddle.nn as nn + + paddle.enable_imperative() + + shape = (5, 20) + x = np.random.uniform(-10, 10, shape).astype('float32') + target = np.random.uniform(-10, 10, shape).astype('float32') + + # 'batchmean' reduction, loss shape will be [N] + kldiv_criterion = nn.KLDivLoss(reduction='batchmean') + pred_loss = kldiv_criterion(paddle.to_variable(x), + paddle.to_variable(target)) + # shape=[5] + + # 'mean' reduction, loss shape will be [1] + kldiv_criterion = nn.KLDivLoss(reduction='mean') + pred_loss = kldiv_criterion(paddle.to_variable(x), + paddle.to_variable(target)) + # shape=[1] + + # 'sum' reduction, loss shape will be [1] + kldiv_criterion = nn.KLDivLoss(reduction='sum') + pred_loss = kldiv_criterion(paddle.to_variable(x), + paddle.to_variable(target)) + # shape=[1] + + # 'none' reduction, loss shape is same with X shape + kldiv_criterion = nn.KLDivLoss(reduction='none') + pred_loss = kldiv_criterion(paddle.to_variable(x), + paddle.to_variable(target)) + # shape=[5, 20] + """ + + def __init__(self, reduction='mean'): + super(KLDivLoss, self).__init__() + self.reduction = reduction + + def forward(self, input, label): + out = paddle.nn.functional.kl_div(input, label, self.reduction) + return out + + class MarginRankingLoss(fluid.dygraph.Layer): """ This interface is used to construct a callable object of the ``MarginRankingLoss`` class. - The MarginRankingLoss layer calculates the margin rank loss between the input, other and target + The MarginRankingLoss layer calculates the margin rank loss between the input, other and label , use the math function as follows. - .. math:: - margin\_rank\_loss = max(0, -target * (input - other) + margin) + .. math:: + margin\_rank\_loss = max(0, -label * (input - other) + margin) If :attr:`reduction` set to ``'mean'``, the reduced mean loss is: @@ -601,11 +713,11 @@ class MarginRankingLoss(fluid.dygraph.Layer): reduction (str, optional): Indicate the reduction to apply to the loss, the candicates are ``'none'``, ``'mean'``, ``'sum'``.If :attr:`reduction` is ``'none'``, the unreduced loss is returned; If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned. If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned. Default is ``'mean'``. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - Shape: + Shape: input: N-D Tensor, the shape is [N, *], N is batch size and `*` means any number of additional dimensions., available dtype is float32, float64. other: N-D Tensor, `other` have the same shape and dtype as `input`. - target: N-D Tensor, target have the same shape and dtype as `input`. - out: If :attr:`reduction` is ``'mean'`` or ``'sum'`` , the out shape is :math:`[1]`, otherwise the shape is the same as `input` .The same dtype as input tensor. + label: N-D Tensor, label have the same shape and dtype as `input`. + output: If :attr:`reduction` is ``'mean'`` or ``'sum'`` , the out shape is :math:`[1]`, otherwise the shape is the same as `input` .The same dtype as input tensor. Returns: A callable object of MarginRankingLoss. @@ -614,30 +726,194 @@ class MarginRankingLoss(fluid.dygraph.Layer): .. 
code-block:: python - import numpy as np - import paddle - + import numpy as np + import paddle + paddle.disable_static() - + input = paddle.to_variable(np.array([[1, 2], [3, 4]]).astype("float32")) other = paddle.to_variable(np.array([[2, 1], [2, 4]]).astype("float32")) - target = paddle.to_variable(np.array([[1, -1], [-1, -1]]).astype("float32")) + label = paddle.to_variable(np.array([[1, -1], [-1, -1]]).astype("float32")) margin_rank_loss = paddle.nn.MarginRankingLoss() - loss = margin_rank_loss(input, other, target) + loss = margin_rank_loss(input, other, label) print(loss.numpy()) # [0.75] """ def __init__(self, margin=0.0, reduction='mean', name=None): if reduction not in ['sum', 'mean', 'none']: raise ValueError( - "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but " + "The value of 'reduction' in MarginRankingLoss should be 'sum', 'mean' or 'none', but " "received %s, which is not allowed." % reduction) super(MarginRankingLoss, self).__init__() self.margin = margin self.reduction = reduction self.name = name - def forward(self, input, other, target): + def forward(self, input, other, label): out = paddle.nn.functional.margin_ranking_loss( - input, other, target, self.margin, self.reduction, self.name) + input, other, label, self.margin, self.reduction, self.name) return out + + +class CTCLoss(fluid.dygraph.Layer): + """ + :alias_main: paddle.nn.CTCLoss + :alias: paddle.nn.CTCLoss, paddle.nn.layer.CTCLoss, paddle.nn.layer.loss.CTCLoss + + An operator integrating the open source Warp-CTC library (https://github.com/baidu-research/warp-ctc) + to compute Connectionist Temporal Classification (CTC) loss. + It can be aliased as softmax with CTC, since a native softmax activation + is interated to the Warp-CTC library to normalize values for each row of the input tensor. + + Parameters: + blank (int, optional): The blank label index of Connectionist Temporal Classification (CTC) loss, which is in the half-opened interval [0, num_classes + 1). The data type must be int32. Default is 0. + reduction (string, optional): Indicate how to average the loss, the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. If :attr:`reduction` is ``'mean'``, the output loss will be divided by the label_lengths, and then return the mean of quotient; If :attr:`reduction` is ``'sum'``, return the sum of loss; If :attr:`reduction` is ``'none'``, no reduction will be applied. Default is ``'mean'``. + + Shape: + log_probs (Tensor): The unscaled probability sequence with padding, which is a 3-D Tensor. The tensor shape is [max_logit_length, batch_size, num_classes + 1], where max_logit_length is the longest length of input logit sequence. The data type must be float32. + labels (Tensor): The ground truth sequence with padding, which must be a 3-D Tensor. The tensor shape is [batch_size, max_label_length], where max_label_length is the longest length of label sequence. The data type must be int32. + input_lengths (Tensor): The length for each input sequence, it should have shape [batch_size] and dtype int64. + label_lengths (Tensor): The length for each label sequence, it should have shape [batch_size] and dtype int64. + + Returns: + Tensor, The Connectionist Temporal Classification (CTC) loss between ``log_probs`` and ``labels``. If attr:`reduction` is ``'none'``, the shape of loss is [batch_size], otherwise, the shape of loss is [1]. Data type is the same as ``log_probs``. + + Examples: + + .. 
code-block:: python + + # declarative mode + import numpy as np + import paddle + + # length of the longest logit sequence + max_seq_length = 4 + #length of the longest label sequence + max_label_length = 3 + # number of logit sequences + batch_size = 2 + # class num + class_num = 3 + + np.random.seed(1) + log_probs = np.array([[[4.17021990e-01, 7.20324516e-01, 1.14374816e-04], + [3.02332580e-01, 1.46755889e-01, 9.23385918e-02]], + + [[1.86260208e-01, 3.45560730e-01, 3.96767467e-01], + [5.38816750e-01, 4.19194520e-01, 6.85219526e-01]], + + [[2.04452246e-01, 8.78117442e-01, 2.73875929e-02], + [6.70467496e-01, 4.17304814e-01, 5.58689833e-01]], + + [[1.40386939e-01, 1.98101491e-01, 8.00744593e-01], + [9.68261600e-01, 3.13424170e-01, 6.92322612e-01]], + + [[8.76389146e-01, 8.94606650e-01, 8.50442126e-02], + [3.90547849e-02, 1.69830427e-01, 8.78142476e-01]]]).astype("float32") + labels = np.array([[1, 2, 2], + [1, 2, 2]]).astype("int32") + input_lengths = np.array([5, 5]).astype("int64") + label_lengths = np.array([3, 3]).astype("int64") + + paddle.disable_static() + log_probs = paddle.to_tensor(log_probs) + labels = paddle.to_tensor(labels) + input_lengths = paddle.to_tensor(input_lengths) + label_lengths = paddle.to_tensor(label_lengths) + + loss = paddle.nn.CTCLoss(blank=0, reduction='none')(log_probs, labels, + input_lengths, + label_lengths) + print(loss.numpy()) #[3.9179852 2.9076521] + + loss = paddle.nn.CTCLoss(blank=0, reduction='mean')(log_probs, labels, + input_lengths, + label_lengths) + print(loss.numpy()) #[1.1376063] + """ + + def __init__(self, blank=0, reduction='mean'): + super(CTCLoss, self).__init__() + self.blank = blank + self.reduction = reduction + + def forward(self, log_probs, labels, input_lengths, label_lengths): + return paddle.nn.functional.ctc_loss(log_probs, labels, input_lengths, + label_lengths, self.blank, + self.reduction) + + +class SmoothL1Loss(fluid.dygraph.Layer): + """ + This operator calculates smooth_l1_loss. Creates a criterion that uses a squared + term if the absolute element-wise error falls below 1 and an L1 term otherwise. + In some cases it can prevent exploding gradients and it is more robust and less + sensitivity to outliers. Also known as the Huber loss: + + .. math:: + + loss(x,y)=\\frac{1}{n}\\sum_{i}z_i + + where z_i is given by: + + .. math:: + + \\mathop{z_i}=\\left\\{\\begin{array}{rcl} + 0.5(x_i - y_i)^2 & & {if |x_i - y_i| < delta} \\\\ + delta * |x_i - y_i| - 0.5 * delta^2 & & {otherwise} + \\end{array} \\right. + + Parameters: + reduction (str, optional): Indicate how to average the loss by batch_size, + the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. + If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; + If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned. + If :attr:`reduction` is ``'none'``, the unreduced loss is returned. + Default is ``'mean'``. + delta (float, optional): Specifies the hyperparameter delta to be used. + The value determines how large the errors need to be to use L1. Errors + smaller than delta are minimized with L2. Parameter is ignored for + negative/zero values. Default = 1.0 + name (str, optional): Name for the operation (optional, default is + None). For more information, please refer to :ref:`api_guide_Name`. + + Call Parameters: + input (Tensor): Input tensor, the data type is float32 or float64. Shape is + (N, C), where C is number of classes, and if shape is more than 2D, this + is (N, C, D1, D2,..., Dk), k >= 1. 
+ label (Tensor): Label tensor, the data type is float32 or float64. The shape of label + is the same as the shape of input. + + Returns: + The tensor variable storing the smooth_l1_loss of input and label. + + Return type: Tensor. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(3,3).astype("float32") + label_data = np.random.rand(3,3).astype("float32") + input = paddle.to_tensor(input_data) + label = paddle.to_tensor(label_data) + loss = paddle.nn.SmoothL1Loss() + output = loss(input, label) + print(output.numpy()) + """ + + def __init__(self, reduction='mean', delta=1.0, name=None): + super(SmoothL1Loss, self).__init__() + self.reduction = reduction + self.delta = delta + self.name = name + + def forward(self, input, label): + return F.smooth_l1_loss( + input, + label, + reduction=self.reduction, + delta=self.delta, + name=self.name) diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 1beba62c1809ffd94a22712fb24ac43a0ec23ff1..369d462a8089a30e6b749ef472aad66166cb590d 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -14,6 +14,7 @@ # TODO: define normalization api +import warnings from ...fluid.dygraph.nn import InstanceNorm from ...fluid.dygraph import BatchNorm #DEFINE_ALIAS @@ -21,6 +22,231 @@ from ...fluid.dygraph import GroupNorm #DEFINE_ALIAS from ...fluid.dygraph import LayerNorm #DEFINE_ALIAS from ...fluid.dygraph import SpectralNorm #DEFINE_ALIAS +from ...fluid.dygraph import layers +from ...fluid.framework import in_dygraph_mode + +from ...fluid.initializer import Constant +from ...fluid.param_attr import ParamAttr +from ...fluid.data_feeder import check_variable_and_dtype, check_type +from ...fluid import core + __all__ = [ - 'BatchNorm', 'GroupNorm', 'LayerNorm', 'SpectralNorm', 'InstanceNorm' + 'BatchNorm', 'GroupNorm', 'LayerNorm', 'SpectralNorm', 'InstanceNorm', + 'SyncBatchNorm' ] + + +class SyncBatchNorm(layers.Layer): + """ + This interface is used to construct a callable object of the ``SyncBatchNorm`` class. + It implements the function of the Cross-GPU Synchronized Batch Normalization Layer, and can + be used as a normalizer function for other operations, such as conv2d and fully connected + operations. + The data is normalized by the mean and variance of the channel based on whole mini-batch + , which including data in all gpus. + Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift `_ + for more details. + + When model in training mode, the :math:`\\mu_{\\beta}` + and :math:`\\sigma_{\\beta}^{2}` are the statistics of whole mini-batch data in all gpus. + Calculated as follows: + + .. math:: + + \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\ + \ mini-batch\ mean \\\\ + \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\ + \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\ + + - :math:`x` : whole mini-batch data in all gpus + - :math:`m` : the size of the whole mini-batch data + + When model in evaluation mode, the :math:`\\mu_{\\beta}` + and :math:`\\sigma_{\\beta}^{2}` are global statistics (moving_mean and moving_variance, + which usually got from the pre-trained model). Global statistics calculated as follows: + + .. math:: + moving\_mean = moving\_mean * momentum + \mu_{\beta} * (1. - momentum) \quad &// global mean \\ + moving\_variance = moving\_variance * momentum + \sigma_{\beta}^{2} * (1. 
- momentum) \quad &// global variance \\ + + The formula of normalization is as follows: + + .. math:: + + \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ + \\sigma_{\\beta}^{2} + \\eps}} \\qquad &//\ normalize \\\\ + y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift + + - :math:`\\eps` : add a smaller value to the variance to prevent division by zero + - :math:`\\gamma` : trainable scale parameter vector + - :math:`\\beta` : trainable shift parameter vector + + Parameters: + num_features(int): Indicate the number of channels of the input ``Tensor``. + epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5. + momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. + weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` + of this layer. If it is set to None or one attribute of ParamAttr, this layerr + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with Xavier. If it is set to False, + this layer will not have trainable scale parameter. Default: None. + bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of this layer. + If it is set to None or one attribute of ParamAttr, this layer + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. If it is set to False, this layer will not + have trainable bias parameter. Default: None. + track_running_stats(bool, optional): Whether to compute global stats, which including running mean and + running variance. Default: True. + + Shapes: + input: Tensor that the dimension from 2 to 5. + output: Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + + x = np.array([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]]).astype('float32') + paddle.disable_static() + x = paddle.to_tensor(x) + if paddle.fluid.is_compiled_with_cuda(): + sync_batch_norm = nn.SyncBatchNorm(2) + hidden1 = sync_batch_norm(x) + print(hidden1.numpy()) + # [[[[0.26824948, 1.0936325],[0.26824948, -1.6301316]],[[ 0.8095662, -0.665287],[-1.2744656, 1.1301866 ]]]] + """ + + def __init__(self, + num_features, + epsilon=1e-05, + momentum=0.9, + track_running_stats=True, + weight_attr=None, + bias_attr=None, + data_format='NCHW', + name=None): + super(SyncBatchNorm, self).__init__() + self._weight_attr = weight_attr + self._bias_attr = bias_attr + self._num_features = num_features + self._data_layout = data_format + self._momentum = momentum + self._epsilon = epsilon + self._track_running_stats = track_running_stats + + if self._track_running_stats == False: + warnings.warn( + "moving mean and moving variance will be calculated whether `track_running_stats` is set to `True` or `False`, we will fix it in the next version." + ) + + param_shape = [self._num_features] + + # create parameter + if weight_attr == False: + self.weight = self.create_parameter( + attr=None, shape=param_shape, default_initializer=Constant(1.0)) + self.weight.stop_gradient = True + else: + self.weight = self.create_parameter( + attr=self._weight_attr, + shape=param_shape, + default_initializer=Constant(1.0)) + self.weight.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0. 
+ + if bias_attr == False: + self.bias = self.create_parameter( + attr=None, + shape=param_shape, + default_initializer=Constant(0.0), + is_bias=True) + self.bias.stop_gradient = True + else: + self.bias = self.create_parameter( + attr=self._bias_attr, shape=param_shape, is_bias=True) + self.bias.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0. + + self._mean = self.create_parameter( + attr=ParamAttr( + name=None, + initializer=Constant(0.0), + trainable=False, + do_model_average=True), + shape=param_shape, + dtype=self._dtype) + self._mean.stop_gradient = True + + self._variance = self.create_parameter( + attr=ParamAttr( + name=None, + initializer=Constant(1.0), + trainable=False, + do_model_average=True), + shape=param_shape, + dtype=self._dtype) + self._variance.stop_gradient = True + + def forward(self, x): + # create output + # mean and mean_out share the same memory + mean_out = self._mean + # variance and variance out share the same memory + variance_out = self._variance + + ### train mode: use mini-batch stats, eval mode: use global stats + ### use_global_stats only support False in sync_batch_norm + if in_dygraph_mode(): + attrs = ("momentum", self._momentum, "epsilon", self._epsilon, + "is_test", not self.training, "data_layout", + self._data_layout, "use_mkldnn", False, "fuse_with_relu", + False, "use_global_stats", False, 'trainable_statistics', + False) + sync_batch_norm_out, _, _, _, _, _ = core.ops.sync_batch_norm( + x, self.weight, self.bias, self._mean, self._variance, mean_out, + variance_out, *attrs) + + return sync_batch_norm_out + + check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'], + 'BatchNorm') + + attrs = { + "momentum": self._momentum, + "epsilon": self._epsilon, + "is_test": not self.training, + "data_layout": self._data_layout, + "use_mkldnn": False, + "fuse_with_relu": False, + "use_global_stats": False, + "trainable_statistics": False, + } + + inputs = { + "X": [x], + "Scale": [self.weight], + "Bias": [self.bias], + "Mean": [self._mean], + "Variance": [self._variance] + } + + saved_mean = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True) + saved_variance = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True) + sync_batch_norm_out = self._helper.create_variable_for_type_inference( + self._dtype) + + outputs = { + "Y": [sync_batch_norm_out], + "MeanOut": [mean_out], + "VarianceOut": [variance_out], + "SavedMean": [saved_mean], + "SavedVariance": [saved_variance] + } + + self._helper.append_op( + type="sync_batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) + return sync_batch_norm_out diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py new file mode 100755 index 0000000000000000000000000000000000000000..d4e50bd993c2a07c01fd6f4ebcadb20ed8e38cdb --- /dev/null +++ b/python/paddle/nn/layer/pooling.py @@ -0,0 +1,877 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle + +from ...fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype +from ...fluid.layers import utils +from ...fluid.dygraph import layers +from ...fluid.layer_helper import LayerHelper +from .. import functional as F + +__all__ = [ + 'AdaptiveAvgPool2d', + 'AdaptiveAvgPool3d', + 'AvgPool1d', + 'MaxPool1d', + 'AdaptiveMaxPool1d', + 'AdaptiveAvgPool1d', + 'AvgPool2d', + 'MaxPool2d', + 'AvgPool3d', + 'MaxPool3d', +] + + +class AdaptiveAvgPool2d(layers.Layer): + """ + + This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions + of the output tensor are determined by the parameter output_size. + + For avg adaptive pool2d: + + .. math:: + + hstart &= floor(i * H_{in} / H_{out}) + + hend &= ceil((i + 1) * H_{in} / H_{out}) + + wstart &= floor(j * W_{in} / W_{out}) + + wend &= ceil((j + 1) * W_{in} / W_{out}) + + Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} + + + Parameters: + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two elements, (H, W). H and W can be either an int, or None which means + the size will be the same as that of the input. + data_format (str): The data format of the input and output data. An optional string + from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in + the order of: [batch_size, input_channels, input_height, input_width]. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Shape: + x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float16, float32, float64, int32 or int64. + output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x. + + Returns: + A callable object of AdaptiveAvgPool2d. + + Examples: + .. code-block:: python + + # adaptive avg pool2d + # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], + # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + # of input data into m * n grids averagely and performs poolings in each + # grid to get output. + # adaptive avg pool performs calculations as follow: + # + # for i in range(m): + # for j in range(n): + # hstart = floor(i * H / m) + # hend = ceil((i + 1) * H / m) + # wstart = floor(j * W / n) + # wend = ceil((j + 1) * W / n) + # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) + # + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 32, 32] + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3) + pool_out = adaptive_avg_pool(x = x) + # pool_out.shape is [2, 3, 3, 3] + """ + + def __init__(self, output_size, data_format="NCHW", name=None): + super(AdaptiveAvgPool2d, self).__init__() + self._output_size = output_size + self._data_format = data_format + self._name = name + + def forward(self, x): + return F.adaptive_avg_pool2d( + x, + output_size=self._output_size, + data_format=self._data_format, + name=self._name) + + +class AdaptiveAvgPool3d(layers.Layer): + """ + + This operation applies 3D adaptive avg pooling on input tensor.
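As a reference for the adaptive-averaging arithmetic defined by the hstart/hend/wstart/wend formulas in ``AdaptiveAvgPool2d`` above, a minimal NumPy sketch follows (``adaptive_avg_pool2d_ref`` is a hypothetical helper, for illustration only); the 3D variant described next extends the same idea with a depth dimension:

    import math
    import numpy as np

    def adaptive_avg_pool2d_ref(x, output_size):
        # x: [N, C, H, W]; output_size: (m, n)
        N, C, H, W = x.shape
        m, n = output_size
        out = np.zeros((N, C, m, n), dtype=x.dtype)
        for i in range(m):
            hstart, hend = math.floor(i * H / m), math.ceil((i + 1) * H / m)
            for j in range(n):
                wstart, wend = math.floor(j * W / n), math.ceil((j + 1) * W / n)
                out[:, :, i, j] = x[:, :, hstart:hend, wstart:wend].mean(axis=(2, 3))
        return out

    x = np.random.rand(2, 3, 32, 32).astype("float32")
    print(adaptive_avg_pool2d_ref(x, (3, 3)).shape)  # (2, 3, 3, 3)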
The h and w dimensions + of the output tensor are determined by the parameter output_size. + + For avg adaptive pool3d: + + .. math:: + + dstart &= floor(i * D_{in} / D_{out}) + + dend &= ceil((i + 1) * D_{in} / D_{out}) + + hstart &= floor(j * H_{in} / H_{out}) + + hend &= ceil((j + 1) * H_{in} / H_{out}) + + wstart &= floor(k * W_{in} / W_{out}) + + wend &= ceil((k + 1) * W_{in} / W_{out}) + + Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} + + + Parameters: + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means + the size will be the same as that of the input. + data_format (str): The data format of the input and output data. An optional string + from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in + the order of: [batch_size, input_channels, input_depth, input_height, input_width]. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Shape: + x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float16, float32, float64, int32 or int64. + output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x. + + Returns: + A callable object of AdaptiveAvgPool3d. + + Examples: + .. code-block:: python + + # adaptive avg pool3d + # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], + # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + # of input data into l * m * n grids averagely and performs poolings in each + # grid to get output. + # adaptive avg pool performs calculations as follow: + # + # for i in range(l): + # for j in range(m): + # for k in range(n): + # dstart = floor(i * D / l) + # dend = ceil((i + 1) * D / l) + # hstart = floor(j * H / m) + # hend = ceil((j + 1) * H / m) + # wstart = floor(k * W / n) + # wend = ceil((k + 1) * W / n) + # output[:, :, i, j, k] = + # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 8, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 8, 32, 32] + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3) + pool_out = adaptive_avg_pool(x = x) + # pool_out = [2, 3, 3, 3, 3] + """ + + def __init__(self, output_size, data_format="NCDHW", name=None): + super(AdaptiveAvgPool3d, self).__init__() + self._output_size = output_size + self._data_format = data_format + self._name = name + + def forward(self, x): + return F.adaptive_avg_pool3d( + x, + output_size=self._output_size, + data_format=self._data_format, + name=self._name) + + +class AvgPool1d(layers.Layer): + """ + This operation applies a 1D average pooling over an input signal composed + of several input planes, based on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + The output tensor shape will be [N, C, output_size]. + + The output value of the layer with input size (N, C, L), + output (N, C, L_{out}) and kernel_size k can be precisely described as + For average pool1d: + + .. 
math:: + + Output(N_i, C_i, l) &= mean(Input[N_i, C_i, stride \times l:stride \times l+k]) + + + Args: + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one integer. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain one integer. + padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be the following forms: `[pad_left, pad_right]`. If padding is non-zero, + then the input is implicitly zero-padded on both sides for padding number of points. + count_include_pad (bool): Whether to include padding points in the average + calculation, default is `True`. + ceil_mode (bool): Whether to use the ceil function to calculate output height and width. + If it is set to False, the floor function will be used. Default False + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + None. + + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ValueError: If `padding` is a list or tuple but its length greater than 1. + ShapeError: If the input is not a 3-D. + ShapeError: If the output's shape calculated is not greater than 0. + + + Examples: + + .. code-block:: python + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + AvgPool1d = nn.AvgPool1d(kernel_size=2, stride=2, padding=0) + pool_out = AvgPool1d(data) + # pool_out shape: [1, 3, 16] + + """ + + def __init__(self, + kernel_size, + stride=None, + padding=0, + count_include_pad=True, + ceil_mode=False, + name=None): + super(AvgPool1d, self).__init__() + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.ceil_mode = ceil_mode + self.count_include_pad = count_include_pad + self.name = name + + def forward(self, x): + out = F.avg_pool1d(x, self.kernel_size, self.stride, self.padding, + self.count_include_pad, self.ceil_mode, self.name) + return out + + +class MaxPool1d(layers.Layer): + """ + Applies a 1D max pooling over an input signal composed of several input planes based + on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + + The output value of the layer with input size (N, C, L), + output (N, C, L_{out}) and kernel_size k can be precisely described as + For max pool1d: + + .. math:: + + Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k]) + + Args: + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one integer. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain one integer. + padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be the following forms: `[pad_left, pad_right]`. + return_indices (bool): Whether to return the max indices along with the outputs. Default is `False`.
+ ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. + If it is set to False, the floor function will be used. Default False + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + None. + + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ValueError: If `padding` is a list or tuple but its length greater than 1. + ShapeError: If the input is not a 3-D. + ShapeError: If the output's shape calculated is not greater than 0. + + + Examples: + + .. code-block:: python + + import paddle + import paddle.nn as nn + paddle.disable_static() + + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0) + pool_out = MaxPool1d(data) + # pool_out shape: [1, 3, 16] + + MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True) + pool_out, indices = MaxPool1d(data) + # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] + + """ + + def __init__(self, + kernel_size, + stride=None, + padding=0, + return_indices=False, + ceil_mode=False, + name=None): + super(MaxPool1d, self).__init__() + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.ceil_mode = ceil_mode + self.return_indices = return_indices + self.name = name + + def forward(self, input): + out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding, + self.return_indices, self.ceil_mode, self.name) + return out + + +class AdaptiveAvgPool1d(layers.Layer): + """ + + This operation applies a 1D adaptive average pooling over an input signal composed + of several input planes, based on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + The output tensor shape will be [N, C, output_size]. + + For average adaptive pool1d: + + .. math:: + + lstart &= floor(i * L_{in} / L_{out}) + + lend &= ceil((i + 1) * L_{in} / L_{out}) + + Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)} + + Args: + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one int. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + None. + + Raises: + ValueError: 'pool_size' should be a integer or list or tuple with length as 1. + + Examples: + .. code-block:: python + + # average adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output. 
+ # adaptive max pool performs calculations as follow: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend) + # + import paddle + import paddle.nn as nn + paddle.disable_static() + + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16) + pool_out = AdaptiveAvgPool1d(data) + # pool_out shape: [1, 3, 16] + """ + + def __init__(self, output_size, name=None): + super(AdaptiveAvgPool1d, self).__init__() + self.output_size = output_size + self.name = name + + def forward(self, input): + return F.adaptive_avg_pool1d(input, self.output_size, self.name) + + +class AdaptiveMaxPool1d(layers.Layer): + """ + + This operation applies a 1D adaptive max pooling over an input signal composed + of several input planes, based on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + The output tensor shape will be [N, C, output_size]. + + For max adaptive pool1d: + + .. math:: + + lstart &= floor(i * L_{in} / L_{out}) + + lend &= ceil((i + 1) * L_{in} / L_{out}) + + Output(i) &= max(Input[lstart:lend])} + + Args: + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one int. + return_indices (bool): If true, the index of max pooling point will be returned along + with outputs. It cannot be set in average pooling type. Default False. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: + None. + + Raises: + ValueError: 'pool_size' should be a integer or list or tuple with length as 1. + + Examples: + .. code-block:: python + + # max adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output. + # adaptive max pool performs calculations as follow: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = max(input[:, :, lstart: lend]) + # + import paddle + import paddle.nn as nn + paddle.disable_static() + + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16) + pool_out = AdaptiveMaxPool1d(data) + # pool_out shape: [1, 3, 16] + + # for return_indices = true + AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True) + pool_out, indices = AdaptiveMaxPool1d(data) + # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] + + """ + + def __init__(self, output_size, return_indices=False, name=None): + super(AdaptiveMaxPool1d, self).__init__() + self.output_size = output_size + self.return_indices = return_indices + self.name = name + + def forward(self, input): + return F.adaptive_max_pool1d(input, self.output_size, + self.return_indices, self.name) + + +class AvgPool2d(layers.Layer): + """ + This operation applies 2D average pooling over input features based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCHW format, where N is batch size, C is the number of channels, + H is the height of the feature, and W is the width of the feature. 
+ + Example: + Input: + X shape: $(N, C, H_{in}, W_{in})$ + Attr: + kernel_size: ksize + + Output: + Out shape: $(N, C, H_{out}, W_{out})$ + $$ + out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1} + input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) + $$ + + Args: + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two integers, (pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be a square of an int. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain two integers, (pool_stride_Height, pool_stride_Width). + Otherwise, the pool stride size will be a square of an int. Default: kernel_size. + padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be in three forms: `[pad_height, pad_width]` or + `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, + `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + when `data_format` is `"NHWC"`, `pool_padding` can be in the form + `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. + Otherwise, the pool padding size will be a square of an int. + ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape + count_include_pad (bool): Whether to exclude padding points in average pooling + mode, default is `true`. + divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. + + Returns: None. + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. + Examples: + .. 
code-block:: python + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + # avg pool2d + input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) + AvgPool2d = nn.AvgPool2d(kernel_size=2, + stride=2, padding=0) + output = AvgPool2d(input) + # output.shape [1, 3, 16, 16] + + """ + + def __init__(self, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, + divisor_override=None, + data_format="NCHW", + name=None): + super(AvgPool2d, self).__init__() + self.ksize = kernel_size + self.stride = stride + self.padding = padding + self.ceil_mode = ceil_mode + self.count_include_pad = count_include_pad + self.divisor = divisor_override + self.data_format = data_format + self.name = name + + def forward(self, x): + return F.avg_pool2d( + x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + ceil_mode=self.ceil_mode, + count_include_pad=self.count_include_pad, + divisor_override=self.divisor, + data_format=self.data_format, + name=self.name) + + +class MaxPool2d(layers.Layer): + """ + This operation applies 2D max pooling over input feature based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCHW format, where N is batch size, C is the number of channels, + H is the height of the feature, and W is the width of the feature. + + Example: + Input: + X shape: $(N, C, H_{in}, W_{in})$ + Attr: + kernel_size: ksize + + Output: + Out shape: $(N, C, H_{out}, W_{out})$ + $$ + out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\ + & \text{input}(N_i, C_j, \text{stride[0]} \times h + m, + \text{stride[1]} \times w + n) + $$ + + Args: + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two integers, (pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be a square of an int. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain two integers, (pool_stride_Height, pool_stride_Width). + Otherwise, the pool stride size will be a square of an int. Default: kernel_size. + padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be in three forms: `[pad_height, pad_width]` or + `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, + `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + when `data_format` is `"NHWC"`, `pool_padding` can be in the form + `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. + Otherwise, the pool padding size will be a square of an int. + ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape + return_indices (bool): Whether to return the max indices along with the outputs. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default.
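A minimal NumPy sketch of the max-pooling formula above, assuming an integer kernel_size and stride with zero padding (``max_pool2d_ref`` is a hypothetical helper, for illustration only, not part of this patch):

    import numpy as np

    def max_pool2d_ref(x, kernel_size, stride):
        # x: [N, C, H, W]; integer kernel_size/stride, padding=0, floor output shape
        N, C, H, W = x.shape
        H_out = (H - kernel_size) // stride + 1
        W_out = (W - kernel_size) // stride + 1
        out = np.empty((N, C, H_out, W_out), dtype=x.dtype)
        for h in range(H_out):
            for w in range(W_out):
                window = x[:, :, h * stride:h * stride + kernel_size,
                           w * stride:w * stride + kernel_size]
                out[:, :, h, w] = window.max(axis=(2, 3))
        return out

    x = np.random.uniform(-1, 1, (1, 3, 32, 32)).astype("float32")
    print(max_pool2d_ref(x, kernel_size=2, stride=2).shape)  # (1, 3, 16, 16)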
+ + Returns: None + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. + Examples: + .. code-block:: python + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + # max pool2d + input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) + MaxPool2d = nn.MaxPool2d(kernel_size=2, + stride=2, padding=0) + output = MaxPool2d(input) + # output.shape [1, 3, 16, 16] + + # for return_indices=True + MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True) + output, max_indices = MaxPool2d(input) + # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], + """ + + def __init__(self, + kernel_size, + stride=None, + padding=0, + return_indices=False, + ceil_mode=False, + data_format="NCHW", + name=None): + super(MaxPool2d, self).__init__() + self.ksize = kernel_size + self.stride = stride + self.padding = padding + self.return_indices = return_indices + self.ceil_mode = ceil_mode + self.data_format = data_format + self.name = name + + def forward(self, x): + return F.max_pool2d( + x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + return_indices=self.return_indices, + data_format=self.data_format, + name=self.name) + + +class MaxPool3d(layers.Layer): + """ + This operation applies 3D max pooling over input features based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCDHW format, where N is batch size, C is the number of channels, + H is the height of the feature, D is the depth of the feature, and W is the width of the feature. + + Args: + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + is a tuple or list, it must contain three integers, + (pool_size_Depth, pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be the cube of an int. + stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, + it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. + Otherwise, the pool stride size will be a cube of an int. Default kernel_size. + padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, + it could be in three forms: `[pad_depth, pad_height, pad_width]` or + `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, + and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form + `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + when `data_format` is `"NDHWC"`, `pool_padding` can be in the form + `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. + ceil_mode (bool): when True, will use ceil instead of floor to compute the output shape. + count_include_pad (bool): Whether to exclude padding points in average pooling + mode, default is True. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. 
+ name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + + Returns: None. + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. + Examples: + .. code-block:: python + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + # max pool3d + input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32)) + MaxPool3d = nn.MaxPool3d(kernel_size=2, + stride=2, padding=0) + output = MaxPool3d(input) + # output.shape [1, 2, 3, 16, 16] + + # for return_indices=True + MaxPool3d = nn.MaxPool3d(kernel_size=2,stride=2, padding=0, return_indices=True) + output, max_indices = MaxPool3d(input) + # output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16], + """ + + def __init__(self, + kernel_size, + stride, + padding, + return_indices=False, + ceil_mode=False, + data_format="NCDHW", + name=None): + super(MaxPool3d, self).__init__() + self.ksize = kernel_size + self.stride = stride + self.padding = padding + self.return_indices = return_indices + self.ceil_mode = ceil_mode + self.data_format = data_format + self.name = name + + def forward(self, x): + return F.max_pool3d( + x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + return_indices=self.return_indices, + data_format=self.data_format, + name=self.name) + + +class AvgPool3d(layers.Layer): + """ + This operation applies 3D average pooling over input features based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCDHW format, where N is batch size, C is the number of channels, + H is the height of the feature, D is the depth of the feature, and W is the width of the feature. + + Args: + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + is a tuple or list, it must contain three integers, + (pool_size_Depth, pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be the cube of an int. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. + Otherwise, the pool stride size will be a cube of an int. + padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, + it could be in three forms: `[pad_depth, pad_height, pad_width]` or + `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, + and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form + `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + when `data_format` is `"NDHWC"`, `pool_padding` can be in the form + `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. + ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape. + count_include_pad (bool): Whether to include padding points in the average + calculation, default is True. + divisor_override (int|float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
+ The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: None. + Raises: + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. + Examples: + .. code-block:: python + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + # avg pool3d + input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32)) + AvgPool3d = nn.AvgPool3d(kernel_size=2, + stride=2, padding=0) + output = AvgPool3d(input) + # output.shape [1, 2, 3, 16, 16] + + """ + + def __init__(self, + kernel_size, + stride, + padding=0, + ceil_mode=False, + count_include_pad=True, + divisor_override=None, + data_format="NCDHW", + name=None): + super(AvgPool3d, self).__init__() + self.ksize = kernel_size + self.stride = stride + self.padding = padding + self.ceil_mode = ceil_mode + self.count_include_pad = count_include_pad + self.divisor = divisor_override + self.data_format = data_format + self.name = name + + def forward(self, x): + return F.avg_pool3d( + x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + ceil_mode=self.ceil_mode, + count_include_pad=self.count_include_pad, + divisor_override=self.divisor, + data_format=self.data_format, + name=self.name) diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 2b926b5ab369046fc07c3d3d8cd56431d7f740a7..50a8755ac9f7b0a8e35c60f02a9fb825195ab80f 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -13,4 +13,1107 @@ # limitations under the License. # TODO: define the classes of Transformer neural network -# __all__ = [ ] +__all__ = [ + 'MultiHeadAttention', + 'TransformerEncoderLayer', + 'TransformerEncoder', + 'TransformerDecoderLayer', + 'TransformerDecoder', + 'Transformer', +] + +import copy +import collections + +from ...fluid import layers +from ...fluid.param_attr import ParamAttr +from ...fluid.dygraph import Layer, Linear, Dropout, LayerNorm, LayerList +from .. import functional as F +from ...fluid.layers import utils +from ...fluid.layers.utils import map_structure + + +def _convert_param_attr_to_list(param_attr, n): + """ + If `param_attr` is a list or tuple, convert every element in it to a + ParamAttr instance. Otherwise, repeat `param_attr` `n` times to + construct a list, and rename every one by appending a increasing index + suffix to avoid having same names when `param_attr` contains a name. + + Parameters: + param_attr (list|tuple|ParamAttr): A list, tuple or something can be + converted to a ParamAttr instance by `ParamAttr._to_attr`. + n (int): The times to repeat to construct a list when `param_attr` + is not a list or tuple. + + Returns: + list: A list composed of each including cell's `param_attr`. 
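A minimal sketch of the renaming behaviour described above, assuming the private helper is imported from `paddle.nn.layer.transformer` and the attribute carries a name:

.. code-block:: python

    from paddle.fluid import ParamAttr
    from paddle.nn.layer.transformer import _convert_param_attr_to_list

    # a single named attribute repeated 3 times gets "_0"/"_1"/"_2" suffixes,
    # so the generated parameters do not collide on the same name
    attrs = _convert_param_attr_to_list(ParamAttr(name="fc_w"), 3)
    print([attr.name for attr in attrs])  # ['fc_w_0', 'fc_w_1', 'fc_w_2']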
+ """ + if isinstance(param_attr, (list, tuple)): + assert len(param_attr) == n, ( + "length of param_attr should be %d when it is a list/tuple" % n) + param_attrs = [ParamAttr._to_attr(attr) for attr in param_attr] + else: + param_attrs = [] + attr = ParamAttr._to_attr(param_attr) + for i in range(n): + attr_i = copy.deepcopy(attr) + if attr.name: + attr_i.name = attr_i.name + "_" + str(i) + param_attrs.append(attr_i) + return param_attrs + + +class MultiHeadAttention(Layer): + """ + Attention mapps queries and a set of key-value pairs to outputs, and + Multi-Head Attention performs multiple parallel attention to jointly attending + to information from different representation subspaces. + + Please refer to `Attention Is All You Need `_ + for more details. + + Parameters: + embed_dim (int): The expected feature size in the input and output. + num_heads (int): The number of heads in multi-head attention. + dropout (float, optional): The dropout probability used on attention + weights to drop some attention targets. 0 for no dropout. Default 0 + kdim (int, optional): The feature size in key. If None, assumed equal to + `embed_dim`. Default None. + vdim (int, optional): The feature size in value. If None, assumed equal to + `embed_dim`. Default None. + need_weights (bool, optional): Indicate whether to return the attention + weights. Default False. + weight_attr(ParamAttr, optional): To specify the weight parameter property. + Default: None, which means the default weight parameter property is used. + See usage for details in :code:`ParamAttr` . + bias_attr (ParamAttr, optional): To specify the bias parameter property. + Default: None, which means the default bias parameter property is used. + If it is set to False, this layer will not have trainable bias parameter. + See usage for details in :code:`ParamAttr` . + + Examples: + + .. code-block:: python + + import paddle + + # encoder input: [batch_size, sequence_length, d_model] + query = paddle.rand((2, 4, 128)) + # self attention mask: [batch_size, num_heads, query_len, query_len] + attn_mask = paddle.rand((2, 2, 4, 4)) + multi_head_attn = paddle.MultiHeadAttention(128, 2) + output = multi_head_attn(query, attn_mask=attn_mask) # [2, 4, 128] + """ + + Cache = collections.namedtuple("Cache", ["k", "v"]) + StaticCache = collections.namedtuple("StaticCache", ["k", "v"]) + + def __init__(self, + embed_dim, + num_heads, + dropout=0., + kdim=None, + vdim=None, + need_weights=False, + weight_attr=None, + bias_attr=None): + super(MultiHeadAttention, self).__init__() + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self.num_heads = num_heads + self.dropout = dropout + self.need_weights = need_weights + + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" + + self.q_proj = Linear( + embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) + self.k_proj = Linear( + self.kdim, embed_dim, weight_attr, bias_attr=bias_attr) + self.v_proj = Linear( + self.vdim, embed_dim, weight_attr, bias_attr=bias_attr) + self.out_proj = Linear( + embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) + + def _prepare_qkv(self, query, key, value, cache=None): + """ + Prapares linear projected queries, keys and values for usage of subsequnt + multiple parallel attention. If `cache` is not None, using cached results + to reduce redundant calculations. 
+ + Parameters: + query (Tensor): The queries for multi-head attention. It is a + tensor with shape `[batch_size, query_length, embed_dim]`. The + data type should be float32 or float64. + key (Tensor): The keys for multi-head attention. It is + a tensor with shape `[batch_size, key_length, kdim]`. The + data type should be float32 or float64. If None, use `query` as + `key`. + value (Tensor): The values for multi-head attention. It + is a tensor with shape `[batch_size, value_length, vdim]`. + The data type should be float32 or float64. If None, use `query` as + `value`. + cache (MultiHeadAttention.Cache|MultiHeadAttention.StaticCache, optional): + It is a namedtuple with `k` and `v` as fields, and stores tensors + shaped `[batch_size, num_heads, length, embed_dim]` which are results + of linear projection, reshape and transpose calculations in + MultiHeadAttention. If is an instance of `Cache`, `k` and `v` + fields reserve intermediate results of previous positions, which + mostly used for decoder self attention. If it is an instance of + `StaticCache`, `key` and `value` args would be ignored, `k` and + `v` fields would be used as calculated results on `key` and + `value`, which mostly used for decoder-encoder cross attention. + It is only used for inference and should be None for training. + Default None. + + Returns: + tuple: A tuple including linear projected keys and values. These two \ + tensors have shapes `[batch_size, n_head, sequence_length, d_key]` \ + and `[batch_size, n_head, sequence_length, d_value]` separately, \ + and their data types are same as inputs. + """ + q = self.q_proj(query) + q = layers.reshape(x=q, shape=[0, 0, self.num_heads, self.head_dim]) + q = layers.transpose(x=q, perm=[0, 2, 1, 3]) + + if isinstance(cache, self.StaticCache): + # for encoder-decoder attention in inference and has cached + k, v = cache.k, cache.v + else: + k, v = self.compute_kv(key, value) + + if isinstance(cache, self.Cache): + # for decoder self-attention in inference + k = layers.concat([cache.k, k], axis=2) + v = layers.concat([cache.v, v], axis=2) + cache = self.Cache(k, v) + + return (q, k, v) if cache is None else (q, k, v, cache) + + def compute_kv(self, key, value): + """ + Applies linear projection on input keys and values, then splits heads + (reshape and transpose) to get keys and values from different representation + subspaces. The results are used as key-values pairs for subsequent multiple + parallel attention. + + It is part of calculations in multi-head attention, and is provided as + a method to pre-compute and prefetch these results, thus we can use them + to construct cache for inference. + + Parameters: + key (Tensor): The keys for multi-head attention. It is a tensor + with shape `[batch_size, sequence_length, kdim]`. The data type + should be float32 or float64. + value (Tensor): The values for multi-head attention. It is a tensor + with shape `[batch_size, sequence_length, vdim]`. The data type + should be float32 or float64. + + Returns: + tuple: A tuple including transformed keys and values. Their shapes \ + both are `[batch_size, num_heads, sequence_length, embed_dim // num_heads]`, \ + and their data types are same as inputs. 
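For intuition, a small numpy sketch (with hypothetical sizes) of the reshape-and-transpose bookkeeping that yields the `[batch_size, num_heads, sequence_length, head_dim]` layout:

.. code-block:: python

    import numpy as np

    batch_size, seq_len, embed_dim, num_heads = 2, 4, 128, 2
    head_dim = embed_dim // num_heads  # 64

    k = np.random.rand(batch_size, seq_len, embed_dim).astype("float32")
    # split the feature axis into (num_heads, head_dim) ...
    k = k.reshape(batch_size, seq_len, num_heads, head_dim)
    # ... then move the head axis in front of the sequence axis
    k = k.transpose(0, 2, 1, 3)
    print(k.shape)  # (2, 2, 4, 64)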
+ """ + k = self.k_proj(key) + v = self.v_proj(value) + k = layers.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) + k = layers.transpose(x=k, perm=[0, 2, 1, 3]) + v = layers.reshape(x=v, shape=[0, 0, self.num_heads, self.head_dim]) + v = layers.transpose(x=v, perm=[0, 2, 1, 3]) + return k, v + + def gen_cache(self, key, value=None, type=Cache): + """ + Generates cache for `forward` usage in inference accroding to arguments. + The generated cache is an instance of `MultiHeadAttention.Cache` or an + instance of `MultiHeadAttention.StaticCache`. + + `Cache` or `StaticCache` is namedtuple with `k` and `v` as fields, + and it stores tensors shaped `[batch_size, num_heads, length, embed_dim]` + which are results of linear projection, reshape and transpose calculations + in MultiHeadAttention. + + If the generated cache is an instance of `Cache`, `k` and `v` fields + reserve intermediate result tensors of previous positions, and the tensors + are incremental among decoding steps, which mostly are used for decoder + decoder self attention. + + If the generated cache is an instance of `StaticCache`, `k` and `v` fields + would be used as calculated result tensors on keys an values in `forward`, + and the tensors keep unchanged among decoding steps, which are mostly used + for decoder-encoder cross attention. + + The cache is generated as follows: + + 1. If `type` is `StaticCache`, apply `compute_kv(key, value)` and use the + results to create an instance of `StaticCache`. + + 2. If `type` is `Cache` and `value` is None, generate empty tensors shaped + `[batch_size, num_heads, 0, embed_dim // num_heads]` and use the results + to create an instance of `Cache`, where `batch_size` is from the first + dimension of `key`. + + 3. If `type` is `Cache` and `value` is not None, use `key`, `value` to create + an instance of `Cache`. + + Parameters: + key (Tensor): The keys for multi-head attention. It is + a tensor with shape `[batch_size, key_length, kdim]`. The + data type should be float32 or float64. If `value` is None, + it is only for batch size and data type reference. + value (Tensor, optional): The values for multi-head attention. It + is a tensor with shape `[batch_size, value_length, vdim]`. + The data type should be float32 or float64. If None, `key` is only + for batch size reference. Default None. + type (type): It should be `MultiHeadAttention.StaticCache` or + `MultiHeadAttention.Cache` to indicate the cache type to generate. + + Returns: + namedtuple: an instance of `Cache` or `StaticCache` accordingly. + """ + if type == MultiHeadAttention.StaticCache: # static_kv + k, v = self.compute_kv(key, value) + return self.StaticCache(k, v) + elif value is None: # incremental_state + k = layers.fill_constant_batch_size_like( + input=key, + shape=[-1, self.num_heads, 0, self.head_dim], + dtype=key.dtype, + value=0) + v = layers.fill_constant_batch_size_like( + input=key, + shape=[-1, self.num_heads, 0, self.head_dim], + dtype=key.dtype, + value=0) + return self.Cache(k, v) + else: + # incremental_state with initial value, mainly for usage like UniLM + return self.Cache(key, value) + + def forward(self, query, key, value, attn_mask=None, cache=None): + """ + Applies multi-head attention to map queries and a set of key-value pairs + to outputs. + + Parameters: + query (Tensor): The queries for multi-head attention. It is a + tensor with shape `[batch_size, query_length, embed_dim]`. The + data type should be float32 or float64. + key (Tensor, optional): The keys for multi-head attention. 
It is + a tensor with shape `[batch_size, key_length, kdim]`. The + data type should be float32 or float64. If None, use `query` as + `key`. Default None. + value (Tensor, optional): The values for multi-head attention. It + is a tensor with shape `[batch_size, value_length, vdim]`. + The data type should be float32 or float64. If None, use `query` as + `value`. Default None. + attn_mask (Tensor, optional): A tensor used in multi-head attention + to prevents attention to some unwanted positions, usually the + paddings or the subsequent positions. It is a tensor with shape + broadcasted to `[batch_size, n_head, sequence_length, sequence_length]`, + where the unwanted positions have `-INF` values and the others + have 0 values. The data type should be float32 or float64. It can + be None when nothing wanted or needed to be prevented attention to. + Default None + cache (MultiHeadAttention.Cache|MultiHeadAttention.StaticCache, optional): + It is a namedtuple with `k` and `v` as fields, and stores tensors + shaped `[batch_size, num_heads, length, embed_dim]` which are results + of linear projection, reshape and transpose calculations in + MultiHeadAttention. If it is an instance of `Cache`, `k` and `v` + fields reserve intermediate results of previous positions, which + mostly used for decoder self attention. If it is an instance of + `StaticCache`, `key` and `value` args would be ignored, `k` and + `v` fields would be used as calculated results on `key` and + `value`, which mostly used for decoder-encoder cross attention. + It is only used for inference and should be None for training. + Default None. + + Returns: + Tensor|tuple: It is a tensor that has the same shape and data type \ + as `query`, representing attention output. Or a tuple if \ + `need_weights` is True or `cache` is not None. If `need_weights` \ + is True, except for attention output, the tuple also includes \ + the attention weights tensor shaped `[batch_size, num_heads, query_length, key_length]`. \ + If `cache` is not None, the tuple then includes the new cache \ + having the same type as `cache`, and if it is `StaticCache`, it \ + is same as the input `cache`, if it is `Cache`, the new cache \ + reserves tensors concatanating raw tensors with intermediate \ + results of current query. + """ + key = query if key is None else key + value = query if value is None else value + # compute q ,k ,v + if cache is None: + q, k, v = self._prepare_qkv(query, key, value, cache) + else: + q, k, v, cache = self._prepare_qkv(query, key, value, cache) + + # scale dot product attention + product = layers.matmul( + x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5) + if attn_mask is not None: + # TODO(guosheng): support bool mask + product = product + attn_mask + weights = layers.softmax(product) + if self.dropout: + weights = layers.dropout( + weights, + dropout_prob=self.dropout, + dropout_implementation="upscale_in_train", + is_test=False) + + out = layers.matmul(weights, v) + + # combine heads + out = layers.transpose(out, perm=[0, 2, 1, 3]) + out = layers.reshape(x=out, shape=[0, 0, out.shape[2] * out.shape[3]]) + + # project to output + out = self.out_proj(out) + + outs = [out] + if self.need_weights: + outs.append(weights) + if cache is not None: + outs.append(cache) + return out if len(outs) == 1 else tuple(outs) + + +class TransformerEncoderLayer(Layer): + """ + TransformerEncoderLayer is composed of two sub-layers which are self (multi-head) + attention and feedforward network. 
Before and after each sub-layer, pre-process + and post-precess would be applied on the input and output accordingly. If + `normalize_before` is True, pre-process is layer normalization and post-precess + includes dropout, residual connection. Otherwise, no pre-process and post-precess + includes dropout, residual connection, layer normalization. + + Parameters: + d_model (int): The expected feature size in the input and output. + nhead (int): The number of heads in multi-head attention(MHA). + dim_feedforward (int): The hidden layer size in the feedforward network(FFN). + dropout (float, optional): The dropout probability used in pre-process + and post-precess of MHA and FFN sub-layer. Default 0.1 + activation (str, optional): The activation function in the feedforward + network. Default relu. + attn_dropout (float, optional): The dropout probability used + in MHA to drop some attention target. If None, use the value of + `dropout`. Default None + act_dropout (float, optional): The dropout probability used after FFN + activition. If None, use the value of `dropout`. Default None + normalize_before (bool, optional): Indicate whether to put layer normalization + into preprocessing of MHA and FFN sub-layers. If True, pre-process is layer + normalization and post-precess includes dropout, residual connection. + Otherwise, no pre-process and post-precess includes dropout, residual + connection, layer normalization. Default False + weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. + If it is a tuple, `weight_attr[0]` would be used as `weight_attr` for + MHA, and `weight_attr[1]` would be used as `weight_attr` for linear in FFN. + Otherwise, MHA and FFN both use it as `weight_attr` to create parameters. + Default: None, which means the default weight parameter property is used. + See usage for details in :code:`ParamAttr` . + bias_attr (ParamAttr|tuple, optional): To specify the bias parameter property. + If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for + MHA, and `bias_attr[1]` would be used as `bias_attr` for linear in FFN. + Otherwise, MHA and FFN both use it as `bias_attr` to create parameters. + The `False` value means the corresponding layer would not have trainable + bias parameter. See usage for details in :code:`ParamAttr` . Default: None, + which means the default bias parameter property is used. + + + Examples: + + .. 
code-block:: python + + import paddle + from paddle import TransformerEncoderLayer + + # encoder input: [batch_size, src_len, d_model] + enc_input = paddle.rand((2, 4, 128)) + # self attention mask: [batch_size, n_head, src_len, src_len] + attn_mask = paddle.rand((2, 2, 4, 4)) + encoder_layer = TransformerEncoderLayer(128, 2, 512) + enc_output = encoder_layer(enc_input, attn_mask) # [2, 4, 128] + """ + + def __init__(self, + d_model, + nhead, + dim_feedforward, + dropout=0.1, + activation="relu", + attn_dropout=None, + act_dropout=None, + normalize_before=False, + weight_attr=None, + bias_attr=None): + self._config = locals() + self._config.pop("self") + self._config.pop("__class__", None) # py3 + + super(TransformerEncoderLayer, self).__init__() + attn_dropout = dropout if attn_dropout is None else attn_dropout + act_dropout = dropout if act_dropout is None else act_dropout + self.normalize_before = normalize_before + + weight_attrs = _convert_param_attr_to_list(weight_attr, 2) + bias_attrs = _convert_param_attr_to_list(bias_attr, 2) + + self.self_attn = MultiHeadAttention( + d_model, + nhead, + dropout=attn_dropout, + weight_attr=weight_attrs[0], + bias_attr=bias_attrs[0]) + self.linear1 = Linear( + d_model, dim_feedforward, weight_attrs[1], bias_attr=bias_attrs[1]) + self.dropout = Dropout( + act_dropout, dropout_implementation="upscale_in_train") + self.linear2 = Linear( + dim_feedforward, d_model, weight_attrs[1], bias_attr=bias_attrs[1]) + self.norm1 = LayerNorm(d_model) + self.norm2 = LayerNorm(d_model) + self.dropout1 = Dropout( + dropout, dropout_implementation="upscale_in_train") + self.dropout2 = Dropout( + dropout, dropout_implementation="upscale_in_train") + self.activation = getattr(layers, activation) + + def forward(self, src, src_mask=None): + """ + Applies a Transformer encoder layer on the input. + + Parameters: + src (Tensor): The input of Transformer encoder layer. It is + a tensor with shape `[batch_size, sequence_length, d_model]`. + The data type should be float32 or float64. + src_mask (Tensor, optional): A tensor used in multi-head attention + to prevents attention to some unwanted positions, usually the + paddings or the subsequent positions. It is a tensor with shape + broadcasted to `[batch_size, n_head, sequence_length, sequence_length]`, + where the unwanted positions have `-INF` values and the others + have 0 values. The data type should be float32 or float64. It can + be None when nothing wanted or needed to be prevented attention to. + Default None + + Returns: + Tensor: The output of Transformer encoder layer. It is a tensor that \ + has the same shape and data type as `enc_input`. + """ + residual = src + if self.normalize_before: + src = self.norm1(src) + # TODO(guosheng): Add cache for encoder for the usage like UniLM + src = self.self_attn(src, src, src, src_mask) + src = residual + self.dropout1(src) + if not self.normalize_before: + src = self.norm1(src) + + residual = src + if self.normalize_before: + src = self.norm2(src) + src = self.linear2(self.dropout(self.activation(self.linear1(src)))) + src = residual + self.dropout2(src) + if not self.normalize_before: + src = self.norm2(src) + return src + + +class TransformerEncoder(Layer): + """ + TransformerEncoder is a stack of N encoder layers. + + Parameters: + encoder_layer (Layer): an instance of the `TransformerEncoderLayer`. It + would be used as the first layer, and the other layers would be created + according to the configurations of it. 
+ num_layers (int): The number of encoder layers to be stacked. + norm (LayerNorm, optional): the layer normalization component. If provided, + apply layer normalization on the output of last encoder layer. + + Examples: + + .. code-block:: python + + import paddle + from paddle import TransformerEncoderLayer, TransformerEncoder + + # encoder input: [batch_size, src_len, d_model] + enc_input = paddle.rand((2, 4, 128)) + # self attention mask: [batch_size, n_head, src_len, src_len] + attn_mask = paddle.rand((2, 2, 4, 4)) + encoder_layer = TransformerEncoderLayer(128, 2, 512) + encoder = TransformerEncoder(encoder_layer, 2) + enc_output = encoder(enc_input, attn_mask) # [2, 4, 128] + """ + + def __init__(self, encoder_layer, num_layers, norm=None): + super(TransformerEncoder, self).__init__() + self.layers = LayerList([(encoder_layer if i == 0 else + type(encoder_layer)(**encoder_layer._config)) + for i in range(num_layers)]) + self.num_layers = num_layers + self.norm = norm + + def forward(self, src, src_mask=None): + """ + Applies a stack of N Transformer encoder layers on inputs. If `norm` is + provided, also applies layer normalization on the output of last encoder + layer. + + Parameters: + src (Tensor): The input of Transformer encoder. It is a tensor + with shape `[batch_size, sequence_length, d_model]`. The data + type should be float32 or float64. + src_mask (Tensor, optional): A tensor used in multi-head attention + to prevents attention to some unwanted positions, usually the + paddings or the subsequent positions. It is a tensor with shape + broadcasted to `[batch_size, n_head, sequence_length, sequence_length]`, + where the unwanted positions have `-INF` values and the others + have 0 values. The data type should be float32 or float64. It can + be None when nothing wanted or needed to be prevented attention to. + Default None + + Returns: + Tensor: The output of Transformer encoder. It is a tensor that \ + has the same shape and data type as `src`. + """ + output = src + + for mod in self.layers: + output = mod(output, src_mask=src_mask) + + if self.norm is not None: + output = self.norm(output) + + return output + + +class TransformerDecoderLayer(Layer): + """ + TransformerDecoderLayer is composed of three sub-layers which are decoder + self (multi-head) attention, decoder-encoder cross attention and feedforward + network. Before and after each sub-layer, pre-process and post-precess would + be applied on the input and output accordingly. If `normalize_before` is True, + pre-process is layer normalization and post-precess includes dropout, residual + connection. Otherwise, no pre-process and post-precess includes dropout, residual + connection, layer normalization. + + Parameters: + d_model (int): The expected feature size in the input and output. + nhead (int): The number of heads in multi-head attention(MHA). + dim_feedforward (int): The hidden layer size in the feedforward network(FFN). + dropout (float, optional): The dropout probability used in pre-process + and post-precess of MHA and FFN sub-layer. Default 0.1 + activation (str, optional): The activation function in the feedforward + network. Default relu. + attn_dropout (float, optional): The dropout probability used + in MHA to drop some attention target. If None, use the value of + `dropout`. Default None + act_dropout (float, optional): The dropout probability used after FFN + activition. If None, use the value of `dropout`. 
Default None + normalize_before (bool, optional): Indicate whether to put layer normalization + into preprocessing of MHA and FFN sub-layers. If True, pre-process is layer + normalization and post-precess includes dropout, residual connection. + Otherwise, no pre-process and post-precess includes dropout, residual + connection, layer normalization. Default False + weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. + If it is a tuple, `weight_attr[0]` would be used as `weight_attr` for + self attention, `weight_attr[1]` would be used as `weight_attr` for + cross attention, and `weight_attr[2]` would be used as `weight_attr` + for linear in FFN. Otherwise, the three sub-layers all uses it as + `weight_attr` to create parameters. Default: None, which means the + default weight parameter property is used. See usage for details + in :ref:`api_fluid_ParamAttr` . + bias_attr (ParamAttr|tuple, optional): To specify the bias parameter property. + If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for + self attention, `bias_attr[1]` would be used as `bias_attr` for + cross attention, and `bias_attr[2]` would be used as `bias_attr` + for linear in FFN. Otherwise, the three sub-layers all uses it as + `bias_attr` to create parameters. The `False` value means the + corresponding layer would not have trainable bias parameter. See + usage for details in :code:`ParamAttr` . Default: None,which means + the default bias parameter property is used. + + Examples: + + .. code-block:: python + + import paddle + from paddle import TransformerDecoderLayer + + # decoder input: [batch_size, tgt_len, d_model] + dec_input = paddle.rand((2, 4, 128)) + # encoder output: [batch_size, src_len, d_model] + enc_output = paddle.rand((2, 6, 128)) + # self attention mask: [batch_size, n_head, tgt_len, tgt_len] + self_attn_mask = paddle.rand((2, 2, 4, 4)) + # cross attention mask: [batch_size, n_head, tgt_len, src_len] + cross_attn_mask = paddle.rand((2, 2, 4, 6)) + decoder_layer = TransformerDecoderLayer(128, 2, 512) + output = decoder_layer(dec_input, + enc_output, + self_attn_mask, + cross_attn_mask) # [2, 4, 128] + """ + + def __init__(self, + d_model, + nhead, + dim_feedforward, + dropout=0.1, + activation="relu", + attn_dropout=None, + act_dropout=None, + normalize_before=False, + weight_attr=None, + bias_attr=None): + self._config = locals() + self._config.pop("self") + self._config.pop("__class__", None) # py3 + + super(TransformerDecoderLayer, self).__init__() + attn_dropout = dropout if attn_dropout is None else attn_dropout + act_dropout = dropout if act_dropout is None else act_dropout + self.normalize_before = normalize_before + + weight_attrs = _convert_param_attr_to_list(weight_attr, 3) + bias_attrs = _convert_param_attr_to_list(bias_attr, 3) + + self.self_attn = MultiHeadAttention( + d_model, + nhead, + dropout=attn_dropout, + weight_attr=weight_attrs[0], + bias_attr=bias_attrs[0]) + self.cross_attn = MultiHeadAttention( + d_model, + nhead, + dropout=attn_dropout, + weight_attr=weight_attrs[1], + bias_attr=bias_attrs[1]) + self.linear1 = Linear( + d_model, dim_feedforward, weight_attrs[2], bias_attr=bias_attrs[2]) + self.dropout = Dropout( + act_dropout, dropout_implementation="upscale_in_train") + self.linear2 = Linear( + dim_feedforward, d_model, weight_attrs[2], bias_attr=bias_attrs[2]) + self.norm1 = LayerNorm(d_model) + self.norm2 = LayerNorm(d_model) + self.norm3 = LayerNorm(d_model) + self.dropout1 = Dropout( + dropout, dropout_implementation="upscale_in_train") 
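+        # dropout2 and dropout3 below are applied to the cross-attention and
+        # FFN outputs in their residual branches (dropout1 covers self-attention)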
+ self.dropout2 = Dropout( + dropout, dropout_implementation="upscale_in_train") + self.dropout3 = Dropout( + dropout, dropout_implementation="upscale_in_train") + self.activation = getattr(layers, activation) + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + """ + Applies a Transformer decoder layer on the input. + + Parameters: + tgt (Tensor): The input of Transformer decoder layer. It is a tensor + with shape `[batch_size, target_length, d_model]`. The data type + should be float32 or float64. + memory (Tensor): The output of Transformer encoder. It is a tensor + with shape `[batch_size, source_length, d_model]`. The data type + should be float32 or float64. + tgt_mask (Tensor, optional): A tensor used in self attention + to prevents attention to some unwanted positions, usually the + the subsequent positions. It is a tensor with shape broadcasted + to `[batch_size, n_head, target_length, target_length]`, + where the unwanted positions have `-INF` values and the others + have 0 values. The data type should be float32 or float64. It can + be None when nothing wanted or needed to be prevented attention to. + Default None + memory_mask (Tensor, optional): A tensor used in decoder-encoder + cross attention to prevents attention to some unwanted positions, + usually the paddings. It is a tensor with shape broadcasted to + `[batch_size, n_head, target_length, source_length]`, where the + unwanted positions have `-INF` values and the others have 0 values. + The data type should be float32 or float64. It can be None when + nothing wanted or needed to be prevented attention to. Default None + cache (tuple, optional): It is a tuple( :code:`(incremental_cache, static_cache)` ), + `incremental_cache` is an instance of `MultiHeadAttention.Cache`, + `static_cache` is an instance of `MultiHeadAttention.StaticCache. + See `TransformerDecoderLayer.gen_cache` for more details. It is + only used for inference and should be None for training. Default + None. + + Returns: + Tensor|tuple: It is a tensor that has the same shape and data type \ + as `tgt`, representing the output of Transformer decoder layer. \ + Or a tuple if `cache` is not None, except for decoder layer output, \ + the tuple includes the new cache which is same as input `cache` \ + argument but `incremental_cache` in it has an incremental length. \ + See `MultiHeadAttention.gen_cache` and `MultiHeadAttention.forward` \ + for more details. + """ + residual = tgt + if self.normalize_before: + tgt = self.norm1(tgt) + if cache is None: + tgt = self.self_attn(tgt, tgt, tgt, tgt_mask, None) + else: + tgt, incremental_cache = self.self_attn(tgt, tgt, tgt, tgt_mask, + cache[0]) + tgt = residual + self.dropout1(tgt) + if not self.normalize_before: + tgt = self.norm1(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm2(tgt) + if cache is None: + tgt = self.cross_attn(tgt, memory, memory, memory_mask, None) + else: + tgt, static_cache = self.cross_attn(tgt, memory, memory, + memory_mask, cache[1]) + tgt = residual + self.dropout2(tgt) + if not self.normalize_before: + tgt = self.norm2(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm3(tgt) + tgt = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = residual + self.dropout3(tgt) + if not self.normalize_before: + tgt = self.norm3(tgt) + return tgt if cache is None else (tgt, (incremental_cache, + static_cache)) + + def gen_cache(self, memory): + """ + Generates cache for `forward` usage. 
The generated cache is a tuple + composed of an instance of `MultiHeadAttention.Cache` and an instance + of `MultiHeadAttention.StaticCache`. + + Parameters: + memory (Tensor): The output of Transformer encoder. It is a tensor + with shape `[batch_size, source_length, d_model]`. The data type + should be float32 or float64. + + Returns: + tuple: It is a tuple( :code:`(incremental_cache, static_cache)` ). \ + `incremental_cache` is an instance of `MultiHeadAttention.Cache` \ + produced by `self_attn.gen_cache(memory, MultiHeadAttention.Cache)`, \ + it reserves two tensors shaped `[batch_size, nhead, 0, d_model // nhead]`. \ + `static_cache` is an instance of `MultiHeadAttention.StaticCache` \ + produced by `cross_attn.gen_cache(memory, MultiHeadAttention.StaticCache)`, \ + it reserves two tensors shaped `[batch_size, nhead, source_length, d_model // nhead]`. + See `MultiHeadAttention.gen_cache` and `MultiHeadAttention.forward` \ + for more details. + """ + incremental_cache = self.self_attn.gen_cache( + memory, type=self.self_attn.Cache) + static_cache = self.cross_attn.gen_cache( + memory, memory, type=self.cross_attn.StaticCache) + return incremental_cache, static_cache + + +class TransformerDecoder(Layer): + """ + TransformerDecoder is a stack of N decoder layers. + + Parameters: + decoder_layer (Layer): an instance of the `TransformerDecoderLayer`. It + would be used as the first layer, and the other layers would be created + according to the configurations of it. + num_layers (int): The number of decoder layers to be stacked. + norm (LayerNorm, optional): the layer normalization component. If provided, + apply layer normalization on the output of last encoder layer. + + Examples: + + .. code-block:: python + + import paddle + from paddle import TransformerDecoderLayer, TransformerDecoder + + # decoder input: [batch_size, tgt_len, d_model] + dec_input = paddle.rand((2, 4, 128)) + # encoder output: [batch_size, src_len, d_model] + enc_output = paddle.rand((2, 6, 128)) + # self attention mask: [batch_size, n_head, tgt_len, tgt_len] + self_attn_mask = paddle.rand((2, 2, 4, 4)) + # cross attention mask: [batch_size, n_head, tgt_len, src_len] + cross_attn_mask = paddle.rand((2, 2, 4, 6)) + decoder_layer = TransformerDecoderLayer(128, 2, 512) + decoder = TransformerDecoder(decoder_layer, 2) + output = decoder(dec_input, + enc_output, + self_attn_mask, + cross_attn_mask) # [2, 4, 128] + """ + + def __init__(self, decoder_layer, num_layers, norm=None): + super(TransformerDecoder, self).__init__() + self.layers = LayerList([(decoder_layer if i == 0 else + type(decoder_layer)(**decoder_layer._config)) + for i in range(num_layers)]) + self.num_layers = num_layers + self.norm = norm + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + """ + Applies a stack of N Transformer decoder layers on inputs. If `norm` is + provided, also applies layer normalization on the output of last decoder + layer. + + Parameters: + tgt (Tensor): The input of Transformer decoder. It is a tensor + with shape `[batch_size, target_length, d_model]`. The data type + should be float32 or float64. + memory (Tensor): The output of Transformer encoder. It is a tensor + with shape `[batch_size, source_length, d_model]`. The data type + should be float32 or float64. + tgt_mask (Tensor, optional): A tensor used in self attention + to prevents attention to some unwanted positions, usually the + the subsequent positions. 
It is a tensor with shape broadcasted + to `[batch_size, n_head, target_length, target_length]`, + where the unwanted positions have `-INF` values and the others + have 0 values. The data type should be float32 or float64. It can + be None when nothing wanted or needed to be prevented attention to. + Default None + memory_mask (Tensor, optional): A tensor used in decoder-encoder + cross attention to prevents attention to some unwanted positions, + usually the paddings. It is a tensor with shape broadcasted to + `[batch_size, n_head, target_length, source_length]`, where the + unwanted positions have `-INF` values and the others have 0 values. + The data type should be float32 or float64. It can be None when + nothing wanted or needed to be prevented attention to. Default None + cache (list, optional): It is a list, and each element in the list + is a tuple( :code:`(incremental_cache, static_cache)` ). See + `TransformerDecoder.gen_cache` for more details. It is only + used for inference and should be None for training. Default None. + + Returns: + Tensor|tuple: It is a tensor that has the same shape and data type \ + as `tgt`, representing the output of Transformer decoder. \ + Or a tuple if `cache` is not None, except for decoder output, \ + the tuple includes the new cache which is same as input `cache` \ + argument but `incremental_cache` in it has an incremental length. \ + See `MultiHeadAttention.gen_cache` and `MultiHeadAttention.forward` \ + for more details. + """ + output = tgt + new_caches = [] + for i, mod in enumerate(self.layers): + if cache is None: + output = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=None) + else: + output, new_cache = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=cache[i]) + new_caches.append(new_cache) + + if self.norm is not None: + output = self.norm(output) + + return output if cache is None else (output, new_caches) + + def gen_cache(self, memory, do_zip=False): + """ + Generates cache for `forward` usage. The generated cache is a list, and + each element in it is a tuple( :code:`(incremental_cache, static_cache)` ) + produced by `TransformerDecoderLayer.gen_cache`. See `TransformerDecoderLayer.gen_cache` + for more details. If `do_zip` is True, apply `zip` on these tuples to get + a list with two elements. + + + Parameters: + memory (Tensor): The output of Transformer encoder. It is a tensor + with shape `[batch_size, source_length, d_model]`. The data type + should be float32 or float64. + do_zip (bool, optional): Indicate whether to apply `zip` on the tuples. + If True, return a list with two elements. Default False + + Returns: + list: It is a list, and each element in the list is a tuple produced \ + by `TransformerDecoderLayer.gen_cache(memory)`. See `TransformerDecoderLayer.gen_cache` \ + for more details. If `do_zip` is True, apply `zip` on these tuples \ + and return a list with two elements. + """ + cache = [layer.gen_cache(memory) for layer in self.layers] + if do_zip: + cache = list(zip(*cache)) + return cache + + +class Transformer(Layer): + """ + A Transformer model composed of an instance of `TransformerEncoder` and an + instance of `TransformerDecoder`. While the embedding layer and output layer + are not included. + + Please refer to `Attention is all you need `_ , + and see `TransformerEncoder` and `TransformerDecoder` for more details. + + Users can configurate the model architecture with corresponding parameters. 
+ Note the usage of `normalize_before` representing where to apply layer + normalization (in pre-process or post-precess of multi-head attention or FFN), + and some transformer like models are different on this, such as + `BERT `_ and `GPT2 `_ . + The default architecture here places layer normalization in post-process and + applies another layer normalization on the output of last encoder/decoder layer. + + Parameters: + d_model (int): The expected feature size in the encoder/decoder input + and output. + nhead (int): The number of heads in multi-head attention(MHA). + num_encoder_layers (int): The number of layers in encoder. + num_encoder_layers (int): The number of layers in decoder. + dim_feedforward (int): The hidden layer size in the feedforward network(FFN). + dropout (float, optional): The dropout probability used in pre-process + and post-precess of MHA and FFN sub-layer. Default 0.1 + activation (str, optional): The activation function in the feedforward + network. Default relu. + attn_dropout (float, optional): The dropout probability used + in MHA to drop some attention target. If None, use the value of + `dropout`. Default None + act_dropout (float, optional): The dropout probability used after FFN + activition. If None, use the value of `dropout`. Default None + normalize_before (bool, optional): Indicate whether to put layer normalization + into preprocessing of MHA and FFN sub-layers. If True, pre-process is layer + normalization and post-precess includes dropout, residual connection. + Otherwise, no pre-process and post-precess includes dropout, residual + connection, layer normalization. Default False + weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. + If it is a tuple, `weight_attr[0]` would be used as `weight_attr` for + self attention, `weight_attr[1]` would be used as `weight_attr` for + cross attention, and `weight_attr[2]` would be used as `weight_attr` + for linear in FFN. Otherwise, the three sub-layers all uses it as + `weight_attr` to create parameters. Default: None, which means the + default weight parameter property is used. See usage for details + in :code:`ParamAttr` . + bias_attr (ParamAttr|tuple, optional): To specify the bias parameter property. + If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for + self attention, `bias_attr[1]` would be used as `bias_attr` for + cross attention, and `bias_attr[2]` would be used as `bias_attr` + for linear in FFN. Otherwise, the three sub-layers all uses it as + `bias_attr` to create parameters. The `False` value means the + corresponding layer would not have trainable bias parameter. See + usage for details in :code:`ParamAttr` . Default: None,which means + the default bias parameter property is used. + custom_encoder (Layer): If custom encoder is provided, use it as the encoder. + Default None + custom_decoder (Layer): If custom decoder is provided, use it as the decoder. + Default None + + Examples: + + .. 
code-block:: python + + import paddle + from paddle import Transformer + + # src: [batch_size, tgt_len, d_model] + enc_input = paddle.rand((2, 4, 128)) + # tgt: [batch_size, src_len, d_model] + dec_input = paddle.rand((2, 6, 128)) + # src_mask: [batch_size, n_head, src_len, src_len] + enc_self_attn_mask = paddle.rand((2, 2, 4, 4)) + # tgt_mask: [batch_size, n_head, tgt_len, tgt_len] + dec_self_attn_mask = paddle.rand((2, 2, 6, 6)) + # memory_mask: [batch_size, n_head, tgt_len, src_len] + cross_attn_mask = paddle.rand((2, 2, 6, 4)) + transformer = Transformer(128, 2, 4, 4, 512) + output = transformer(enc_input, + dec_input, + enc_self_attn_mask, + dec_self_attn_mask, + cross_attn_mask) # [2, 6, 128] + """ + + def __init__(self, + d_model=512, + nhead=8, + num_encoder_layers=6, + num_decoder_layers=6, + dim_feedforward=2048, + dropout=0.1, + activation="relu", + attn_dropout=None, + act_dropout=None, + normalize_before=False, + weight_attr=None, + bias_attr=None, + custom_encoder=None, + custom_decoder=None): + super(Transformer, self).__init__() + + if custom_encoder is not None: + self.encoder = custom_encoder + else: + encoder_layer = TransformerEncoderLayer( + d_model, nhead, dim_feedforward, dropout, activation, + attn_dropout, act_dropout, normalize_before, weight_attr, + bias_attr) + encoder_norm = LayerNorm(d_model) + self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, + encoder_norm) + + if custom_decoder is not None: + self.decoder = custom_decoder + else: + decoder_layer = TransformerDecoderLayer( + d_model, nhead, dim_feedforward, dropout, activation, + attn_dropout, act_dropout, normalize_before, weight_attr, + bias_attr) + decoder_norm = LayerNorm(d_model) + self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, + decoder_norm) + + self.d_model = d_model + self.nhead = nhead + + def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None): + """ + Applies a Transformer model on the inputs. + + Parameters: + src (Tensor): The input of Transformer encoder. It is a tensor + with shape `[batch_size, source_length, d_model]`. The data type + should be float32 or float64. + tgt (Tensor): The input of Transformer decoder. It is a tensor + with shape `[batch_size, target_length, d_model]`. The data type + should be float32 or float64. + memory (Tensor): The output of Transformer encoder. It is a tensor + with shape `[batch_size, source_length, d_model]`. The data type + should be float32 or float64. + tgt_mask (Tensor, optional): A tensor used in self attention + to prevents attention to some unwanted positions, usually the + the subsequent positions. It is a tensor with shape broadcasted + to `[batch_size, n_head, target_length, target_length]`, + where the unwanted positions have `-INF` values and the others + have 0 values. The data type should be float32 or float64. It can + be None when nothing wanted or needed to be prevented attention to. + Default None + memory_mask (Tensor, optional): A tensor used in decoder-encoder + cross attention to prevents attention to some unwanted positions, + usually the paddings. It is a tensor with shape broadcasted to + `[batch_size, n_head, target_length, source_length]`, where the + unwanted positions have `-INF` values and the others have 0 values. + The data type should be float32 or float64. It can be None when + nothing wanted or needed to be prevented attention to. 
Default None + + Returns: + Tensor: It is a tensor that has the same shape and data type \ + as `tgt`, representing the output of Transformer decoder. + """ + memory = self.encoder(src, src_mask=src_mask) + output = self.decoder( + tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask) + return output diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..a5f360ec02e6d8b59b80db4602776e904cf0b499 --- /dev/null +++ b/python/paddle/nn/layer/vision.py @@ -0,0 +1,82 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# TODO: define specitial functions used in computer vision task + +from ...fluid.dygraph import layers +from .. import functional + +__all__ = ['PixelShuffle'] + + +class PixelShuffle(layers.Layer): + """ + + PixelShuffle Layer + + This operator rearranges elements in a tensor of shape [N, C, H, W] + to a tensor of shape [N, C/upscale_factor**2, H*upscale_factor, W*upscale_factor], + or from shape [N, H, W, C] to [N, H*upscale_factor, W*upscale_factor, C/upscale_factor**2]. + This is useful for implementing efficient sub-pixel convolution + with a stride of 1/upscale_factor. + Please refer to the paper: `Real-Time Single Image and Video Super-Resolution + Using an Efficient Sub-Pixel Convolutional Neural Network `_ . + by Shi et. al (2016) for more details. + + Parameters: + + upscale_factor(int): factor to increase spatial resolution. + data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width]. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - x: 4-D tensor with shape: (N, C, H, W) or (N, H, W, C). + - out: 4-D tensor with shape: (N, C/upscale_factor**2, H*upscale_factor, W*upscale_factor) or (N, H*upscale_factor, W*upscale_factor, C/upscale_factor^2). + + + Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + + paddle.disable_static() + x = np.random.randn(2, 9, 4, 4).astype(np.float32) + x_var = paddle.to_tensor(x) + pixel_shuffle = nn.PixelShuffle(3) + out_var = pixel_shuffle(x_var) + out = out_var.numpy() + print(out.shape) + # (2, 1, 12, 12) + + """ + + def __init__(self, upscale_factor, data_format="NCHW", name=None): + super(PixelShuffle, self).__init__() + + if not isinstance(upscale_factor, int): + raise TypeError("upscale factor must be int type") + + if data_format not in ["NCHW", "NHWC"]: + raise ValueError("Data format should be 'NCHW' or 'NHWC'." 
+ "But recevie data format: {}".format(data_format)) + + self._upscale_factor = upscale_factor + self._data_format = data_format + self._name = name + + def forward(self, x): + return functional.pixel_shuffle(x, self._upscale_factor, + self._data_format, self._name) diff --git a/python/paddle/nn/input.py b/python/paddle/nn/utils/__init__.py similarity index 81% rename from python/paddle/nn/input.py rename to python/paddle/nn/utils/__init__.py index b5f591f44a9a167dcba8e4e46322ca157a5e48cb..6562ac35e1e3180db671f90188f1304f07864189 100644 --- a/python/paddle/nn/input.py +++ b/python/paddle/nn/utils/__init__.py @@ -12,10 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define input placeholders of neural network -from ..fluid import data #DEFINE_ALIAS - -__all__ = [ - 'data', - # 'Input' -] +from . import weight_norm_hook +from .weight_norm_hook import weight_norm, remove_weight_norm diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py new file mode 100644 index 0000000000000000000000000000000000000000..ad53bf394660f3a7e0e48fdbd5eb530abd0852bb --- /dev/null +++ b/python/paddle/nn/utils/weight_norm_hook.py @@ -0,0 +1,225 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from ... 
import fluid +from ...fluid import dygraph +from ...fluid import layers as F +from ...fluid.layer_helper import LayerHelper +from ...fluid.data_feeder import check_variable_and_dtype +from ...tensor.math import multiply + +__all__ = ['weight_norm', 'remove_weight_norm'] + + +def l2_norm(x, axis, epsilon=1e-12, name=None): + if len(x.shape) == 1: + axis = 0 + check_variable_and_dtype(x, "X", ("float32", "float64"), "norm") + + helper = LayerHelper("l2_normalize", **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + norm = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type="norm", + inputs={"X": x}, + outputs={"Out": out, + "Norm": norm}, + attrs={ + "axis": 1 if axis is None else axis, + "epsilon": epsilon, + }) + return F.squeeze(norm, axes=[axis]) + + +def norm_except_dim(p, dim): + shape = p.shape + ndims = len(shape) + if dim == -1: + return F.sqrt(F.reduce_sum(F.square(p)) + 1e-12) + elif dim == 0: + p_matrix = F.reshape(p, (shape[0], -1)) + return l2_norm(p_matrix, axis=1) + elif dim == ndims - 1: + p_matrix = F.reshape(p, (-1, shape[-1])) + return l2_norm(p_matrix, axis=0) + else: + perm = list(range(ndims)) + perm[0] = dim + perm[dim] = 0 + p_transposed = F.transpose(p, perm) + return norm_except_dim(p_transposed, 0) + + +def _weight_norm(v, g, dim): + shape = v.shape + ndims = len(shape) + + if dim == -1: + v_normalized = v / (F.sqrt(F.reduce_sum(F.square(v))) + 1e-12) + elif dim == 0: + p_matrix = F.reshape(v, (shape[0], -1)) + v_normalized = F.l2_normalize(p_matrix, axis=1) + v_normalized = F.reshape(v_normalized, shape) + elif dim == ndims - 1: + p_matrix = F.reshape(v, (-1, shape[-1])) + v_normalized = F.l2_normalize(p_matrix, axis=0) + v_normalized = F.reshape(v_normalized, shape) + else: + perm = list(range(ndims)) + perm[0] = dim + perm[dim] = 0 + p_transposed = F.transpose(v, perm) + transposed_shape = p_transposed.shape + p_matrix = F.reshape(p_transposed, (p_transposed.shape[0], -1)) + v_normalized = F.l2_normalize(p_matrix, axis=1) + v_normalized = F.reshape(v_normalized, transposed_shape) + v_normalized = F.transpose(v_normalized, perm) + weight = multiply(v_normalized, g, axis=dim if dim is not None else -1) + return weight + + +class WeightNorm(object): + def __init__(self, name, dim): + if dim is None: + dim = -1 + self.name = name + self.dim = dim + + def compute_weight(self, layer): + g = getattr(layer, self.name + '_g') + v = getattr(layer, self.name + '_v') + return _weight_norm(v, g, self.dim) + + @staticmethod + def apply(layer, name, dim): + for k, hook in layer._forward_pre_hooks.items(): + if isinstance(hook, WeightNorm) and hook.name == name: + raise RuntimeError("Cannot register two weight_norm hooks on " + "the same parameter {}".format(name)) + + if dim is None: + dim = -1 + + fn = WeightNorm(name, dim) + + w = getattr(layer, name) + del layer._parameters[name] + + g_var = norm_except_dim(w, dim) + v = layer.create_parameter(w.shape, dtype=w.dtype) + layer.add_parameter(name + "_v", v) + g = layer.create_parameter(g_var.shape, dtype=g_var.dtype) + layer.add_parameter(name + '_g', g) + with dygraph.no_grad(): + F.assign(w, v) + F.assign(g_var, g) + setattr(layer, name, fn.compute_weight(layer)) + + layer.register_forward_pre_hook(fn) + return fn + + def remove(self, layer): + w_var = self.compute_weight(layer) + delattr(layer, self.name) + del layer._parameters[self.name + '_g'] + del layer._parameters[self.name + '_v'] + w = layer.create_parameter(w_var.shape, dtype=w_var.dtype) + 
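+        # re-register the merged weight as an ordinary parameter and copy the
+        # computed value into it without tracking gradients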
layer.add_parameter(self.name, w) + with dygraph.no_grad(): + F.assign(w_var, w) + + def __call__(self, layer, inputs): + setattr(layer, self.name, self.compute_weight(layer)) + + +def weight_norm(layer, name='weight', dim=0): + """ + This weight_norm layer applies weight normalization to a parameter according to the + following formula: + + .. math:: + + \mathbf{w} = g \dfrac{v}{\|v\|} + + Weight normalization is a reparameterization of the weight vectors in a neural network that + decouples the magnitude of those weight vectors from their direction. Weight normalization + replaces the parameter specified by `name`(eg: 'weight') with two parameters: one parameter + specifying the magnitude (eg: 'weight_g') and one parameter specifying the direction + (eg: 'weight_v'). Weight normalization has been implemented as discussed in this paper: + `Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks + `_. + + Parameters: + layer(Layer): Layer of paddle, which has weight. + name(str, optional): Name of the weight parameter. Default: 'weight'. + dim(int, optional): Dimension over which to compute the norm. Dim is a non-negative number + which is less than the rank of weight Tensor. For Example, dim can be chosen from 0, + 1, 2, 3 for convolution whose weight shape is [cout, cin, kh, kw] and rank is 4. + If dim is set to None, meaning that all elements will be normalized. Default: 0. + + Returns: + Origin layer with weight norm hook. + + Examples: + .. code-block:: python + + import numpy as np + from paddle.nn import Conv2D + from paddle.nn.utils import weight_norm + + x = np.array([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]]).astype('float32') + paddle.disable_static() + conv = Conv2D(3, 5, 3) + wn = weight_norm(conv) + print(conv.weight_g.shape) + # [5] + print(conv.weight_v.shape) + # [5, 3, 3, 3] + """ + WeightNorm.apply(layer, name, dim) + return layer + + +def remove_weight_norm(layer, name='weight'): + """ + remove weight normalization from layer. + + Parameters: + layer(Layer): Layer of paddle, which has weight. + name(str, optional): Name of the weight parameter. Default: 'weight'. + + Returns: + Origin layer without weight norm + + Examples: + .. 
code-block:: python + import paddle + from paddle.nn import Conv2D + from paddle.nn.utils import weight_norm, remove_weight_norm + + paddle.disable_static() + conv = Conv2D(3, 5, 3) + wn = weight_norm(conv) + remove_weight_norm(conv) + print(conv.weight_g) + # AttributeError: 'Conv2D' object has no attribute 'weight_g' + """ + for k, hook in layer._forward_pre_hooks.items(): + if isinstance(hook, WeightNorm) and hook.name == name: + hook.remove(layer) + del layer._forward_pre_hooks[k] + return layer + + raise ValueError("weight_norm of '{}' not found in {}".format(name, layer)) diff --git a/python/paddle/optimizer/__init__.py b/python/paddle/optimizer/__init__.py index 4dc3cf397aea59f3fedfc86bff7a77556a6a63a7..49314c9832dd389411dffb3f498b34d09337a3f0 100644 --- a/python/paddle/optimizer/__init__.py +++ b/python/paddle/optimizer/__init__.py @@ -14,21 +14,32 @@ __all__ = [ 'Adadelta', 'AdadeltaOptimizer', 'Adagrad', 'AdagradOptimizer', 'Adam', - 'Adamax', 'AdamaxOptimizer', 'AdamOptimizer', 'DecayedAdagrad', - 'DecayedAdagradOptimizer', 'DGCMomentumOptimizer', 'Dpsgd', - 'DpsgdOptimizer', 'ExponentialMovingAverage', 'Ftrl', 'FtrlOptimizer', - 'LambOptimizer', 'LarsMomentum', 'LarsMomentumOptimizer', - 'LookaheadOptimizer', 'ModelAverage', 'Momentum', 'MomentumOptimizer', - 'PipelineOptimizer', 'RecomputeOptimizer', 'RMSPropOptimizer', 'SGD', - 'SGDOptimizer' + 'Adamax', 'AdamW', 'DecayedAdagrad', 'DecayedAdagradOptimizer', + 'DGCMomentumOptimizer', 'Dpsgd', 'DpsgdOptimizer', + 'ExponentialMovingAverage', 'Ftrl', 'FtrlOptimizer', 'LambOptimizer', + 'LarsMomentum', 'LarsMomentumOptimizer', 'LookaheadOptimizer', + 'ModelAverage', 'Momentum', 'MomentumOptimizer', 'PipelineOptimizer', + 'RecomputeOptimizer', 'RMSProp', 'SGD', 'SGDOptimizer', 'Optimizer', + '_LRScheduler', 'NoamLR', 'PiecewiseLR', 'NaturalExpLR', 'InverseTimeLR', + 'PolynomialLR', 'LinearLrWarmup', 'ExponentialLR', 'MultiStepLR', 'StepLR', + 'LambdaLR', 'ReduceLROnPlateau', 'CosineAnnealingLR' ] -from ..fluid.optimizer import SGD, Momentum, Adagrad, Adam, Adamax, Dpsgd, DecayedAdagrad, \ - Ftrl, SGDOptimizer, MomentumOptimizer, AdagradOptimizer, \ - AdamOptimizer, AdamaxOptimizer, DpsgdOptimizer, \ - DecayedAdagradOptimizer, RMSPropOptimizer, FtrlOptimizer, Adadelta, \ - AdadeltaOptimizer, ModelAverage, LarsMomentum, \ - LarsMomentumOptimizer, DGCMomentumOptimizer, LambOptimizer, \ +from ..fluid.optimizer import SGD, Momentum, Adagrad, Dpsgd, DecayedAdagrad, \ + Ftrl, Adadelta, \ + SGDOptimizer, MomentumOptimizer, AdagradOptimizer,DpsgdOptimizer,\ + DecayedAdagradOptimizer,FtrlOptimizer,AdadeltaOptimizer, \ + ModelAverage, LarsMomentum, DGCMomentumOptimizer, LambOptimizer,\ ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, \ - RecomputeOptimizer + RecomputeOptimizer, LarsMomentumOptimizer + +from .optimizer import Optimizer +from .adam import Adam +from .adamw import AdamW +from .adamax import Adamax +from .rmsprop import RMSProp + +from . import lr_scheduler +from .lr_scheduler import _LRScheduler, NoamLR, PiecewiseLR, NaturalExpLR, InverseTimeLR, PolynomialLR, \ + LinearLrWarmup, ExponentialLR, MultiStepLR, StepLR, LambdaLR, ReduceLROnPlateau, CosineAnnealingLR diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py new file mode 100644 index 0000000000000000000000000000000000000000..0da8053fe8a3495f5d3188a737638531347de648 --- /dev/null +++ b/python/paddle/optimizer/adam.py @@ -0,0 +1,246 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .optimizer import Optimizer +from ..fluid import core +from ..fluid import framework +from ..fluid.framework import Variable + +__all__ = ["Adam"] + + +class Adam(Optimizer): + """ + The Adam optimizer uses an optimization described at the end + of section 2 of `Adam paper `_ , + it can dynamically adjusts the learning rate of each parameter using + the 1st moment estimates and the 2nd moment estimates of the gradient. + + The parameter ``param_out`` update rule with gradient ``grad``: + + .. math:: + + t & = t + 1 + + moment\_1\_out & = {\\beta}_1 * moment\_1 + (1 - {\\beta}_1) * grad + + moment\_2\_out & = {\\beta}_2 * moment\_2 + (1 - {\\beta}_2) * grad * grad + + learning\_rate & = learning\_rate * \\ + \\frac{\sqrt{1 - {\\beta}_2^t}}{1 - {\\beta}_1^t} + + param\_out & = param - learning\_rate * \\frac{moment\_1}{\sqrt{moment\_2} + \epsilon} + + Related paper: `Adam: A Method for Stochastic Optimization `_ + + Args: + learning_rate (float|LearningRateDecay, optional): The learning rate used to update ``Parameter``. + It can be a float value or a LearningRateDecay. The default value is 0.001. + beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates. + It should be a float number or a Tensor with shape [1] and data type as float32. + The default value is 0.9. + beta2 (float|Tensor, optional): The exponential decay rate for the 2nd moment estimates. + It should be a float number or a Tensor with shape [1] and data type as float32. + The default value is 0.999. + epsilon (float, optional): A small float value for numerical stability. + The default value is 1e-08. + parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. + grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of + some derived class of ``GradientClipBase`` . There are three cliping strategies + ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , + :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping. + name (str, optional): Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name`. + The default value is None. 
+ lazy_mode (bool, optional): The official Adam algorithm has two moving-average accumulators. + The accumulators are updated at every step. Every element of the two moving-average + is updated in both dense mode and sparse mode. If the size of parameter is very large, + then the update may be very slow. The lazy mode only update the element that has + gradient in current mini-batch, so it will be much more faster. But this mode has + different semantics with the original Adam algorithm and may lead to different result. + The default value is False. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.mean(out) + adam = paddle.optimizer.Adam(learning_rate=0.1, + parameters=linear.parameters()) + out.backward() + adam.step() + adam.clear_grad() + + .. code-block:: python + + # Adam with beta1/beta2 as Tensor and weight_decay as float + import paddle + import numpy as np + + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.mean(out) + + beta1 = paddle.to_tensor([0.9], dtype="float32") + beta2 = paddle.to_tensor([0.99], dtype="float32") + + adam = paddle.optimizer.Adam(learning_rate=0.1, + parameters=linear.parameters(), + beta1=beta1, + beta2=beta2, + weight_decay=0.01) + out.backward() + adam.step() + adam.clear_grad() + + """ + _moment1_acc_str = "moment1" + _moment2_acc_str = "moment2" + _beta1_pow_acc_str = "beta1_pow_acc" + _beta2_pow_acc_str = "beta2_pow_acc" + + def __init__(self, + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, + parameters=None, + weight_decay=None, + grad_clip=None, + name=None, + lazy_mode=False): + assert learning_rate is not None + assert beta1 is not None + assert beta2 is not None + assert epsilon is not None + super(Adam, self).__init__( + learning_rate=learning_rate, + parameters=parameters, + weight_decay=weight_decay, + grad_clip=grad_clip, + name=name) + self.type = "adam" + self._beta1 = beta1 + self._beta2 = beta2 + self._epsilon = epsilon + self._lazy_mode = lazy_mode + + def _create_accumulators(self, block, parameters): + assert isinstance(block, framework.Block) + + # Create accumulator tensors for first and second moments + for p in parameters: + self._add_accumulator(self._moment1_acc_str, p) + self._add_accumulator(self._moment2_acc_str, p) + self._add_accumulator( + name=self._beta1_pow_acc_str, + param=p, + fill_value=0.9 if isinstance(self._beta1, Variable) \ + else self._beta1, + shape=[1], + type=core.VarDesc.VarType.LOD_TENSOR, device='cpu') + self._add_accumulator( + name=self._beta2_pow_acc_str, + param=p, + fill_value=0.999 if isinstance(self._beta2, Variable) \ + else self._beta2, + shape=[1], + type=core.VarDesc.VarType.LOD_TENSOR, device='cpu') + + def _append_optimize_op(self, block, param_and_grad): + assert isinstance(block, framework.Block) + + moment1 = self._get_accumulator(self._moment1_acc_str, + param_and_grad[0]) + moment2 = self._get_accumulator(self._moment2_acc_str, + param_and_grad[0]) + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param_and_grad[0]) + beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str, + param_and_grad[0]) + lr = self._create_param_lr(param_and_grad) + # create the adam optimize op + + if 
framework.in_dygraph_mode(): + _beta1 = self._beta1 if not isinstance( + self._beta1, Variable) else self._beta1.numpy().item(0) + _beta2 = self._beta2 if not isinstance( + self._beta2, Variable) else self._beta2.numpy().item(0) + _, _, _, _, _ = core.ops.adam( + param_and_grad[0], param_and_grad[1], lr, moment1, moment2, + beta1_pow_acc, beta2_pow_acc, param_and_grad[0], moment1, + moment2, beta1_pow_acc, beta2_pow_acc, 'epsilon', self._epsilon, + 'lazy_mode', self._lazy_mode, 'min_row_size_to_use_multithread', + 1000, 'beta1', _beta1, 'beta2', _beta2) + + return None + + inputs = { + "Param": [param_and_grad[0]], + "Grad": [param_and_grad[1]], + "LearningRate": [lr], + "Moment1": [moment1], + "Moment2": [moment2], + "Beta1Pow": [beta1_pow_acc], + "Beta2Pow": [beta2_pow_acc] + } + outputs = { + "ParamOut": [param_and_grad[0]], + "Moment1Out": [moment1], + "Moment2Out": [moment2], + "Beta1PowOut": [beta1_pow_acc], + "Beta2PowOut": [beta2_pow_acc], + } + attrs = { + "epsilon": self._epsilon, + "lazy_mode": self._lazy_mode, + "min_row_size_to_use_multithread": 1000 + } + + if isinstance(self._beta1, Variable): + inputs['Beta1Tensor'] = self._beta1 + else: + attrs['beta1'] = self._beta1 + if isinstance(self._beta2, Variable): + inputs['Beta2Tensor'] = self._beta2 + else: + attrs['beta2'] = self._beta2 + + adam_op = block.append_op( + type=self.type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=True) + + return adam_op diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py new file mode 100644 index 0000000000000000000000000000000000000000..73a78b17cbba55c1ee90a2708f6c163940158a51 --- /dev/null +++ b/python/paddle/optimizer/adamax.py @@ -0,0 +1,192 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .optimizer import Optimizer +from ..fluid import core +from ..fluid import framework +from ..fluid.framework import Variable, name_scope + +__all__ = ["Adamax"] + + +class Adamax(Optimizer): + """ + The Adamax optimizer is implemented based on the Adamax Optimization + in Section 7 of `Adam paper `_. + The Adamax algorithm is a variant of the Adam algorithm based on the infinite norm, + which makes the learning rate update algorithm more stable and simple. + + The parameter ``param_out`` update rule with gradient ``grad``: + + .. math:: + + t & = t + 1 + + moment\_out & = {\\beta}_1 * moment + (1 - {\\beta}_1) * grad + + inf\_norm\_out & = max({\\beta}_2 * inf\_norm + \epsilon, |grad|) + + learning\_rate & = \\frac{learning\_rate}{1 - {\\beta}_1^t} + + param\_out & = param - learning\_rate * \\frac{moment\_out}{inf\_norm\_out} + + Related paper: `Adam: A Method for Stochastic Optimization `_ + + The original paper does not have an ``epsilon`` attribute, + it is added here for numerical stability to prevent the division by 0 error. + + Args: + learning_rate (float|LearningRateDecay, optional): The learning rate used to update ``Parameter``. 
+ It can be a float value or a LearningRateDecay. The default value is 0.001. + beta1 (float, optional): The exponential decay rate for the 1st moment estimates. + The default value is 0.9. + beta2 (float, optional): The exponential decay rate for the 2nd moment estimates. + The default value is 0.999. + epsilon (float, optional): A small float value for numerical stability. + The default value is 1e-08. + parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. + grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of + some derived class of ``GradientClipBase`` . There are three cliping strategies + ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , + :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping. + name (str, optional): Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name`. + The default value is None. + + **Notes**: + **Currently, Adamax doesn't support sparse parameter optimization.** + + Examples: + .. 
code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.mean(out) + + beta1 = paddle.to_tensor([0.9], dtype="float32") + beta2 = paddle.to_tensor([0.99], dtype="float32") + + adam = paddle.optimizer.Adamax(learning_rate=0.1, + parameters=linear.parameters(), + beta1=beta1, + beta2=beta2, + weight_decay=0.01) + out.backward() + adam.step() + adam.clear_grad() + + """ + _moment_acc_str = "moment" + _inf_norm_acc_str = "inf_norm" + _beta1_pow_acc_str = "beta1_pow_acc" + + def __init__(self, + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, + parameters=None, + weight_decay=None, + grad_clip=None, + name=None): + assert learning_rate is not None + assert beta1 is not None + assert beta2 is not None + assert epsilon is not None + super(Adamax, self).__init__( + learning_rate=learning_rate, + parameters=parameters, + weight_decay=weight_decay, + grad_clip=grad_clip, + name=name) + self.type = "adamax" + self._beta1 = beta1 + self._beta2 = beta2 + self._epsilon = epsilon + + def _create_accumulators(self, block, parameters): + # Create accumulator tensors for first moment and infinity norm + for p in parameters: + self._add_accumulator(self._moment_acc_str, p) + self._add_accumulator(self._inf_norm_acc_str, p) + self._add_accumulator( + name=self._beta1_pow_acc_str, + param=p, + fill_value=self._beta1, + shape=[1]) + + def _append_optimize_op(self, block, param_and_grad): + assert isinstance(block, framework.Block) + + moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0]) + inf_norm = self._get_accumulator(self._inf_norm_acc_str, + param_and_grad[0]) + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param_and_grad[0]) + # create the adamax optimize op + adamax_op = block.append_op( + type=self.type, + inputs={ + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "LearningRate": self._create_param_lr(param_and_grad), + "Moment": moment, + "InfNorm": inf_norm, + "Beta1Pow": beta1_pow_acc + }, + outputs={ + "ParamOut": param_and_grad[0], + "MomentOut": moment, + "InfNormOut": inf_norm + }, + attrs={ + "beta1": self._beta1, + "beta2": self._beta2, + "epsilon": self._epsilon + }, + stop_gradient=True) + + return adamax_op + + def _finish_update(self, block, parameters_and_grads): + """Update Beta1 Power accumulator + """ + assert isinstance(block, framework.Block) + for param, grad in parameters_and_grads: + if grad is None or param.trainable is False: + continue + with param.block.program._optimized_guard( + [param, grad]), name_scope('adamax'): + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param) + block.append_op( + type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}, + stop_gradient=True) diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py new file mode 100644 index 0000000000000000000000000000000000000000..f498fcbffa24ec188b57ceb2d3c6884fc1e135d2 --- /dev/null +++ b/python/paddle/optimizer/adamw.py @@ -0,0 +1,233 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .optimizer import Optimizer +from .adam import Adam +from ..fluid import framework +import paddle +__all__ = ['AdamW'] + + +class DecoupledWeightDecay(object): + def __init__(self, coeff=0.0, apply_decay_param_fun=None, **kwargs): + if not isinstance(coeff, float) and \ + not isinstance(coeff, framework.Variable): + raise TypeError("coeff should be float or Tensor.") + self._params_name = set() + self._apply_decay_param_fun = apply_decay_param_fun + self._coeff = coeff + super(DecoupledWeightDecay, self).__init__(**kwargs) + + def _scale_parameters(self, params_and_grads): + """ + Adds weight decay ops. + scaled_parameter = parameter * coeff + + Args: + params_and_grads: A list of (parameters, gradients) pairs, + the parameters need to decay. + Raises: + Exception: The type of coeff and parameter is not consistent. + """ + if isinstance(self._coeff, float) and self._coeff == 0.0: + return + + scaled_params = [] + for param, grad in params_and_grads: + # If no gradient then we don't need to do anything + if grad is None: + continue + if self._apply_decay_param_fun is not None \ + and not self._apply_decay_param_fun(param.name): + continue + + if isinstance(self._coeff, float): + assert param.dtype is not paddle.fluid.core.VarDesc.VarType.FP32, \ + "the type of coeff(float) and parameter(%s) is not consistent."%(self._coeff.dtype) + else: + assert self._coeff.dtype == param.dtype, \ + "the type of coeff(%s) and parameter(%s) is not consistent."%(self._coeff.dtype, param.dtype) + + with param.block.program._optimized_guard( + [param, grad]), framework.name_scope('weight decay'): + assert param.name not in self._params_name + scaled_params.append((param, grad, param * self._coeff)) + self._params_name.add(param.name) + return scaled_params + + def backward(self, **kargs): + return super(DecoupledWeightDecay, self).backward(**kargs) + + def _apply_optimize(self, **kargs): + return super(DecoupledWeightDecay, self)._apply_optimize(**kargs) + + def minimize(self, + loss, + startup_program=None, + parameters=None, + no_grad_set=None): + params_grads = self.backward( + loss=loss, + startup_program=startup_program, + parameters=parameters, + no_grad_set=no_grad_set) + scaled_params = self._scale_parameters(params_grads) + for p_grad_sgrad in scaled_params: + param, grad, scaled_param = p_grad_sgrad + with param.block.program._optimized_guard( + [param, grad]), framework.name_scope('weight decay'): + updated_param = paddle.fluid.layers.elementwise_sub( + x=param, y=scaled_param) + paddle.fluid.layers.assign(input=updated_param, output=param) + + optimize_ops = self._apply_optimize( + loss=loss, + params_grads=params_grads, + startup_program=startup_program) + return optimize_ops, params_grads + + @framework.dygraph_only + def step(self): + parameter_list = self._parameter_list + self._dtype = None + params_grads = [] + for param in self._parameter_list: + if not param.trainable: + continue + if param._grad_ivar() is not None: + grad_var = param._grad_ivar() + params_grads.append((param, grad_var)) + + scaled_params = self._scale_parameters(params_grads) + for p_grad_sgrad in 
scaled_params: + param, grad, scaled_param = p_grad_sgrad + with param.block.program._optimized_guard( + [param, grad]), framework.name_scope('weight decay'): + updated_param = paddle.fluid.layers.elementwise_sub( + x=param, y=scaled_param) + paddle.fluid.layers.assign(input=updated_param, output=param) + optimize_ops = self._apply_optimize( + loss=None, startup_program=None, params_grads=params_grads) + + def __str__(self): + return " ".join(["Weight Decay, params:", ",".join(self._params_name)]) + + +class AdamW(DecoupledWeightDecay, Adam): + """ + The AdamW optimizer is implemented based on the AdamW Optimization + in paper `DECOUPLED WEIGHT DECAY REGULARIZATION `_. + it can resolves the problem of L2 regularization failure in the Adam optimizer. + + .. math:: + + t & = t + 1 + + moment\_1\_out & = {\\beta}_1 * moment\_1 + (1 - {\\beta}_1) * grad + + moemnt\_2\_out & = {\\beta}_2 * moment\_2 + (1 - {\\beta}_2) * grad * grad + + learning\_rate & = learning\_rate * \\ + \\frac{\sqrt{1 - {\\beta}_2^t}}{1 - {beta}_1^t} + + param\_out & = param - learning\_rate * (\\frac{moment\_1}{\sqrt{moment\_2} + \epsilon} + \lambda * param) + + + Args: + learning_rate (float|LearningRateDecay, optional): The learning rate used to update ``Parameter``. + It can be a float value or a LearningRateDecay. The default value is 0.001. + parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates. + It should be a float number or a Tensor with shape [1] and data type as float32. + The default value is 0.9. + beta2 (float|Tensor, optional): The exponential decay rate for the 2nd moment estimates. + It should be a float number or a Tensor with shape [1] and data type as float32. + The default value is 0.999. + epsilon (float, optional): A small float value for numerical stability. + weight_decay (float|Tensor): The weight decay coefficient, it can be float or Tensor. The default value is 0.0. + The default value is 1e-08. + apply_decay_param_fun (function|None): If it is not None, + only tensors that makes apply_decay_param_fun(Tensor)==True + will be updated. It only works when we want to specify tensors. + Default: None. + grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of + some derived class of ``GradientClipBase`` . There are three cliping strategies + ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , + :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping. + name (str, optional): Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name`. + The default value is None. + lazy_mode (bool, optional): The official Adam algorithm has two moving-average accumulators. + The accumulators are updated at every step. Every element of the two moving-average + is updated in both dense mode and sparse mode. If the size of parameter is very large, + then the update may be very slow. The lazy mode only update the element that has + gradient in current mini-batch, so it will be much more faster. But this mode has + different semantics with the original Adam algorithm and may lead to different result. + The default value is False. 
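+
+        Conceptually, each update first decays the parameter directly and then applies an
+        ordinary Adam step to the unmodified gradient. A rough sketch of that order of
+        operations (illustrative only, not the exact kernels used internally):
+
+        .. code-block:: python
+
+            # decoupled decay: the parameter shrinks towards zero independently of the gradient
+            param = param - weight_decay * param
+            # followed by a plain Adam update using the first/second moment estimates
+            param = param - lr * moment_1 / (moment_2 ** 0.5 + epsilon)
+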
+ **Notes**: + **Currently, AdamW doesn't support sparse parameter optimization.** + + Examples: + .. code-block:: python + import paddle + import numpy as np + + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.mean(out) + + beta1 = paddle.to_tensor([0.9], dtype="float32") + beta2 = paddle.to_tensor([0.99], dtype="float32") + + adam = paddle.optimizer.AdamW(learning_rate=0.1, + parameters=linear.parameters(), + beta1=beta1, + beta2=beta2, + weight_decay=0.01) + out.backward() + adam.step() + adam.clear_grad() + + """ + + def __init__(self, + learning_rate=0.001, + parameters=None, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, + weight_decay=0.0, + apply_decay_param_fun=None, + grad_clip=None, + name=None, + lazy_mode=False): + args_dict = { + "learning_rate": learning_rate, + "parameters": parameters, + "beta1": beta1, + "beta2": beta2, + "epsilon": epsilon, + "grad_clip": grad_clip, + "name": name, + "lazy_mode": lazy_mode + } + super(AdamW, self).__init__( + weight_decay, + apply_decay_param_fun=apply_decay_param_fun, + **args_dict) diff --git a/python/paddle/optimizer/lr_scheduler.py b/python/paddle/optimizer/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..d01e62abaa6374e7fde892c6ae52c16b4b0f13e2 --- /dev/null +++ b/python/paddle/optimizer/lr_scheduler.py @@ -0,0 +1,1442 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import numpy +import warnings +from paddle import Tensor + +__all__ = [ + 'NoamLR', 'PiecewiseLR', 'NaturalExpLR', 'InverseTimeLR', 'PolynomialLR', + 'LinearLrWarmup', 'ExponentialLR', 'MultiStepLR', 'StepLR', 'LambdaLR', + 'ReduceLROnPlateau', 'CosineAnnealingLR' +] + + +class _LRScheduler(object): + """LRScheduler Base class. + + Define the common interface of an LRScheduler. + User can 'form paddle.optimizer.lr_scheduler import _LRScheduler' + And inherit from it to have a custom implementation of get_lr(). + """ + + def __init__(self, learning_rate=0.1, last_epoch=-1, verbose=False): + if not isinstance(learning_rate, (float, int)): + raise TypeError( + "The type of learning rate must be float, but received {}". + format(type(learning_rate))) + self.base_lr = float(learning_rate) + self.last_lr = float(learning_rate) + self.last_epoch = last_epoch + self.verbose = verbose + self._var_name = None + + self.step() + + def __call__(self): + """ + Return last computed learning rate on current epoch. + """ + return self.last_lr + + def step(self, epoch=None): + """ + 'step' should be called after 'minimize' . It will update the learning rate in optimizer according to 'epoch'. + The new learning rate will take effect on next epoch. + + Args: + epoch (int, None): specify current epoch. Default: None. Auto-increment from last_epoch=-1. + + Returns: + None + + Examples: + Please refer to the example of current _LRScheduler. 
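+
+            A minimal sketch (assuming ``scheduler`` is an instance of any ``_LRScheduler`` subclass):
+
+            .. code-block:: python
+
+                scheduler.step(epoch=10)  # jump to a given epoch, e.g. when resuming training
+                scheduler.step()          # afterwards, epochs auto-increment (here to 11)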
+ """ + if epoch is None: + self.last_epoch += 1 + self.last_lr = self.get_lr() + else: + self.last_epoch = epoch + if hasattr(self, "_get_closed_form_lr"): + self.last_lr = self._get_closed_form_lr() + else: + self.last_lr = self.get_lr() + + if self.verbose: + print('Epoch {}: {} set learning rate to {}.'.format( + self.last_epoch, self.__class__.__name__, self.last_lr)) + + def state_dict(self): + """ + Returns the state of the scheduler as a :class:`dict`. + + It is a subset of self.__dict__ . + """ + self._state_keys() + state_dict = {} + for key in self.keys: + if key not in self.__dict__: + continue + value = self.__dict__[key] + if isinstance(value, Tensor): + assert value.shape == [ + 1 + ], "shape of Tensor in state_dict must be [1] {}".format( + value.shape) + value = value.numpy()[0] + state_dict[key] = value + + return state_dict + + # For those subclass who overload _LRScheduler, "last_epoch, last_lr" will be saved by default. + # (Note): you can change it for your subclass. + def _state_keys(self): + """ + set the keys in self.__dict__ that are needed to be saved. + """ + self.keys = ['last_epoch', 'last_lr'] + + def set_dict(self, state_dict): + """ + Loads the schedulers state. + """ + self._state_keys() + for key in self.keys: + if key in state_dict: + self.__dict__[key] = state_dict[key] + else: + raise RuntimeError( + "Please check whether state_dict is correct for optimizer. Can't find [ {} ] in state_dict". + format(key)) + if len(state_dict) > len(self.keys): + warnings.warn( + "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict" + ) + + # alias for set_dict + set_state_dict = set_dict + + def get_lr(self): + # calculate by python float + raise NotImplementedError + + +class NoamLR(_LRScheduler): + """ + + Applies Noam Lear to the initial learning rate. + + The algorithm can be described as following. + + .. math:: + + new\_learning\_rate = learning\_rate * d_{model}^{-0.5} * min(epoch^{-0.5}, epoch * warmup\_steps^{-1.5}) + + Please reference `attention is all you need `_ + + + Args: + d$_{model}$(int): The dimensionality of input and output feature vector of model. It is a python int number. + warmup_steps(int): The number of warmup steps. A super parameter. It is a python int number + learning_rate (float): The initial learning rate. It is a python float number. Default: 1.0. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``NoamLR`` instance to schedule learning rate. + + Examples: + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.NoamLR(d_model=0.01, warmup_steps=100, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on static mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.NoamLR(d_model=0.01, warmup_steps=100, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + + """ + + def __init__(self, + d_model, + warmup_steps, + learning_rate=1.0, + last_epoch=-1, + verbose=False): + self.d_model = d_model + self.warmup_steps = warmup_steps + super(NoamLR, self).__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + if self.last_epoch == 0: + a = 1 + else: + a = self.last_epoch**-0.5 + b = self.warmup_steps**-1.5 * self.last_epoch + return self.base_lr * (self.d_model**-0.5) * min(a, b) + + +class PiecewiseLR(_LRScheduler): + """ + + Piecewise learning rate scheduler. + + The algorithm can be described as the code below: + + .. code-block:: text + + boundaries = [100, 200] + values = [1.0, 0.5, 0.1] + if epoch < 100: + learning_rate = 1.0 + elif 100 <= global_step < 200: + learning_rate = 0.5 + else: + learning_rate = 0.1 + + Args: + boundaries(list): A list of steps numbers. The type of element in the list is python int. + values(list): A list of learning rate values that will be picked during different epoch boundaries. + The type of element in the list is python float. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``PiecewiseLR`` instance to schedule learning rate. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.PiecewiseLR(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on static mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.PiecewiseLR(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + """ + + def __init__(self, boundaries, values, last_epoch=-1, verbose=False): + self.boundaries = boundaries + self.values = values + super(PiecewiseLR, self).__init__( + last_epoch=last_epoch, verbose=verbose) + + def get_lr(self): + + for i in range(len(self.boundaries)): + if self.last_epoch < self.boundaries[i]: + return self.values[i] + return self.values[len(self.values) - 1] + + +class NaturalExpLR(_LRScheduler): + """ + + Applies natural exponential decay to the initial learning rate. + + The algorithm can be described as following: + + .. math:: + + new\_learning\_rate = learning\_rate * e^{- gama * epoch} + + Args: + learning_rate (float): The initial learning rate. It is a python float number. + gamma (float, optional): A Ratio to update the learning rate. Default: 0.1. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``NaturalExpLR`` instance to schedule learning rate. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.NaturalExpLR(learning_rate=0.5, gamma=0.1, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on static mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.NaturalExpLR(learning_rate=0.5, gamma=0.1, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + """ + + def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): + self.gamma = gamma + super(NaturalExpLR, self).__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + return self.base_lr * math.exp(-1 * self.gamma * self.last_epoch) + + +class InverseTimeLR(_LRScheduler): + """ + + Applies inverse time decay to the initial learning rate. + + The algorithm can be described as following: + + .. math:: + + new\_learning\_rate = \\frac{learning\_rate}{1 + gamma * epoch} + + Args: + learning_rate (float): The initial learning rate. It is a python float number. + gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . + It should be less than 1.0. Default: 0.1. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``InverseTimeLR`` instance to schedule learning rate. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.InverseTimeLR(learning_rate=0.5, gamma=0.1, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on static mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.InverseTimeLR(learning_rate=0.5, gamma=0.1, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + + """ + + def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): + self.gamma = gamma + super(InverseTimeLR, self).__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + return self.base_lr / (1 + self.gamma * self.last_epoch) + + +class PolynomialLR(_LRScheduler): + """ + + Applies polynomial decay to the initial learning rate. + + The algorithm can be described as following. + + If cycle is set to True, then: + + .. math:: + + decay\_steps & = decay\_steps * math.ceil(\\frac{epoch}{decay\_steps}) + + new\_learning\_rate & = (learning\_rate-end\_lr)*(1-\\frac{epoch}{decay\_steps})^{power}+end\_lr + + If cycle is set to False, then: + + .. math:: + + epoch & = min(epoch, decay\_steps) + + new\_learning\_rate & = (learning\_rate-end\_lr)*(1-\\frac{epoch}{decay\_steps})^{power}+end\_lr + + + Args: + learning_rate (float): The initial learning rate. It is a python float number. + decay_steps(int): The decay step size. It determines the decay cycle. + end_lr(float, optional): The minimum final learning rate. Default: 0.0001. + power(float, optional): Power of polynomial. Default: 1.0. + cycle(bool, optional): Whether the learning rate rises again. If True, then the learning rate will rise when it decrease + to ``end_lr`` . If False, the learning rate is monotone decreasing. Default: False. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``PolynomialLR`` instance to schedule learning rate. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.PolynomialLR(learning_rate=0.5, decay_steps=20, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on statich mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.PolynomialLR(learning_rate=0.5, decay_steps=20, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + """ + + def __init__(self, + learning_rate, + decay_steps, + end_lr=0.0001, + power=1.0, + cycle=False, + last_epoch=-1, + verbose=False): + self.decay_steps = decay_steps + self.end_lr = end_lr + self.power = power + self.cycle = cycle + super(PolynomialLR, self).__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + tmp_epoch_num = self.last_epoch + tmp_decay_steps = self.decay_steps + if self.cycle: + div_res = math.ceil( + float(self.last_epoch) / float(self.decay_steps)) + + if self.last_epoch == 0: + div_res = 1 + tmp_decay_steps = self.decay_steps * div_res + else: + tmp_epoch_num = min(self.last_epoch, self.decay_steps) + + return (self.base_lr - self.end_lr) * ( + (1 - float(tmp_epoch_num) / float(tmp_decay_steps) + )**self.power) + self.end_lr + + +class LinearLrWarmup(_LRScheduler): + """ + + Linear learning rate warm up strategy. Update the learning rate preliminarily before the normal learning rate scheduler. + For more information, please refer to `Bag of Tricks for Image Classification with Convolutional Neural Networks `_ + + When epoch < warmup_steps, learning rate is updated as: + + .. code-block:: text + + lr = start_lr + (end_lr - start_lr) * (epoch / warmup_steps) + + where start_lr is the initial learning rate, and end_lr is the final learning rate; + + When epoch >= warmup_steps, learning rate is updated as: + + .. code-block:: text + + lr = learning_rate + + where lr is float or any subclass of ``_LRScheduler`` . + + Args: + learning_rate (float|_LRScheduler): The learning rate after warm-up. It is a python float number or any subclass of ``_LRScheduler`` . + warmup_steps (int): total steps of warm up. + start_lr (float): Initial learning rate of warm up. + end_lr (float): Final learning rate of warm up. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``LinearLrWarmup`` instance to schedule learning rate. 
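+
+    For example, with ``start_lr=0.0``, ``end_lr=0.5`` and ``warmup_steps=20`` (the values used
+    in the examples below), the learning rate at epoch 5 is 0.0 + (0.5 - 0.0) * 5 / 20 = 0.125;
+    from epoch 20 onwards the wrapped ``learning_rate`` takes over.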
+ + Examples: + + .. code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.LinearLrWarmup( + learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on statich mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.LinearLrWarmup( + learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + """ + + def __init__(self, + learning_rate, + warmup_steps, + start_lr, + end_lr, + last_epoch=-1, + verbose=False): + type_check = isinstance(learning_rate, float) or isinstance( + learning_rate, int) or isinstance(learning_rate, _LRScheduler) + if not type_check: + raise TypeError( + "the type of learning_rate should be [int, float or _LRScheduler], the current type is {}". + format(learning_rate)) + self.learning_rate = learning_rate + self.warmup_steps = warmup_steps + self.start_lr = start_lr + self.end_lr = end_lr + assert end_lr > start_lr, "end_lr {} must be greater than start_lr {}".format( + end_lr, start_lr) + super(LinearLrWarmup, self).__init__(start_lr, last_epoch, verbose) + + def get_lr(self): + if self.last_epoch < self.warmup_steps: + return (self.end_lr - self.start_lr) * float( + self.last_epoch) / float(self.warmup_steps) + self.start_lr + else: + if isinstance(self.learning_rate, _LRScheduler): + self.learning_rate.step() + return self.learning_rate() + + return self.learning_rate + + +class ExponentialLR(_LRScheduler): + """ + + Update learning rate by 'gamma' each epoch. + + The algorithm can be described as following. + + .. math:: + + new\_learning\_rate = last\_learning\_rate * gamma + + Args: + learning_rate (float): The initial learning rate. It is a python float number. + gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . + It should be less than 1.0. Default: 0.1. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``ExponentialLR`` instance to schedule learning rate. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.ExponentialLR(learning_rate=0.5, gamma=0.9, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on statich mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.ExponentialLR(learning_rate=0.5, gamma=0.9, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + """ + + def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): + self.gamma = gamma + super(ExponentialLR, self).__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + return self.base_lr * (self.gamma**self.last_epoch) + + +class MultiStepLR(_LRScheduler): + """ + Update the learning rate by ``gama`` once ``epoch`` reaches one of the milestones. + + The algorithm can be described as the code below. + + .. code-block:: text + + learning_rate = 0.5 + milestones = [30, 50] + gamma = 0.1 + if epoch < 30: + learning_rate = 0.5 + elif epoch < 50: + learning_rate = 0.05 + else: + learning_rate = 0.005 + + Args: + learning_rate (float): The initial learning rate. It is a python float number. + milestones (tuple|list): List or tuple of each boundaries. Must be increasing. + gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . + It should be less than 1.0. Default: 0.1. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + + Returns: + ``MultiStepLR`` instance to schedule learning rate. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.MultiStepLR(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on statich mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.MultiStepLR(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + """ + + def __init__(self, + learning_rate, + milestones, + gamma=0.1, + last_epoch=-1, + verbose=False): + if not isinstance(milestones, (tuple, list)): + raise TypeError( + "The type of 'milestones' in 'MultiStepDecay' must be 'tuple, list', but received %s." + % type(milestones)) + + if not all([ + milestones[i] < milestones[i + 1] + for i in range(len(milestones) - 1) + ]): + raise ValueError('The elements of milestones must be incremented') + if gamma >= 1.0: + raise ValueError('gamma should be < 1.0.') + + self.milestones = milestones + self.gamma = gamma + super(MultiStepLR, self).__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + for i in range(len(self.milestones)): + if self.last_epoch < self.milestones[i]: + return self.base_lr * (self.gamma**i) + return self.base_lr * (self.gamma**len(self.milestones)) + + +class StepLR(_LRScheduler): + """ + Update the learning rate of ``optimizer`` by ``gamma`` every ``step_size`` number of epoch. + + The algorithm can be described as the code below. + + .. code-block:: text + + learning_rate = 0.5 + step_size = 30 + gamma = 0.1 + + learning_rate = 0.5 if epoch < 30 + learning_rate = 0.05 if 30 <= epoch < 60 + learning_rate = 0.005 if 60 <= epoch < 90 + ... + + Args: + learning_rate (float): The initial learning rate. It is a python float number. + step_size (int): the interval to update. + gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . + It should be less than 1.0. Default: 0.1. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``StepLR`` instance to schedule learning rate. + + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.StepLR(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on statich mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.StepLR(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + """ + + def __init__(self, + learning_rate, + step_size, + gamma=0.1, + last_epoch=-1, + verbose=False): + if not isinstance(step_size, int): + raise TypeError( + "The type of 'step_size' must be 'int', but received %s." % + type(step_size)) + if gamma >= 1.0: + raise ValueError('gamma should be < 1.0.') + + self.step_size = step_size + self.gamma = gamma + super(StepLR, self).__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + i = self.last_epoch // self.step_size + return self.base_lr * (self.gamma**i) + + +class LambdaLR(_LRScheduler): + """ + Sets the learning rate of ``optimizer`` by function ``lr_lambda`` . ``lr_lambda`` is funciton which receives ``epoch`` . + + The algorithm can be described as the code below. + + .. code-block:: text + + learning_rate = 0.5 # init learning_rate + lr_lambda = lambda epoch: 0.95 ** epoch + + learning_rate = 0.5 # epoch 0 + learning_rate = 0.475 # epoch 1 + learning_rate = 0.45125 # epoch 2 + + Args: + learning_rate (float): The initial learning rate. It is a python float number. + lr_lambda (function): A function which computes a factor by ``epoch`` , and then multiply the initial learning rate by this factor. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``LambdaLR`` instance to schedule learning rate. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.LambdaLR(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on statich mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.LambdaLR(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + + """ + + def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False): + if not callable(lr_lambda): + raise TypeError( + "The type of 'lr_lambda' in 'LambdaLR' must be 'function', but received %s." + % type(lr_lambda)) + + self.lr_lambda = lr_lambda + super(LambdaLR, self).__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + return self.base_lr * self.lr_lambda(self.last_epoch) + + +class ReduceLROnPlateau(_LRScheduler): + """ + Reduce learning rate when ``metrics`` has stopped descending. Models often benefit from reducing the learning rate + by 2 to 10 times once model performance has no longer improvement. + + The ``metrics`` is the one which has been pass into ``step`` , it must be 1-D Tensor with shape [1]. When ``metrics`` + stop descending for a ``patience`` number of epochs, the learning rate will be reduced to ``learning_rate * factor`` . + (Specially, ``mode`` can also be set to ``'max`` , in this case, when ``metrics`` stop ascending for a ``patience`` + number of epochs, the learning rate will be reduced.) + + In addition, After each reduction, it will wait a ``cooldown`` number of epochs before resuming above operation. + + Args: + learning_rate (float): The initial learning rate. It is a python float number. + mode (str, optional): ``'min'`` or ``'max'`` can be selected. Normally, it is ``'min'`` , which means that the + learning rate will reduce when ``loss`` stops descending. Specially, if it's set to ``'max'`` , the learning + rate will reduce when ``loss`` stops ascending. Default: ``'min'`` . + factor (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * factor`` . + It should be less than 1.0. Default: 0.1. + patience (int, optional): When ``loss`` doesn't improve for this number of epochs, learing rate will be reduced. + Default: 10. + threshold (float, optional): ``threshold`` and ``threshold_mode`` will determine the minimum change of ``loss`` . + This make tiny changes of ``loss`` will be ignored. Default: 1e-4. 
+        threshold_mode (str, optional): ``'rel'`` or ``'abs'`` can be selected. In ``'rel'`` mode, the minimum change of ``loss``
+            is ``last_loss * threshold`` , where ``last_loss`` is ``loss`` in the last epoch. In ``'abs'`` mode, the minimum
+            change of ``loss`` is ``threshold`` . Default: ``'rel'`` .
+        cooldown (int, optional): The number of epochs to wait before resuming normal operation. Default: 0.
+        min_lr (float, optional): The lower bound of the learning rate after reduction. Default: 0.
+        epsilon (float, optional): Minimal decay applied to the learning rate. If the difference between the new and the old
+            learning rate is smaller than ``epsilon`` , the update is ignored. Default: 1e-8.
+        verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False``.
+
+
+    Returns:
+        ``ReduceLROnPlateau`` instance to schedule learning rate.
+
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+
+            # train on default dygraph mode
+            paddle.disable_static()
+            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
+            linear = paddle.nn.Linear(10, 10)
+            scheduler = paddle.optimizer.ReduceLROnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters())
+            for epoch in range(20):
+                for batch_id in range(2):
+                    x = paddle.to_tensor(x)
+                    out = linear(x)
+                    loss = paddle.reduce_mean(out)
+                    out.backward()
+                    sgd.minimize(loss)
+                    linear.clear_gradients()
+                scheduler.step(loss)
+
+            # train on static mode
+            paddle.enable_static()
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.static.program_guard(main_prog, start_prog):
+                x = paddle.static.data(name='x', shape=[-1, 4, 5])
+                y = paddle.static.data(name='y', shape=[-1, 4, 5])
+                z = paddle.static.nn.fc(x, 100)
+                loss = paddle.mean(z)
+                scheduler = paddle.optimizer.ReduceLROnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
+                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
+                sgd.minimize(loss)
+                lr_var = sgd._global_learning_rate()
+
+            exe = paddle.static.Executor()
+            exe.run(start_prog)
+            for epoch in range(20):
+                for batch_id in range(2):
+                    out = exe.run(
+                        main_prog,
+                        feed={
+                            'x': np.random.randn(3, 4, 5).astype('float32'),
+                            'y': np.random.randn(3, 4, 5).astype('float32')
+                        },
+                        fetch_list=lr_var.name)
+                scheduler.step(out[0])
+
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 mode='min',
+                 factor=0.1,
+                 patience=10,
+                 threshold=1e-4,
+                 threshold_mode='rel',
+                 cooldown=0,
+                 min_lr=0,
+                 epsilon=1e-8,
+                 verbose=False):
+        mode = mode.lower()
+        if mode not in ['min', 'max']:
+            raise ValueError('mode: ' + mode + ' is unknown!')
+        self.mode = mode
+
+        if factor >= 1.0:
+            raise ValueError(
+                'new_lr = origin_lr * factor and factor should be < 1.0.')
+        self.factor = factor
+
+        threshold_mode = threshold_mode.lower()
+        if threshold_mode not in ['rel', 'abs']:
+            raise ValueError('threshold mode: ' + threshold_mode +
+                             ' is unknown!')
+        self.threshold_mode = threshold_mode
+        if not isinstance(learning_rate, (float, int)):
+            raise TypeError(
+                "The type of 'learning_rate' in 'ReduceLROnPlateau' must be 'float, int', but received %s."
+                % type(learning_rate))
+
+        self.verbose = verbose
+        self.patience = patience
+        self.threshold = threshold
+        self.cooldown = cooldown
+        self.min_lr = min_lr
+        self.epsilon = epsilon
+
+        self.cooldown_counter = 0
+        self.best = None
+        self.num_bad_epochs = 0
+
+        # The parent __init__ can not be called here, so the required state is initialized directly instead.
+        self.base_lr = float(learning_rate)
+        self.last_lr = float(learning_rate)
+        self.last_epoch = 0
+        self.verbose = verbose
+        self._var_name = None
+
+    # "cooldown_counter / best / num_bad_epochs / last_epoch / last_lr" will be stored.
+    def _state_keys(self):
+        self.keys = [
+            'cooldown_counter', 'best', 'num_bad_epochs', 'last_epoch',
+            'last_lr'
+        ]
+
+    def step(self, metrics, epoch=None):
+        """
+        ``step`` should be called after ``minimize`` . It will update the learning rate in the optimizer according to ``metrics`` .
+        The new learning rate will take effect on the next epoch.
+
+        Args:
+            metrics (Tensor|numpy.ndarray|float): The metric that is monitored to determine whether the learning rate will be reduced.
+                If it stops descending for a ``patience`` number of epochs, the learning rate will be reduced. If it's a 'Tensor' or
+                'numpy.ndarray', its shape must be [1].
+            epoch (int, optional): Specify current epoch. Default: None, which means it is auto-incremented from last_epoch=-1.
+
+        Returns:
+            None
+
+        Examples:
+            Please refer to the example of the current _LRScheduler.
+        """
+        if epoch is None:
+            self.last_epoch = self.last_epoch + 1
+        else:
+            self.last_epoch = epoch
+
+        # metrics must be a 1-D Tensor with shape [1]
+        if isinstance(metrics, (Tensor, numpy.ndarray)):
+            assert len(metrics.shape) == 1 and metrics.shape[0] == 1, "the metrics.shape " \
+                "should be (1,), but the current metrics.shape is {}. Maybe that " \
+                "you should call paddle.mean to process it first.".format(metrics.shape)
+        elif not isinstance(metrics,
+                            (int, float, numpy.float32, numpy.float64)):
+            raise TypeError(
+                "metrics must be 'int', 'float', 'np.float', 'numpy.ndarray' or 'paddle.Tensor', but received {}".
+                format(type(metrics)))
+
+        if self.cooldown_counter > 0:
+            self.cooldown_counter -= 1
+        else:
+            if self.best is None or self._is_better(metrics, self.best):
+                self.best = metrics
+                self.num_bad_epochs = 0
+            else:
+                self.num_bad_epochs += 1
+
+            if self.num_bad_epochs > self.patience:
+                self.cooldown_counter = self.cooldown
+                self.num_bad_epochs = 0
+                new_lr = max(self.last_lr * self.factor, self.min_lr)
+                if self.last_lr - new_lr > self.epsilon:
+                    self.last_lr = new_lr
+                    if self.verbose:
+                        print('Epoch {}: {} set learning rate to {}.'.format(
+                            self.last_epoch, self.__class__.__name__,
+                            self.last_lr))
+
+    def _is_better(self, current, best):
+        if self.mode == 'min' and self.threshold_mode == 'rel':
+            return current < best - best * self.threshold
+
+        elif self.mode == 'min' and self.threshold_mode == 'abs':
+            return current < best - self.threshold
+
+        elif self.mode == 'max' and self.threshold_mode == 'rel':
+            return current > best + best * self.threshold
+
+        else:
+            return current > best + self.threshold
+
+
+class CosineAnnealingLR(_LRScheduler):
+    """
+
+    Set the learning rate using a cosine annealing schedule, where :math:`\eta_{max}` is set to
+    the initial learning_rate. :math:`T_{cur}` is the number of epochs since the last restart in
+    SGDR. The algorithm can be described by the formula below.
+
+    .. 
math:: + \begin{aligned} + \eta_t & = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 + + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right), + & T_{cur} \neq (2k+1)T_{max}; \\ + \eta_{t+1} & = \eta_{t} + \frac{1}{2}(\eta_{max} - \eta_{min}) + \left(1 - \cos\left(\frac{1}{T_{max}}\pi\right)\right), + & T_{cur} = (2k+1)T_{max}. + \end{aligned} + + It has been proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts `_. + Note that this only implements the cosine annealing part of SGDR, and not the restarts. + + Args: + learning_rate (float): The initial learning rate, that is :math:`\eta_{max}` . It can be set to python float or int number. + T_max (int): Maximum number of iterations. It is half of the decay cycle of learning rate. + eta_min (float|int, optional): Minimum learning rate, that is :math:`\eta_{min}` . Default: 0. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``CosineAnnealingLR`` instance to schedule learning rate. + + Examples: + + .. code-block:: python + + import paddle + import numpy as np + + # train on default dygraph mode + paddle.disable_static() + x = np.random.uniform(-1, 1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.CosineAnnealingLR(learning_rate=0.5, T_max=10, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameter_list=linear.parameters()) + for epoch in range(20): + for batch_id in range(2): + x = paddle.to_tensor(x) + out = linear(x) + loss = paddle.reduce_mean(out) + out.backward() + sgd.minimize(loss) + linear.clear_gradients() + scheduler.step() + + # train on statich mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[-1, 4, 5]) + y = paddle.static.data(name='y', shape=[-1, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.CosineAnnealingLR(learning_rate=0.5, T_max=10, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + lr_var = sgd._global_learning_rate() + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(2): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=lr_var.name) + scheduler.step() + """ + + def __init__(self, + learning_rate, + T_max, + eta_min=0, + last_epoch=-1, + verbose=False): + if not isinstance(T_max, int): + raise TypeError( + "The type of 'T_max' in 'CosineAnnealingLR' must be 'int', but received %s." + % type(T_max)) + if not isinstance(eta_min, (float, int)): + raise TypeError( + "The type of 'eta_min' in 'CosineAnnealingLR' must be 'float, int', but received %s." 
+ % type(eta_min)) + self.T_max = T_max + self.eta_min = float(eta_min) + super(CosineAnnealingLR, self).__init__(learning_rate, last_epoch, + verbose) + + def get_lr(self): + if self.last_epoch == 0: + return self.base_lr + elif (self.last_epoch - 1 - self.T_max) % (2 * self.T_max) == 0: + return self.last_lr + (self.base_lr - self.eta_min) * (1 - math.cos( + math.pi / self.T_max)) / 2 + + return (1 + math.cos(math.pi * self.last_epoch / self.T_max)) / ( + 1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max)) * ( + self.last_lr - self.eta_min) + self.eta_min + + def _get_closed_form_lr(self): + return self.eta_min + (self.base_lr - self.eta_min) * (1 + math.cos( + math.pi * self.last_epoch / self.T_max)) / 2 diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..2c2f6f1ce7e14ff5960a4bf492e9cf9158ac727c --- /dev/null +++ b/python/paddle/optimizer/optimizer.py @@ -0,0 +1,995 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import six +import logging +from collections import defaultdict + +from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table +from paddle.fluid.framework import Program, Variable, name_scope, default_main_program, default_startup_program, device_guard + +from ..fluid import framework +from ..fluid import layers +from ..fluid import unique_name +from ..fluid.backward import append_backward, _some_in_set_, _append_grad_suffix_, _get_no_grad_set_name +from ..fluid.clip import GradientClipBase, GradientClipByNorm, error_clip_callback, append_gradient_clip_ops +from ..fluid.framework import program_guard +from ..fluid.initializer import Constant +from ..fluid.layer_helper import LayerHelper +from ..fluid.layers import ops +from ..fluid.regularizer import append_regularization_ops +from ..fluid.dygraph import base as imperative_base +from ..fluid.dygraph import no_grad +from ..fluid.dygraph.learning_rate_scheduler import LearningRateDecay, _LearningRateEpochDecay +from paddle.fluid import core +from paddle.fluid.layers import tensor +from functools import reduce +from ..fluid.wrapped_decorator import signature_safe_contextmanager +from .. import compat as cpt + +__all__ = ['Optimizer'] + + +class Optimizer(object): + """Optimizer Base class. + + Define the common interface of an optimizer. + User should not use this class directly, + but need to use one of it's implementation. + + Args: + learning_rate (float|LearningRateDecay): The learning rate used to update ``Parameter``. + It can be a float value or a LearningRateDecay. + parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. 
+        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
+            It can be a float value as the coefficient of L2 regularization or \
+            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
+            If a parameter has already set a regularizer using :ref:`api_fluid_ParamAttr`, \
+            the regularization setting here in the optimizer will be ignored for this parameter. \
+            Otherwise, the regularization setting here in the optimizer will take effect. \
+            Default None, meaning there is no regularization.
+        grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of \
+            some derived class of ``GradientClipBase`` . There are three clipping strategies \
+            ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , \
+            :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
+        name (str, optional): Normally there is no need for user to set this property.
+            For more information, please refer to :ref:`api_guide_Name`.
+            The default value is None.
+
+    Returns:
+        Base class for optimizer.
+
+    Examples:
+        .. code-block:: python
+
+            # Take the subclass Adam as an example.
+            import paddle
+            import numpy as np
+
+            paddle.disable_static()
+            inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
+            linear = paddle.nn.Linear(10, 10)
+            inp = paddle.to_tensor(inp)
+            out = linear(inp)
+            loss = paddle.mean(out)
+            adam = paddle.optimizer.Adam(learning_rate=0.1,
+                    parameters=linear.parameters())
+            out.backward()
+            adam.step()
+            adam.clear_grad()
+
+    """
+
+    @imperative_base.no_grad()
+    def __init__(self,
+                 learning_rate,
+                 parameters=None,
+                 weight_decay=None,
+                 grad_clip=None,
+                 name=None):
+        self._parameter_list = list(
+            parameters) if parameters is not None else None
+        self._name = name
+        if framework.in_dygraph_mode():
+            if not isinstance(learning_rate, float) and \
+                    not isinstance(learning_rate, LearningRateDecay):
+                raise TypeError(
+                    "learning rate should be float or LearningRateDecay, got %s here"
+                    % type(learning_rate))
+            if self._parameter_list is None:
+                raise AttributeError(
+                    "parameters argument given to the Optimizer should not be None in dygraph mode."
+                )
+            if weight_decay is not None:
+                for param in self._parameter_list:
+                    if param.regularizer is not None:
+                        logging.info(
+                            "If regularizer of a Parameter has been set by 'paddle.ParamAttr' or 'static.WeightNormParamAttr' already, "
+                            "the weight_decay[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
+ % weight_decay.__str__()) + break + else: + if not isinstance(learning_rate, float) and \ + not isinstance(learning_rate, framework.Variable): + raise TypeError( + "learning rate should be float or Tensor, got %s here" % + type(learning_rate)) + + if grad_clip is not None: + if not isinstance(grad_clip, GradientClipBase): + raise TypeError( + "'grad_clip' should be an instance of GradientClipBase's derived class" + ) + if isinstance(weight_decay, float): + from ..fluid.regularizer import L2Decay + self.regularization = L2Decay(weight_decay) + else: + self.regularization = weight_decay + self._grad_clip = grad_clip + self._learning_rate = learning_rate + # the learning rate type should be inferenced from loss + self._dtype = None + # each program should have a independent learning rate + # program -> tensor(learning_rate) + self._learning_rate_map = dict() + if isinstance(self._learning_rate, framework.Variable): + self._learning_rate_map[framework.default_main_program( + )] = self._learning_rate + # Dictionary of accumulators. Some optimizer subclasses need to + # allocate and manage extra tensors associated with the parameters + # to train. These tensors are called accumulators. + # {accum_name : { paramter_name : accumulator_for_parameter, ...}, ...} + self._accumulators = defaultdict(lambda: dict()) + self.helper = None + self._opti_name_list = [] + self._accumulators_holder = {} + self._param_device_map = dict() + self.clear_gradients = self.clear_grad + + @framework.dygraph_only + def state_dict(self): + ''' + Get state dict information from optimizer. It contain all the tensor used by optimizer. For Adam optimizer, contains beta1, beta2, momentum etc. If LearningRateDecay have been used, global_step will be include in state dict. + If the optimizer never be called(minimize function), the state_dict is empty. + + Args: + None + + Returns: + state_dict(dict) : dict contains all the Tensor used by optimizer + + Examples: + .. code-block:: python + + import paddle + paddle.disable_static() + emb = paddle.nn.Embedding([10, 10]) + + adam = paddle.optimizer.Adam(0.001, parameters=emb.parameters()) + state_dict = adam.state_dict() + + ''' + state_dict = {} + for k, v in self._accumulators.items(): + for para_name, var_tmp in v.items(): + state_dict[var_tmp.name] = var_tmp + # global step if use lr decay + if isinstance(self._learning_rate, LearningRateDecay): + state_dict["LR_Scheduler"] = self._learning_rate.state_dict() + + if not isinstance(self._learning_rate, _LearningRateEpochDecay): + var_tmp = None + var_temp = framework._varbase_creator( + None, name='global_step', dtype='int32') + + tensor.fill_constant( + [1], "int32", self._learning_rate.step_num, out=var_temp) + + state_dict['global_step'] = var_temp + return state_dict + + @framework.dygraph_only + def set_state_dict(self, state_dict): + ''' + Load optimizer state dict. For Adam optimizer, contains beta1, beta2, momentum etc. If LearningRateDecay have been used, global_step will be changed. + + Args: + state_dict(dict) : Dict contains all the Tensor needed by optimizer + Return: + None + + Examples: + .. 
code-block:: python + + import paddle + paddle.disable_static() + emb = paddle.nn.Embedding([10, 10]) + + state_dict = emb.state_dict() + paddle.framework.save(state_dict, "paddle_dy") + + adam = paddle.optimizer.Adam(learning_rate=paddle.nn.functional.noam_decay( 100, 10000), + parameters=emb.parameters()) + state_dict = adam.state_dict() + paddle.framework.save(state_dict, "paddle_dy") + + para_state_dict, opti_state_dict = paddle.framework.load( "paddle_dy") + + adam.set_state_dict(opti_state_dict) + + ''' + + if isinstance(self._learning_rate, LearningRateDecay): + self._learning_rate.set_dict(state_dict["LR_Scheduler"]) + + if not isinstance(self._learning_rate, _LearningRateEpochDecay): + assert 'global_step' in state_dict, \ + 'Global step not in state dict, Dygraph use LearningRateDecay, global_step must in state_dict' + global_step = state_dict['global_step'] + + if isinstance(global_step, Variable): + step_np = global_step + step_np = np.array(step_np.value().get_tensor()) + assert step_np.shape == (1,), \ + "global step shape is (1,), the shape is {}".format( step_np.shape ) + + self._learning_rate.step_num = int(step_np[0]) + elif isinstance(global_step, np.ndarray): + assert global_step.shape == (1,), \ + "global step shape is (1,), the shape is {}".format( global_step.shape ) + self._learning_rate.step_num = global_step[0] + else: + raise RuntimeError( + "Type not supprt, value in state dict must be [VarBase, Tensor, numpy], the type is ", + type(global_step)) + + self._accumulators_holder = state_dict + for k, v in self._accumulators.items(): + for para_name, var_tmp in v.items(): + assert var_tmp.name in state_dict, \ + "optimizer Tensor {} not found".format( var_tmp.name ) + var = var_tmp.value() + tensor = var.get_tensor() + model_np = np.array(tensor) + + load_para = state_dict[var_tmp.name] + + if isinstance(load_para, Variable): + load_para_np = load_para.numpy() + elif isinstance(load_para, core.VarBase): + load_para_np = load_para.numpy() + elif isinstance(load_para, np.ndarray): + load_para_np = load_para + else: + raise RuntimeError("State dict type {} not supprt".format( + str(type(load_para)))) + + assert model_np.shape == load_para_np.shape, \ + "Parameter shape not match, Dygraph Parameter [ {} ] need tensor with shape {} but load tensor with shape {}".format( + item.name, model_np.shape, load_para_np.shape) + + assert model_np.dtype == load_para_np.dtype, \ + "Parameter dtype not match, Dygraph Parameter [ {} ] need tensor with dtype {} but load tensor with dtype {}".format( + item.name, model_np.dtype, load_para_np.dtype) + + tensor.set(load_para_np, framework._current_expected_place()) + + def get_opti_var_name_list(self): + return self._opti_name_list + + def _create_global_learning_rate(self): + if imperative_base.enabled(): + # create learning rate tensor + if isinstance(self._learning_rate, float): + lr = self._global_learning_rate() + + if isinstance(lr, framework.Variable): + return + else: + self._learning_rate_map[framework.default_main_program( + )] = layers.create_global_var( + name=unique_name.generate("learning_rate"), + shape=[1], + value=float(self._learning_rate), + dtype='float32' if self._dtype is None else self._dtype, + persistable=True) + # get learning rate Tensor from LearningRateDecay + elif isinstance(self._learning_rate, LearningRateDecay): + self._learning_rate_map[framework.default_main_program( + )] = self._learning_rate() + else: + raise TypeError( + "optimizer's learning rate must be float or LearningRateDecay" + ) + else: + lr 
= self._global_learning_rate() + + if isinstance(lr, framework.Variable): + return + else: + if not isinstance(self._learning_rate, float): + raise TypeError( + "learning rate Tensor is create outside optimizer," + "can not create new learning rate Tensor for new program" + ) + + # create learning rate in the current main program + self._learning_rate_map[framework.default_main_program( + )] = layers.create_global_var( + name=unique_name.generate("learning_rate"), + shape=[1], + value=float(self._learning_rate), + dtype='float32' if self._dtype is None else self._dtype, + persistable=True) + + @framework.dygraph_only + def set_lr(self, value): + """ + :api_attr: imperative + + Set the value of the learning rate manually in the optimizer. If the optimizer use LearningRateDecay, + this API cannot be invoked, because it will lead to conflict. + + Args: + value (float|Tensor): the value of learning rate + + Returns: + None + + Examples: + .. code-block:: python + + import paddle + paddle.disable_static() + linear = paddle.nn.Linear(10, 10) + + adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) + + # set learning rate manually by python float value + lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] + for i in range(5): + adam.set_lr(lr_list[i]) + lr = adam.get_lr() + print("current lr is {}".format(lr)) + # Print: + # current lr is 0.2 + # current lr is 0.3 + # current lr is 0.4 + # current lr is 0.5 + # current lr is 0.6 + + + # set learning rate manually by framework Tensor + lr_var = paddle.create_global_var( + shape=[1], value=0.7, dtype='float32') + adam.set_lr(lr_var) + lr = adam.get_lr() + print("current lr is {}".format(lr)) + # Print: + # current lr is 0.7 + + + + """ + if not isinstance(value, (framework.Variable, float)): + raise TypeError( + "The type of 'value' in optimizer.set_lr must be (float, Tensor), but received %s." + % (type(value))) + if isinstance(self._learning_rate, LearningRateDecay): + raise RuntimeError( + "optimizer's learning rate can't be LearningRateDecay when invoke this API, because this will lead to conflict." + ) + if isinstance(value, float): + self._learning_rate = value + current_lr = self._global_learning_rate() + if current_lr is not None: + global_block = framework.default_main_program().global_block() + global_block.append_op( + type='fill_constant', + outputs={'Out': [current_lr]}, + attrs={ + 'dtype': current_lr.dtype, + 'shape': list(current_lr.shape), + 'value': float(value) + }, + stop_gradient=True) + else: + assert len(value.shape) == 1 and value.shape[ + 0] == 1, "optimizer's learning rate must be 1-D Tensor with shape[1]" + self._learning_rate_map[framework.default_main_program()] = value + + @framework.dygraph_only + def get_lr(self): + """ + :api_attr: imperative + + Get current step learning rate. The return value is all the same When LearningRateDecay is not used, + otherwise return the step learning rate. + + Returns: + float: The learning rate of the current step. + + Examples: + .. 
code-block:: python + + import numpy as np + import paddle + # example1: LearningRateDecay is not used, return value is all the same + paddle.disable_static() + emb = paddle.nn.Embedding([10, 10]) + adam = paddle.optimizer.Adam(0.001, parameters = emb.parameters()) + lr = adam.get_lr() + print(lr) # 0.001 + + # example2: PiecewiseDecay is used, return the step learning rate + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.reduce_mean(out) + + bd = [2, 4, 6, 8] + value = [0.2, 0.4, 0.6, 0.8, 1.0] + adam = paddle.optimizer.Adam(paddle.PiecewiseDecay(bd, value, 0), + parameters=linear.parameters()) + + # first step: learning rate is 0.2 + np.allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0) # True + + # learning rate for different steps + ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0] + for i in range(12): + adam.step() + lr = adam.get_lr() + np.allclose(lr, ret[i], rtol=1e-06, atol=0.0) # True + + """ + current_lr = self._global_learning_rate() + if isinstance(current_lr, framework.Variable): + return self._global_learning_rate().numpy()[0] + + if isinstance(self._learning_rate, float): + return self._learning_rate + elif isinstance(self._learning_rate, _LearningRateEpochDecay): + step_lr = self._learning_rate() + return step_lr.numpy()[0] + else: + step_lr = self._learning_rate.step() + if isinstance(step_lr, (float, int)): + return step_lr + else: + return step_lr.numpy()[0] + + def _global_learning_rate(self, program=None): + """ + get global decayed learning rate + :return: + """ + if program is None: + program = framework.default_main_program() + return self._learning_rate_map.get(program, None) + + def _append_optimize_op(self, block, param_and_grad): + """ append optimize operator to block and return all the added optimize_op + """ + raise NotImplementedError( + "Class \"Optimizer\" connot be used directly as an optimizer, please use its subclasses such as \"Adam\"" + ) + + def _create_param_lr(self, param_and_grad): + # create learning rate tensor for every parameter + param = param_and_grad[0] + param_lr = param.optimize_attr['learning_rate'] + if type(param_lr) == Variable: + return param_lr + else: + if param_lr == 1.0: + return self._global_learning_rate() + else: + with default_main_program()._lr_schedule_guard( + is_with_opt=True), framework.name_scope( + 'scale_with_param_lr'): + return self._global_learning_rate() * param_lr + + def _create_accumulators(self, block, parameters): + """Create all accumulators needed by the parameters + + Args: + block: the block in which the loss tensor is present + parameters: list of parameter tensors for the optimizer + """ + pass + + def _finish_update(self, block, parameters_and_grads): + """Finish any custom updates needed + before completing an optimization step + + Args: + block: the block in which the loss tensor is present + parameters: list of parameter tensors for the optimizer + + Returns: + None + """ + pass + + def _add_accumulator(self, + name, + param, + dtype=None, + fill_value=0.0, + shape=None, + type=None, + device=None): + """Utility function to add an accumulator for a parameter + + Args: + block: the block in which the loss tensor is present + name: name of the accumulator + param: parameter tensor for which accumulator is to be added + dtype: data type of the accumulator tensor + fill_value: value to initialize the accumulator tensor + """ + if self._name is 
not None: + name = self._name + "_" + name + if (name in self._accumulators and + param.name in self._accumulators[name]): + if framework.in_dygraph_mode(): + return self._accumulators[name][param.name] + raise Exception("Accumulator {} already exists for parameter {}". + format(name, param.name)) + if shape == None: + shape = param.shape + assert isinstance(self.helper, LayerHelper) + + var_name = param.name + "_" + name + var_name = unique_name.generate(var_name) + self._opti_name_list.append(var_name) + + var = self.helper.create_global_variable( + name=var_name, + persistable=True, + dtype=dtype or param.dtype, + type=param.type if type is None else type, + shape=shape, + belong_to_optimizer=True) + if device is None: + device = self._get_device_for_param(param.name) + with device_guard(device): + self.helper.set_variable_initializer( + var, initializer=Constant(value=float(fill_value))) + + if framework.in_dygraph_mode(): + if len(self._accumulators_holder) > 0: + assert var_name in self._accumulators_holder, \ + "Optimizer set error, {} should in state dict".format( var_name ) + var.set_value(self._accumulators_holder[var_name]) + + self._accumulators[name][param.name] = var + return var + + def _get_accumulator(self, name, param): + """Utility function to fetch an accumulator for a parameter + + Args: + name: name of the accumulator + param: parameter tensor for which accumulator is to be fetched + + Returns: + accumulator tensor for the parameter + """ + if self._name is not None: + name = self._name + "_" + name + if (name not in self._accumulators or + param.name not in self._accumulators[name]): + raise Exception("Accumulator {} does not exist for parameter {}". + format(name, param.name)) + return self._accumulators[name][param.name] + + def _update_param_device_map(self, parameters_and_grads, target_block): + for param_and_grad in parameters_and_grads: + if param_and_grad[0].trainable is True: + param_name = param_and_grad[0].name + ops = target_block.ops + device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName( + ) + for op in ops: + input_arg_names = op.input_arg_names + if param_name in input_arg_names: + self._param_device_map[param_name] = op.attr( + device_attr_name) + break + + def _get_device_for_param(self, param_name): + device = None + if param_name in self._param_device_map: + device = self._param_device_map[param_name] + return device + + def _create_optimization_pass(self, parameters_and_grads): + """Add optimization operators to update gradients to tensors. + + Args: + parameters_and_grads(list(tuple(Tensor, Tensor))): + a list of (tensor, gradient) pair to update. + + Returns: + return_op_list: a list of operators that will complete one step of + optimization. This will include parameter update ops, global step + update ops and any other custom ops required by subclasses to manage + their internal state. + """ + # This is a default implementation of create_optimization_pass that + # can be shared by most optimizers. This implementation assumes that + # the subclass will implement the _append_optimize_op method and the + # _initialize_tensors method. The subclass can extend the + # _create_accumulators method if it needs to create accumulators + # for parameters and extend _finish_update method to add custom ops. 
+ + # Allways called under program_guard use global block as loss block + # But if current block is in control flow, append optimize op in the + # grad block of current block + + global_block = framework.default_main_program().global_block() + target_block = global_block + current_block = framework.default_main_program().current_block() + if current_block.idx != global_block.idx: + assert current_block.backward_block_idx != -1, \ + "current block is not global_block, but it doesn't have backward block." + target_block = framework.default_main_program().blocks[ + current_block.backward_block_idx] + + start = len(target_block.ops) + self.helper = LayerHelper(self.__class__.__name__) + self._update_param_device_map(parameters_and_grads, target_block) + self._create_accumulators( + target_block, + [p[0] for p in parameters_and_grads if p[0].trainable]) + self._create_global_learning_rate() + + if framework.in_dygraph_mode(): + for param_and_grad in parameters_and_grads: + if param_and_grad[1] is None: + continue + if param_and_grad[0].trainable is True: + self._append_optimize_op(target_block, param_and_grad) + else: + for param_and_grad in parameters_and_grads: + if param_and_grad[1] is None: + continue + with param_and_grad[0].block.program._optimized_guard( + param_and_grad), name_scope("optimizer"): + if param_and_grad[0].trainable is True: + device = self._get_device_for_param(param_and_grad[0] + .name) + with device_guard(device): + optimize_op = self._append_optimize_op( + target_block, param_and_grad) + + # Get custom finish ops for subclasses + # FIXME: Need to fix this once we figure out how to handle dependencies + self._finish_update(target_block, parameters_and_grads) + + end = len(target_block.ops) + return target_block._slice_ops(start, end) + + def _append_dgc_ops(self, param_and_grad): + pass + + def backward(self, + loss, + startup_program=None, + parameters=None, + no_grad_set=None, + callbacks=None): + """ + The first part of ``minimize``, do auto-diff to append backward operations for + the current program. + + Args: + loss (Tensor): ``loss`` tensor to run optimizations. + startup_program (Program, optional): :ref:`api_fluid_Program` for + initializing parameters in ``parameters``. The default value + is None, at this time :ref:`api_fluid_default_startup_program` will be used. + parameters (list, optional): List of ``Tensor`` or ``Tensor.name`` to update + to minimize ``loss``. The default value is None, at this time all parameters + will be updated. + no_grad_set (set, optional): Set of ``Tensor`` or ``Tensor.name`` that don't need + to be updated. The default value is None. + callbacks (list, optional): list of callable objects to run when appending backward + operator for one parameter. The default value is None. + + Return: + list: list of (param, grad) tensor pairs, param is ``Parameter``, + grad is the gradient value corresponding to the parameter. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.to_tensor(value) + linear = paddle.nn.Linear(13, 5, dtype="float32") + # This can be any optimizer supported by dygraph. 
+ adam = paddle.optimizer.Adam(learning_rate = 0.01, + parameters = linear.parameters()) + out = linear(a) + out.backward() + adam.step() + adam.clear_grad() + """ + act_no_grad_set = None + if framework.in_dygraph_mode(): + pass + else: + act_no_grad_set = self._get_no_grad_set(loss, no_grad_set) + + self._dtype = loss.dtype + if framework.in_dygraph_mode(): + params_grads = [] + for param in self._parameter_list: + if not param.trainable: + continue + if param._grad_ivar() is not None: + # create gradient tensor + grad_var = param._grad_ivar() + params_grads.append((param, grad_var)) + else: + if callbacks is None: + callbacks = [error_clip_callback] + else: + assert (isinstance(callbacks, list)) + program = loss.block.program + assert len(loss.shape) == 1 and loss.shape[0] == 1, \ + "The loss.shape should be (1L,), but the current loss.shape is {}. " \ + "Maybe that you should call paddle.mean to process the current loss.".format( + loss.shape) + parameter_list = parameters if parameters \ + else self._parameter_list + with program_guard(program, startup_program): + params_grads = append_backward(loss, parameter_list, + act_no_grad_set, callbacks) + # Note: since we can't use all_reduce_op now, + # dgc_op should be the last op of one grad. + self._append_dgc_ops(params_grads) + return params_grads + + def apply_gradients(self, params_grads): + """ + Second part of `minimize`, appending optimization operators for + given `params_grads` pairs. + + Args: + params_grads (list): list of (param, grad) pair to do optimization. + + Returns: + list: A list of operators appended to the current program. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.mean(out) + optimizer = paddle.optimizer.Adam(learning_rate=0.1, + parameters=linear.parameters()) + params_grads = optimizer.backward(loss) + optimizer.apply_gradients(params_grads) + + """ + + params_grads = sorted(params_grads, key=lambda x: x[0].name) + + # 'optimizer(grad_clip)' or 'set_gradient_clip' + if self._grad_clip is not None: + params_grads = self._grad_clip(params_grads) + else: + + params_grads = append_gradient_clip_ops(params_grads) + + # Add regularization if any + params_grads = append_regularization_ops(params_grads, + self.regularization) + + optimize_ops = self._create_optimization_pass(params_grads) + return optimize_ops + + def _apply_optimize(self, loss, startup_program, params_grads): + """ + Second part of `minimize`, appending optimization operators for + given `params_grads` pairs. + Args: + loss (Tensor): loss tensor to run optimizations. + startup_program (Program): startup_program for initializing parameters + in `parameters`. + params_grads (list): list of (param, grad) pair to do optimization. + Returns: + list: A list of operators appended to the current program. 
+ """ + if framework.in_dygraph_mode(): + with program_guard(framework.default_main_program(), + framework.default_startup_program()): + if self._grad_clip is not None: + params_grads = self._grad_clip(params_grads) + params_grads = append_regularization_ops(params_grads, + self.regularization) + optimize_ops = self._create_optimization_pass(params_grads) + else: + program = loss.block.program + with program_guard(program, startup_program): + optimize_ops = self.apply_gradients(params_grads) + return optimize_ops + + def _get_no_grad_set(self, loss, no_grad_set=None): + no_grad_set = _get_no_grad_set_name(no_grad_set) + parameters = loss.block.program.global_block().all_parameters() + param_no_trainable = set( + [param.name for param in parameters if param.trainable is False]) + # If the parameter is no trainable, it should not have a gradient. + no_grad_set.update(param_no_trainable) + + return no_grad_set + + @framework.dygraph_only + def clear_grad(self): + """ + Clear the gradients of all optimized parameters for model. + + Returns: + None + + Examples: + .. code-block:: python + + import numpy as np + import paddle + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.to_tensor(value) + linear = paddle.nn.Linear(13, 5, dtype="float32") + # This can be any optimizer supported by dygraph. + adam = paddle.optimizer.Adam(learning_rate = 0.01, + parameters = linear.parameters()) + out = linear(a) + out.backward() + adam.step() + adam.clear_grad() + + """ + for p in self._parameter_list: + if p.trainable: + p.clear_gradient() + + @imperative_base.no_grad() + def minimize(self, + loss, + startup_program=None, + parameters=None, + no_grad_set=None): + """ + Add operations to minimize ``loss`` by updating ``parameters``. + + Args: + loss (Tensor): A ``Tensor`` containing the value to minimize. + startup_program (Program, optional): :ref:`api_fluid_Program` for + initializing parameters in ``parameters``. The default value + is None, at this time :ref:`api_fluid_default_startup_program` will be used. + parameters (list, optional): List of ``Tensor`` or ``Tensor.name`` to update + to minimize ``loss``. The default value is None, at this time all parameters + will be updated. + no_grad_set (set, optional): Set of ``Tensor`` or ``Tensor.name`` that don't need + to be updated. The default value is None. + + Returns: + tuple: tuple (optimize_ops, params_grads), A list of operators appended + by minimize and a list of (param, grad) tensor pairs, param is + ``Parameter``, grad is the gradient value corresponding to the parameter. + The returned tuple can be passed to ``fetch_list`` in ``Executor.run()`` to + indicate program pruning. If so, the program will be pruned by ``feed`` and + ``fetch_list`` before run, see details in ``Executor``. + + Examples: + .. 
code-block:: python + + import paddle + import paddle.fluid as fluid + + place = fluid.CPUPlace() + main = fluid.Program() + with fluid.program_guard(main): + x = fluid.data(name='x', shape=[None, 13], dtype='float32') + y = fluid.data(name='y', shape=[None, 1], dtype='float32') + y_predict = fluid.layers.fc(input=x, size=1, act=None) + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + + adam_optimizer = paddle.optimizer.Adam(0.01) + adam_optimizer.minimize(avg_cost) + + fetch_list = [avg_cost] + train_reader = paddle.batch( + paddle.dataset.uci_housing.train(), batch_size=1) + feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + for data in train_reader(): + exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list) + """ + assert isinstance(loss, Variable), "The loss should be an Tensor." + + parameter_list = parameters if parameters \ + else self._parameter_list + params_grads = self.backward( + loss, + startup_program=startup_program, + parameters=parameter_list, + no_grad_set=no_grad_set) + + optimize_ops = self._apply_optimize( + loss, startup_program=startup_program, params_grads=params_grads) + + return optimize_ops, params_grads + + @framework.dygraph_only + def step(self): + """ + Execute the optimizer once. + + Returns: + None + + Examples: + .. code-block:: python + + import paddle + import numpy as np + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.to_tensor(value) + linear = paddle.nn.Linear(13, 5, dtype="float32") + # This can be any optimizer supported by dygraph. + adam = paddle.optimizer.Adam(learning_rate = 0.01, + parameters = linear.parameters()) + out = linear(a) + out.backward() + adam.step() + adam.clear_grad() + """ + parameter_list = self._parameter_list + self._dtype = None + params_grads = [] + for param in self._parameter_list: + if not param.trainable: + continue + if param._grad_ivar() is not None: + grad_var = param._grad_ivar() + params_grads.append((param, grad_var)) + + optimize_ops = self._apply_optimize( + loss=None, startup_program=None, params_grads=params_grads) diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py new file mode 100644 index 0000000000000000000000000000000000000000..0bc4c9bfd53dc15449f03d6de6c8942e977bf562 --- /dev/null +++ b/python/paddle/optimizer/rmsprop.py @@ -0,0 +1,207 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .optimizer import Optimizer +from ..fluid import core +from ..fluid import framework +from ..fluid.framework import Variable + +__all__ = ["RMSProp"] + + +class RMSProp(Optimizer): + """ + Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning + rate method. The original slides proposed RMSProp: Slide 29 of + http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf . 
+ + The original equation is as follows: + + .. math:: + + r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 + + w & = w - \\frac{\\eta} {\\sqrt{r(w,t) + \\epsilon}} \\nabla Q_{i}(w) + + The first equation calculates moving average of the squared gradient for + each weight. Then dividing the gradient by :math:`sqrt{v(w,t)}`. + + In some cases, adding a momentum term :math: `\\beta` is beneficial. + In our implementation, Nesterov momentum is used: + + .. math:: + + r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 + + v(w, t) & = \\beta v(w, t-1) + \\frac{\\eta} {\\sqrt{r(w,t) + + \\epsilon}} \\nabla Q_{i}(w) + + w & = w - v(w, t) + + if centered is True: + + .. math:: + + r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 + + g(w, t) & = \\rho g(w, t-1) + (1 - \\rho)\\nabla Q_{i}(w) + + v(w, t) & = \\beta v(w, t-1) + \\frac{\\eta} {\\sqrt{r(w,t) - (g(w, t))^2 + + \\epsilon}} \\nabla Q_{i}(w) + + w & = w - v(w, t) + + where, :math:`\\rho` is a hyperparameter and typical values are 0.9, 0.95 + and so on. :math: `beta` is the momentum term. :math: `\\epsilon` is a + smoothing term to avoid division by zero, usually set somewhere in range + from 1e-4 to 1e-8. + + + Parameters: + learning_rate (float|LearningRateDecay): The learning rate used to update ``Parameter``. + It can be a float value or a LearningRateDecay. + rho(float): rho is :math: `\\rho` in equation, default is 0.95. + epsilon(float): :math: `\\epsilon` in equation is smoothing term to + avoid division by zero, default is 1e-6. + momentum(float): :math:`\\beta` in equation is the momentum term, + default is 0.0. + centered(bool): If True, gradients are normalized by the estimated variance of + the gradient; if False, by the uncentered second moment. Setting this to + True may help with training, but is slightly more expensive in terms of + computation and memory. Defaults to False. + parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It canbe a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. + grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of + some derived class of ``GradientClipBase`` . There are three cliping strategies + ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , + :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping. + name (str, optional): This parameter is used by developers to print debugging information. \ + For details, please refer to :ref:`api_guide_Name`. Default is None. + + Raises: + ValueError: If learning_rate, rho, epsilon, momentum are None. + + Examples: + .. 
code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.mean(out) + + beta1 = paddle.to_tensor([0.9], dtype="float32") + beta2 = paddle.to_tensor([0.99], dtype="float32") + + adam = paddle.optimizer.RMSProp(learning_rate=0.1, + parameters=linear.parameters(), + weight_decay=0.01) + out.backward() + adam.step() + adam.clear_grad() + + """ + + _momentum_acc_str = "momentum" + _mean_square_acc_str = "mean_square" + _mean_grad_acc_str = "mean_grad" + + def __init__(self, + learning_rate, + rho=0.95, + epsilon=1.0e-6, + momentum=0.0, + centered=False, + parameters=None, + weight_decay=None, + grad_clip=None, + name=None): + if learning_rate is None: + raise ValueError("learning_rate is not set.") + if rho is None: + raise ValueError("rho is not set.") + if epsilon is None: + raise ValueError("epsilon is not set.") + if momentum is None: + raise ValueError("momentum is not set.") + + super(RMSProp, self).__init__( + learning_rate=learning_rate, + parameters=parameters, + weight_decay=weight_decay, + grad_clip=grad_clip, + name=name) + + self.type = "rmsprop" + self._rho = rho + self._epsilon = epsilon + self._momentum = momentum + self._centered = centered + + def _create_accumulators(self, block, parameters): + if not isinstance(block, framework.Block): + raise TypeError("block is not instance of framework.Block.") + + for p in parameters: + self._add_accumulator(self._momentum_acc_str, p) + self._add_accumulator(self._mean_square_acc_str, p) + self._add_accumulator(self._mean_grad_acc_str, p) + + def _append_optimize_op(self, block, param_and_grad): + if not isinstance(block, framework.Block): + raise TypeError("block is not instance of framework.Block.") + + momentum_acc = self._get_accumulator(self._momentum_acc_str, + param_and_grad[0]) + mean_square_acc = self._get_accumulator(self._mean_square_acc_str, + param_and_grad[0]) + mean_grad_acc = self._get_accumulator(self._mean_grad_acc_str, + param_and_grad[0]) + rmsprop_op = block.append_op( + type=self.type, + inputs={ + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "Moment": momentum_acc, + "MeanSquare": mean_square_acc, + "MeanGrad": mean_grad_acc, + "LearningRate": self._create_param_lr(param_and_grad), + }, + outputs={ + "ParamOut": param_and_grad[0], + "MomentOut": momentum_acc, + "MeanSquareOut": mean_square_acc, + "MeanGradOut": mean_grad_acc + }, + attrs={ + "epsilon": self._epsilon, + "decay": self._rho, + "momentum": self._momentum, + "centered": self._centered + }, + stop_gradient=True) + + return rmsprop_op diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index d31e5173f8b7d2bd1ab89267b11bdbbad9feb518..42a28a4f04e368cf8a1c1a144639bc743234a540 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -17,9 +17,13 @@ __all__ = [ 'append_backward', 'gradients', 'Executor', 'global_scope', 'scope_guard', 'BuildStrategy', 'CompiledProgram', 'Print', 'py_func', 'ExecutionStrategy', 'name_scope', 'ParallelExecutor', 'program_guard', 'WeightNormParamAttr', - 'default_main_program', 'default_startup_program', 'Program', 'save', 'load' + 'default_main_program', 'default_startup_program', 'Program', 'save', + 'load', 'data', 'InputSpec' ] +from . 
import nn +from .input import data #DEFINE_ALIAS +from .input import InputSpec #DEFINE_ALIAS from ..fluid.executor import Executor #DEFINE_ALIAS from ..fluid.executor import global_scope #DEFINE_ALIAS from ..fluid.executor import scope_guard #DEFINE_ALIAS diff --git a/python/paddle/static/input.py b/python/paddle/static/input.py new file mode 100644 index 0000000000000000000000000000000000000000..06b9c7cdbef5dd11d237a2b85586e598611bf83e --- /dev/null +++ b/python/paddle/static/input.py @@ -0,0 +1,155 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import numpy as np +import six + +from paddle.fluid import core +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.data_feeder import check_dtype, check_type + +__all__ = ['data', 'InputSpec'] + + +def data(name, shape, dtype=None, lod_level=0): + """ + **Data Layer** + + This function creates a variable on the global block. The global variable + can be accessed by all the following operators in the graph. The variable + is a placeholder that could be fed with input, such as Executor can feed + input into the variable. When `dtype` is None, the dtype + will get from the global dtype by `paddle.get_default_dtype()`. + + Args: + name (str): The name/alias of the variable, see :ref:`api_guide_Name` + for more details. + shape (list|tuple): List|Tuple of integers declaring the shape. You can + set "None" or -1 at a dimension to indicate the dimension can be of any + size. For example, it is useful to set changeable batch size as "None" or -1. + dtype (np.dtype|str, optional): The type of the data. Supported + dtype: bool, float16, float32, float64, int8, int16, int32, int64, + uint8. Default: None. When `dtype` is not set, the dtype will get + from the global dtype by `paddle.get_default_dtype()`. + lod_level (int, optional): The LoD level of the LoDTensor. Usually users + don't have to set this value. For more details about when and how to + use LoD level, see :ref:`user_guide_lod_tensor` . Default: 0. + + Returns: + Variable: The global variable that gives access to the data. + + Examples: + .. code-block:: python + + import numpy as np + import paddle.fluid as fluid + import paddle + + # Creates a variable with fixed size [3, 2, 1] + # User can only feed data of the same shape to x + # the dtype is not set, so it will set "float32" by + # paddle.get_default_dtype(). You can use paddle.get_default_dtype() to + # change the global dtype + x = paddle.static.data(name='x', shape=[3, 2, 1]) + + # Creates a variable with changeable batch size -1. 
+ # Users can feed data of any batch size into y, + # but size of each data sample has to be [2, 1] + y = paddle.static.data(name='y', shape=[-1, 2, 1], dtype='float32') + + z = x + y + + # In this example, we will feed x and y with np-ndarray "1" + # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle + feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32) + + exe = fluid.Executor(fluid.CPUPlace()) + out = exe.run(fluid.default_main_program(), + feed={ + 'x': feed_data, + 'y': feed_data + }, + fetch_list=[z.name]) + + # np-ndarray of shape=[3, 2, 1], dtype=float32, whose elements are 2 + print(out) + + """ + helper = LayerHelper('data', **locals()) + check_type(name, 'name', (six.binary_type, six.text_type), 'data') + check_type(shape, 'shape', (list, tuple), 'data') + + shape = list(shape) + for i in six.moves.range(len(shape)): + if shape[i] is None: + shape[i] = -1 + + if dtype: + return helper.create_global_variable( + name=name, + shape=shape, + dtype=dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + stop_gradient=True, + lod_level=lod_level, + is_data=True, + need_check_feed=True) + else: + return helper.create_global_variable( + name=name, + shape=shape, + dtype=paddle.get_default_dtype(), + type=core.VarDesc.VarType.LOD_TENSOR, + stop_gradient=True, + lod_level=lod_level, + is_data=True, + need_check_feed=True) + + +class InputSpec(object): + """ + Define input specification of the model. + + Args: + name (str): The name/alias of the variable, see :ref:`api_guide_Name` + for more details. + shape (tuple(integers)|list[integers]): List|Tuple of integers + declaring the shape. You can set "None" or -1 at a dimension + to indicate the dimension can be of any size. For example, + it is useful to set changeable batch size as "None" or -1. + dtype (np.dtype|str, optional): The type of the data. Supported + dtype: bool, float16, float32, float64, int8, int16, int32, int64, + uint8. Default: float32. + + Examples: + .. 
code-block:: python + + from paddle.static import InputSpec + + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') + """ + + def __init__(self, shape=None, dtype='float32', name=None): + self.shape = shape + self.dtype = dtype + self.name = name + + def _create_feed_layer(self): + return data(self.name, shape=self.shape, dtype=self.dtype) + + def __repr__(self): + return '{}(shape={}, dtype={}, name={})'.format( + type(self).__name__, self.shape, self.dtype, self.name) diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index a295aae5de2def4d72a3e2a3c5c0d66d26387fc4..0fed32a1676759bd94961af0a8949d035ec48c8f 100644 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -22,9 +22,7 @@ from __future__ import print_function from .random import randperm from .attribute import rank #DEFINE_ALIAS from .attribute import shape #DEFINE_ALIAS -from .creation import create_tensor #DEFINE_ALIAS -# from .creation import create_lod_tensor #DEFINE_ALIAS -# from .creation import create_random_int_lodtensor #DEFINE_ALIAS +from .creation import to_tensor #DEFINE_ALIAS from .creation import crop_tensor #DEFINE_ALIAS from .creation import diag #DEFINE_ALIAS from .creation import eye #DEFINE_ALIAS @@ -60,7 +58,7 @@ from .logic import equal #DEFINE_ALIAS from .logic import greater_equal #DEFINE_ALIAS from .logic import greater_than #DEFINE_ALIAS from .logic import is_empty #DEFINE_ALIAS -from .logic import isfinite #DEFINE_ALIAS +#from .logic import isfinite #DEFINE_ALIAS from .logic import less_equal #DEFINE_ALIAS from .logic import less_than #DEFINE_ALIAS from .logic import logical_and #DEFINE_ALIAS @@ -76,7 +74,9 @@ from .logic import equal_all #DEFINE_ALIAS from .manipulation import cast #DEFINE_ALIAS from .manipulation import concat #DEFINE_ALIAS from .manipulation import expand #DEFINE_ALIAS +from .manipulation import broadcast_to #DEFINE_ALIAS from .manipulation import expand_as #DEFINE_ALIAS +from .manipulation import tile #DEFINE_ALIAS from .manipulation import flatten #DEFINE_ALIAS from .manipulation import gather #DEFINE_ALIAS from .manipulation import gather_nd #DEFINE_ALIAS @@ -99,6 +99,7 @@ from .manipulation import unstack #DEFINE_ALIAS from .manipulation import flip #DEFINE_ALIAS from .manipulation import unbind #DEFINE_ALIAS from .manipulation import roll #DEFINE_ALIAS +from .manipulation import chunk #DEFINE_ALIAS from .math import abs #DEFINE_ALIAS from .math import acos #DEFINE_ALIAS from .math import asin #DEFINE_ALIAS @@ -110,6 +111,7 @@ from .math import cumsum #DEFINE_ALIAS from .math import elementwise_add #DEFINE_ALIAS from .math import elementwise_div #DEFINE_ALIAS from .math import elementwise_floordiv #DEFINE_ALIAS +from .math import elementwise_mul #DEFINE_ALIAS from .math import elementwise_mod #DEFINE_ALIAS from .math import elementwise_pow #DEFINE_ALIAS from .math import elementwise_sub #DEFINE_ALIAS @@ -142,7 +144,11 @@ from .math import maximum #DEFINE_ALIAS from .math import min #DEFINE_ALIAS from .math import minimum #DEFINE_ALIAS from .math import mm #DEFINE_ALIAS -from .math import div #DEFINE_ALIAS +from .math import divide #DEFINE_ALIAS +from .math import floor_divide #DEFINE_ALIAS +from .math import remainder #DEFINE_ALIAS +from .math import mod #DEFINE_ALIAS +from .math import floor_mod #DEFINE_ALIAS from .math import multiply #DEFINE_ALIAS from .math import add #DEFINE_ALIAS from .math import atan #DEFINE_ALIAS @@ -152,11 +158,16 @@ from .math import log1p #DEFINE_ALIAS 
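The import hunks in ``python/paddle/tensor/__init__.py`` rename several math aliases (``div`` becomes ``divide``, ``clamp`` becomes ``clip``) and export new ops such as ``tile``, ``chunk`` and ``isfinite``. A minimal usage sketch of the renamed entry points, assuming the imperative mode (``paddle.disable_static()``) used by the examples elsewhere in this patch; the printed values are only what the elementwise semantics suggest:

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()

    x = paddle.to_tensor(np.array([2., 4., 9.], dtype='float32'))
    y = paddle.to_tensor(np.array([2., 3., 3.], dtype='float32'))

    z = paddle.divide(x, y)                 # replaces the old paddle.div alias
    c = paddle.clip(x, min=3., max=8.)      # replaces the old paddle.clamp alias
    t = paddle.tile(paddle.to_tensor(np.array([1, 2, 3], dtype='int32')),
                    repeat_times=[2, 1])    # newly exported manipulation op

    print(z.numpy())    # approximately [1.   1.33  3.  ]
    print(c.numpy())    # [3. 4. 8.]
    print(t.numpy())    # [[1 2 3]
                        #  [1 2 3]]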
from .math import erf #DEFINE_ALIAS from .math import addcmul #DEFINE_ALIAS from .math import addmm #DEFINE_ALIAS -from .math import clamp #DEFINE_ALIAS +from .math import clip #DEFINE_ALIAS from .math import trace #DEFINE_ALIAS from .math import kron #DEFINE_ALIAS -# from .random import gaussin #DEFINE_ALIAS -# from .random import uniform #DEFINE_ALIAS +from .math import isfinite #DEFINE_ALIAS +from .math import isinf #DEFINE_ALIAS +from .math import isnan #DEFINE_ALIAS +from .math import prod #DEFINE_ALIAS +from .random import standard_normal +from .random import normal +from .random import uniform #DEFINE_ALIAS from .random import shuffle #DEFINE_ALIAS from .random import randn #DEFINE_ALIAS from .random import rand #DEFINE_ALIAS @@ -174,10 +185,12 @@ from .search import index_select #DEFINE_ALIAS from .search import nonzero #DEFINE_ALIAS from .search import sort #DEFINE_ALIAS from .search import index_sample #DEFINE_ALIAS +from .search import masked_select #DEFINE_ALIAS from .stat import mean #DEFINE_ALIAS from .stat import reduce_mean #DEFINE_ALIAS from .stat import std #DEFINE_ALIAS from .stat import var #DEFINE_ALIAS +from .stat import numel #DEFINE_ALIAS # from .tensor import Tensor #DEFINE_ALIAS # from .tensor import LoDTensor #DEFINE_ALIAS # from .tensor import LoDTensorArray #DEFINE_ALIAS diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 0875fb4c219a0876eab7595d654ed144aedaeac7..1911d8ccc25e01ee6419fd26126881304ab61f01 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -13,7 +13,12 @@ # limitations under the License. from __future__ import print_function +import numpy as np + from ..fluid.framework import Variable +from ..fluid.framework import unique_name +from ..fluid.framework import _current_expected_place +from ..fluid.framework import dygraph_only from ..fluid.initializer import Constant from ..fluid.layers import core from ..fluid.layer_helper import LayerHelper @@ -21,20 +26,15 @@ from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtyp from ..fluid.framework import convert_np_dtype_to_dtype_, in_dygraph_mode, _varbase_creator, device_guard, OpProtoHolder from ..fluid.layers import fill_constant from paddle.common_ops_import import * -import paddle # TODO: define functions to get create a tensor from ..fluid.layers import crop_tensor #DEFINE_ALIAS -from ..fluid.layers import diag #DEFINE_ALIAS from ..fluid.layers import fill_constant #DEFINE_ALIAS -from ..fluid.layers import create_tensor #DEFINE_ALIAS from ..fluid.layers import linspace #DEFINE_ALIAS import paddle __all__ = [ - 'create_tensor', - # 'create_lod_tensor', - # 'create_random_int_lodtensor', + 'to_tensor', 'crop_tensor', 'diag', 'fill_constant', @@ -54,10 +54,172 @@ __all__ = [ ] +@dygraph_only +def to_tensor(data, dtype=None, place=None, stop_gradient=True): + """ + Constructs a ``paddle.Tensor`` or ``paddle.ComplexTensor`` from ``data`` , + which can be scalar, tuple, list, numpy\.ndarray, paddle\.Tensor, paddle\.ComplexTensor. + + If the ``data`` is already a tensor, and ``dtype`` or ``place`` does't change, no copy + will be performed and return origin tensor, otherwise a new tensor will be constructed + and returned. Similarly, if the data is an numpy\.ndarray of with the same ``dtype`` + and the current place is cpu, no copy will be performed. + + The ``ComplexTensor`` is a unique type of paddle. If x is ``ComplexTensor``, then + ``x.real`` is the real part, and ``x.imag`` is the imaginary part. 
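As a rough illustration of the copy semantics described above (a sketch only, assuming imperative mode; the exact behaviour is implemented in the function body that follows):

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()

    a = paddle.to_tensor([1.0, 2.0])
    b = paddle.to_tensor(a)                   # same dtype and place: `a` itself is returned, no copy
    c = paddle.to_tensor(a, dtype='float64')  # dtype changes: a new tensor is constructed

    arr = np.array([1 + 1j, 2 + 2j])
    ct = paddle.to_tensor(arr, dtype='complex64')
    # ct.real and ct.imag hold the real and imaginary parts as separate tensors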
+ + Args: + data(scalar|tuple|list|ndarray|Tensor|ComplexTensor): Initial data for the tensor. + Can be a scalar, list, tuple, numpy\.ndarray, paddle\.Tensor, paddle\.ComplexTensor. + dtype(str, optional): The desired data type of returned tensor. Can be 'bool' , 'float16' , + 'float32' , 'float64' , 'int8' , 'int16' , 'int32' , 'int64' , 'uint8'. And + 'complex64' , 'complex128' only for ComplexTensor. + Default: None, infers data type from ``data`` . + place(CPUPlace|CUDAPinnedPlace|CUDAPlace, optional): The place to allocate Tensor. Can be + CPUPlace, CUDAPinnedPlace, CUDAPlace. Default: None, means global place. + stop_gradient(bool, optional): Whether to block the gradient propagation of Autograd. Default: True. + + Returns: + Tensor: A Tensor or ComplexTensor constructed from ``data``. + + Raises: + TypeError: If the data type of ``data`` is not scalar, list, tuple, numpy.ndarray, paddle.Tensor, paddle.ComplexTensor + ValueError: If ``data`` is tuple|list, it can't contain nested tuple|list with different lengths , such as: [[1, 2], [3, 4, 5]] + TypeError: If ``dtype`` is not bool, float16, float32, float64, int8, int16, int32, int64, uint8, complex64, complex128 + ValueError: If ``place`` is not paddle.Place, paddle.CUDAPinnedPlace, paddle.CUDAPlace + + Examples: + + .. code-block:: python + + import paddle + import numpy as np + paddle.enable_imperative() + + type(paddle.to_tensor(1)) + # + + paddle.to_tensor(1) + # Tensor: generated_tensor_0 + # - place: CUDAPlace(0) # allocate on global default place CPU:0 + # - shape: [1] + # - layout: NCHW + # - dtype: int64_t + # - data: [1] + + x = paddle.to_tensor(1) + paddle.to_tensor(x, dtype='int32', place=paddle.CPUPlace()) # A new tensor will be constructed due to different dtype or place + # Tensor: generated_tensor_01 + # - place: CPUPlace + # - shape: [1] + # - layout: NCHW + # - dtype: int + # - data: [1] + + paddle.to_tensor((1.1, 2.2), place=paddle.CUDAPinnedPlace()) + # Tensor: generated_tensor_1 + # - place: CUDAPinnedPlace + # - shape: [2] + # - layout: NCHW + # - dtype: double + # - data: [1.1 2.2] + + paddle.to_tensor([[0.1, 0.2], [0.3, 0.4]], place=paddle.CUDAPlace(0), stop_gradient=False) + # Tensor: generated_tensor_2 + # - place: CUDAPlace(0) + # - shape: [2, 2] + # - layout: NCHW + # - dtype: double + # - data: [0.1 0.2 0.3 0.4] + + type(paddle.to_tensor([[1+1j, 2], [3+2j, 4]]), , dtype='complex64') + # + + paddle.to_tensor([[1+1j, 2], [3+2j, 4]], dtype='complex64') + # ComplexTensor[real]: generated_tensor_0.real + # - place: CUDAPlace(0) + # - shape: [2, 2] + # - layout: NCHW + # - dtype: float + # - data: [1 2 3 4] + # ComplexTensor[imag]: generated_tensor_0.imag + # - place: CUDAPlace(0) + # - shape: [2, 2] + # - layout: NCHW + # - dtype: float + # - data: [1 0 2 0] + """ + + if place is None: + place = _current_expected_place() + elif not isinstance(place, + (core.CPUPlace, core.CUDAPinnedPlace, core.CUDAPlace)): + raise ValueError( + "'place' must be any of paddle.Place, paddle.CUDAPinnedPlace, paddle.CUDAPlace" + ) + + #Todo(zhouwei): Support allocate tensor on any other specified card + if isinstance(place, core.CUDAPlace) and isinstance( + _current_expected_place(), core.CUDAPlace) and place._get_device_id( + ) != _current_expected_place()._get_device_id(): + place = _current_expected_place() + + if not isinstance(data, np.ndarray): + if np.isscalar(data) and not isinstance(data, str): + data = np.array([data]) + elif isinstance(data, (list, tuple)): + data = np.array(data) + if data.dtype == np.object: + raise 
ValueError( + "\n\tFaild to convert input data to a regular ndarray :\n\t - Usually " + "this means the input data contains nested lists with different lengths. " + ) + elif isinstance(data, paddle.Tensor): + data.stop_gradient = stop_gradient + if not data.place._equals(place): + data = data._copy_to(place, False) + if dtype: + if convert_dtype(dtype) != convert_dtype(data.dtype): + return data.astype(convert_dtype(dtype)) + return data + elif isinstance(data, paddle.ComplexTensor): + return data + else: + raise TypeError( + "Can't constructs a 'paddle.Tensor' with data type {}, data type must be scalar|list|tuple|numpy.ndarray|paddle.Tensor|paddle.ComplexTensor". + format(type(data))) + + if dtype: + dtype = convert_dtype(dtype) + if dtype != data.dtype: + data = data.astype(dtype) + + if not np.iscomplexobj(data): + return paddle.Tensor( + value=data, + place=place, + persistable=False, + zero_copy=True, + stop_gradient=stop_gradient) + else: + name = unique_name.generate('generated_tensor') + real_tensor = paddle.Tensor( + value=data.real, + place=place, + zero_copy=True, + name=name + ".real", + stop_gradient=stop_gradient) + imag_tensor = paddle.Tensor( + value=data.imag, + place=place, + zero_copy=True, + name=name + ".imag", + stop_gradient=stop_gradient) + return paddle.ComplexTensor(real_tensor, imag_tensor) + + def full_like(x, fill_value, dtype=None, name=None): """ - :alias_main: paddle.full_like - :alias: paddle.tensor.full_like, paddle.tensor.creation.full_like This function creates a tensor filled with ``fill_value`` which has identical shape of ``x`` and ``dtype``. If the ``dtype`` is None, the data type of Tensor is same with ``x``. @@ -65,7 +227,7 @@ def full_like(x, fill_value, dtype=None, name=None): Args: x(Tensor): The input tensor which specifies shape and data type. The data type can be bool, float16, float32, float64, int32, int64. fill_value(bool|float|int): The value to fill the tensor with. Note: this value shouldn't exceed the range of the output data type. - dtype(np.dtype|core.VarDesc.VarType|str, optional): The data type of output. The data type can be one + dtype(np.dtype|str, optional): The data type of output. The data type can be one of bool, float16, float32, float64, int32, int64. The default value is None, which means the output data type is the same as input. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` @@ -120,14 +282,12 @@ def full_like(x, fill_value, dtype=None, name=None): def ones(shape, dtype=None, name=None): """ - :alias_main: paddle.ones - :alias: paddle.tensor.ones, paddle.tensor.creation.ones The OP creates a tensor of specified :attr:`shape` and :attr:`dtype`, and fills it with 1. Args: shape(tuple|list|Tensor): Shape of the Tensor to be created, the data type of shape is int32 or int64. - dtype(np.dtype|core.VarDesc.VarType|str, optional): Data type of output Tensor, it supports + dtype(np.dtype|str, optional): Data type of output Tensor, it supports bool, float16, float32, float64, int32 and int64. Default: if None, the data type is 'float32'. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` @@ -196,14 +356,14 @@ def ones_like(x, dtype=None, name=None): Examples: .. 
code-block:: python - import paddle - import numpy as np + import paddle + import numpy as np - paddle.disable_static() + paddle.disable_static() - x = paddle.to_variable(np.array([1,2,3], dtype='float32')) - out1 = paddle.zeros_like(x) # [1., 1., 1.] - out2 = paddle.zeros_like(x, dtype='int32') # [1, 1, 1] + x = paddle.to_tensor(np.array([1,2,3], dtype='float32')) + out1 = paddle.zeros_like(x) # [1., 1., 1.] + out2 = paddle.zeros_like(x, dtype='int32') # [1, 1, 1] """ return full_like(x=x, fill_value=1, dtype=dtype, name=name) @@ -211,14 +371,11 @@ def ones_like(x, dtype=None, name=None): def zeros(shape, dtype=None, name=None): """ - :alias_main: paddle.zeros - :alias: paddle.tensor.zeros, paddle.tensor.creation.zeros - The OP creates a tensor of specified :attr:`shape` and :attr:`dtype`, and fills it with 0. Args: shape(tuple|list|Tensor): Shape of the Tensor to be created, the data type of ``shape`` is int32 or int64. - dtype(np.dtype|core.VarDesc.VarType|str, optional): Data type of output Tensor, it supports + dtype(np.dtype|str, optional): Data type of output Tensor, it supports bool, float16, float32, float64, int32 and int64. Default: if None, the date type is float32. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. @@ -286,14 +443,14 @@ def zeros_like(x, dtype=None, name=None): Examples: .. code-block:: python - import paddle - import numpy as np + import paddle + import numpy as np - paddle.disable_static() + paddle.disable_static() - x = paddle.to_variable(np.array([1,2,3], dtype='float32')) - out1 = paddle.zeros_like(x) # [0., 0., 0.] - out2 = paddle.zeros_like(x, dtype='int32') # [0, 0, 0] + x = paddle.to_tensor(np.array([1,2,3], dtype='float32')) + out1 = paddle.zeros_like(x) # [0., 0., 0.] + out2 = paddle.zeros_like(x, dtype='int32') # [0, 0, 0] """ return full_like(x=x, fill_value=0, dtype=dtype, name=name) @@ -301,8 +458,6 @@ def zeros_like(x, dtype=None, name=None): def eye(num_rows, num_columns=None, dtype=None, name=None): """ - :alias_main: paddle.eye - :alias: paddle.tensor.eye, paddle.tensor.creation.eye This function constructs 2-D Tensor with ones on the diagonal and zeros elsewhere. @@ -310,7 +465,7 @@ def eye(num_rows, num_columns=None, dtype=None, name=None): num_rows(int): the number of rows in each batch Tensor. num_columns(int, optional): the number of columns in each batch Tensor. If None, default: num_rows. - dtype(np.dtype|core.VarDesc.VarType|str, optional): The data type of the returned Tensor. + dtype(np.dtype|str, optional): The data type of the returned Tensor. It should be int32, int64, float16, float32, float64. Default: if None, the data type is float32. name(str, optional): The default value is None. Normally there is no need for @@ -351,8 +506,6 @@ def eye(num_rows, num_columns=None, dtype=None, name=None): def full(shape, fill_value, dtype=None, name=None): """ - :alias_main: paddle.full - :alias: paddle.tensor.full, paddle.tensor.creation.full This Op return a Tensor with the ``fill_value`` which size is same as ``shape``. @@ -363,7 +516,7 @@ def full(shape, fill_value, dtype=None, name=None): If ``shape`` is an Tensor, it should be an 1-D Tensor . fill_value(bool|float|int|Tensor): The constant value used to initialize the Tensor to be created. If ``fill_value`` is an Tensor, it must be an 1-D Tensor. 
- dtype(np.dtype|core.VarDesc.VarType|str, optional): Data type of the output Tensor + dtype(np.dtype|str, optional): Data type of the output Tensor which can be float16, float32, float64, int32, int64, if dytpe is `None`, the data type of created Tensor is `float32` name(str, optional): The default value is None. Normally there is no need for user to set this @@ -471,7 +624,7 @@ def arange(start=0, end=None, step=1, dtype=None, name=None): out3 = paddle.arange(4.999, dtype='float32') # [0., 1., 2., 3., 4.] - start_var = paddle.to_variable(np.array([3])) + start_var = paddle.to_tensor(np.array([3])) out4 = paddle.arange(start_var, 7) # [3, 4, 5, 6] @@ -713,8 +866,8 @@ def meshgrid(*args, **kwargs): input_3 = np.random.randint(0, 100, [100, ]).astype('int32') input_4 = np.random.randint(0, 100, [200, ]).astype('int32') - tensor_3 = paddle.to_variable(input_3) - tensor_4 = paddle.to_variable(input_4) + tensor_3 = paddle.to_tensor(input_3) + tensor_4 = paddle.to_tensor(input_4) grid_x, grid_y = paddle.tensor.meshgrid(tensor_3, tensor_4) #the shape of grid_x is (100, 200) @@ -749,3 +902,92 @@ def meshgrid(*args, **kwargs): type='meshgrid', inputs={'X': list(args)}, outputs={'Out': out}) return out + + +def diag(x, offset=0, padding_value=0, name=None): + """ + If ``x`` is a vector (1-D tensor), a 2-D square tensor whth the elements of ``x`` as the diagonal is returned. + + If ``x`` is a matrix (2-D tensor), a 1-D tensor with the diagonal elements of ``x`` is returned. + + The argument ``offset`` controls the diagonal offset: + + If ``offset`` = 0, it is the main diagonal. + + If ``offset`` > 0, it is superdiagonal. + + If ``offset`` < 0, it is subdiagonal. + + Args: + x (Tensor): The input tensor. Its shape is either 1-D or 2-D. Its data type should be float32, float64, int32, int64. + offset (int, optional): The diagonal offset. A positive value represents superdiagonal, 0 represents the main diagonal, and a negative value represents subdiagonal. + padding_value (int|float, optional): Use this value to fill the area outside the specified diagonal band. Only takes effect when the input is a 1-D Tensor. The default value is 0. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor, a square matrix or a vector. The output data type is the same as input data type. + + Examples: + .. code-block:: python + + import paddle + + paddle.disable_static() + x = paddle.to_tensor([1, 2, 3]) + y = paddle.diag(x) + print(y.numpy()) + # [[1 0 0] + # [0 2 0] + # [0 0 3]] + + y = paddle.diag(x, offset=1) + print(y.numpy()) + # [[0 1 0 0] + # [0 0 2 0] + # [0 0 0 3] + # [0 0 0 0]] + + y = paddle.diag(x, padding_value=6) + print(y.numpy()) + # [[1 6 6] + # [6 2 6] + # [6 6 3]] + + .. 
code-block:: python + + import paddle + + paddle.disable_static() + x = paddle.to_tensor([[1, 2, 3], [4, 5, 6]]) + y = paddle.diag(x) + print(y.numpy()) + # [1 5] + + y = paddle.diag(x, offset=1) + print(y.numpy()) + # [2 6] + + y = paddle.diag(x, offset=-1) + print(y.numpy()) + # [4] + """ + if in_dygraph_mode(): + return core.ops.diag_v2(x, "offset", offset, "padding_value", + padding_value) + + check_type(x, 'x', (Variable), 'diag_v2') + check_dtype(x.dtype, 'x', ['float32', 'float64', 'int32', 'int64'], + 'diag_v2') + helper = LayerHelper("diag_v2", **locals()) + + out = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op( + type='diag_v2', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'offset': offset, + 'padding_value': padding_value}) + + out.stop_gradient = True + return out diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 972c9fbce4d2ab11b4a0bbc4f9818f721486c741..a7bf2272a599ef2d6de076e7129b43152ca47b06 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -35,135 +35,134 @@ __all__ = [ ] -def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): +def matmul(x, y, transpose_x=False, transpose_y=False, name=None): """ - :alias_main: paddle.matmul - :alias: paddle.matmul,paddle.tensor.matmul,paddle.tensor.linalg.matmul + Applies matrix multiplication to two tensors. `matmul` follows + the complete broadcast rules, + and its behavior is consistent with `np.matmul`. - Applies matrix multiplication to two tensors. - - Currently, the input tensors' rank can be any, but when the rank of any - inputs is bigger than 3, this two inputs' rank should be equal. + Currently, the input tensors' number of dimensions can be any, `matmul` can be used to + achieve the `dot`, `matmul` and `batchmatmul`. The actual behavior depends on the shapes of :math:`x`, :math:`y` and the flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically: - If a transpose flag is specified, the last two dimensions of the tensor - are transposed. If the tensor is rank-1 of shape :math:`[D]`, then for - :math:`x` it is treated as :math:`[1, D]` in nontransposed form and as - :math:`[D, 1]` in transposed form, whereas for :math:`y` it is the - opposite: It is treated as :math:`[D, 1]` in nontransposed form and as - :math:`[1, D]` in transposed form. - - - After transpose, the two tensors are 2-D or n-D and matrix multiplication - performs in the following way. - - - If both are 2-D, they are multiplied like conventional matrices. - - If either is n-D, it is treated as a stack of matrices residing in the - last two dimensions and a batched matrix multiply supporting broadcast - applies on the two tensors. - - Also note that if the raw tensor :math:`x` or :math:`y` is rank-1 and - nontransposed, the prepended or appended dimension :math:`1` will be - removed after matrix multiplication. + are transposed. If the tensor is ndim-1 of shape, the transpose is invalid. If the tensor + is ndim-1 of shape :math:`[D]`, then for :math:`x` it is treated as :math:`[1, D]`, whereas + for :math:`y` it is the opposite: It is treated as :math:`[D, 1]`. + + The multiplication behavior depends on the dimensions of `x` and `y`. Specifically: + + - If both tensors are 1-dimensional, the dot product result is obtained. + + - If both tensors are 2-dimensional, the matrix-matrix product is obtained. 
+ + - If the `x` is 1-dimensional and the `y` is 2-dimensional, + a `1` is prepended to its dimension in order to conduct the matrix multiply. + After the matrix multiply, the prepended dimension is removed. + + - If the `x` is 2-dimensional and `y` is 1-dimensional, + the matrix-vector product is obtained. + + - If both arguments are at least 1-dimensional and at least one argument + is N-dimensional (where N > 2), then a batched matrix multiply is obtained. + If the first argument is 1-dimensional, a 1 is prepended to its dimension + in order to conduct the batched matrix multiply and removed after. + If the second argument is 1-dimensional, a 1 is appended to its + dimension for the purpose of the batched matrix multiple and removed after. + The non-matrix (exclude the last two dimensions) dimensions are + broadcasted according the broadcast rule. + For example, if input is a (j, 1, n, m) tensor and the other is a (k, m, p) tensor, + out will be a (j, k, n, p) tensor. Args: - x (Variable): The input variable which is a Tensor or LoDTensor. - y (Variable): The input variable which is a Tensor or LoDTensor. + x (Tensor): The input tensor which is a Tensor. + y (Tensor): The input tensor which is a Tensor. transpose_x (bool): Whether to transpose :math:`x` before multiplication. transpose_y (bool): Whether to transpose :math:`y` before multiplication. - alpha (float): The scale of output. Default 1.0. name(str|None): A name for this layer(optional). If set None, the layer will be named automatically. Returns: - Variable: The product Tensor (or LoDTensor) variable. + Tensor: The output Tensor. Examples: - .. code-block:: python - - # Examples to clarify shapes of the inputs and output - # x: [B, ..., M, K], y: [B, ..., K, N] - # paddle.matmul(x, y) # out: [B, ..., M, N] - - # x: [B, M, K], y: [B, K, N] - # paddle.matmul(x, y) # out: [B, M, N] - - # x: [B, M, K], y: [K, N] - # paddle.matmul(x, y) # out: [B, M, N] - - # x: [M, K], y: [K, N] - # paddle.matmul(x, y) # out: [M, N] - # x: [B, M, K], y: [K] - # paddle.matmul(x, y) # out: [B, M] + .. 
code-block:: python - # x: [K], y: [K] - # paddle.matmul(x, y) # out: [1] + import paddle + import numpy as np - # x: [M], y: [N] - # paddle.matmul(x, y, True, True) # out: [M, N] + paddle.disable_static() + # vector * vector + x_data = np.random.random([10]).astype(np.float32) + y_data = np.random.random([10]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [1] + + # matrix * vector + x_data = np.random.random([10, 5]).astype(np.float32) + y_data = np.random.random([5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10] + + # batched matrix * broadcasted vector + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([2]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5] + + # batched matrix * batched matrix + x_data = np.random.random([10, 5, 2]).astype(np.float32) + y_data = np.random.random([10, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 5, 5] + + # batched matrix * broadcasted matrix + x_data = np.random.random([10, 1, 5, 2]).astype(np.float32) + y_data = np.random.random([1, 3, 2, 5]).astype(np.float32) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) + z = paddle.matmul(x, y) + print(z.numpy().shape) + # [10, 3, 5, 5] - import paddle - import paddle.fluid as fluid - x = fluid.data(name='x', shape=[2, 3], dtype='float32') - y = fluid.data(name='y', shape=[3, 2], dtype='float32') - out = paddle.matmul(x, y, True, True) """ + op_type = 'matmul_v2' + if in_dygraph_mode(): + op = getattr(core.ops, op_type) + return op(x, y, 'trans_x', transpose_x, 'trans_y', transpose_y) + attrs = { - 'transpose_X': transpose_x, - 'transpose_Y': transpose_y, - 'alpha': float(alpha), + 'trans_x': transpose_x, + 'trans_y': transpose_y, } - if in_dygraph_mode(): - out = _varbase_creator(dtype=x.dtype) - core.ops.matmul(x, y, out, 'transpose_X', transpose_x, 'transpose_Y', - transpose_y, 'alpha', float(alpha)) - return out - def __check_input(x, y): var_names = {'x': x, 'y': y} for name, val in var_names.items(): - check_variable_and_dtype( - val, name, ['float16', 'float32', 'float64'], 'matmul') - x_shape = list(x.shape) - y_shape = list(y.shape) - if len(x_shape) == 1: - x_shape = [1] + x_shape - if len(y_shape) == 1: - y_shape = y_shape + [1] - - # check the inner 2 dimensions - if transpose_x: - x_shape[-2], x_shape[-1] = x_shape[-1], x_shape[-2] - if transpose_y: - y_shape[-2], y_shape[-1] = y_shape[-1], y_shape[-2] - if x_shape[-1] != y_shape[-2]: - assert (x_shape[-1] == -1) or (y_shape[-2] == -1), \ - "After performing an optional transpose, Input X's width should be " \ - "equal to Y's width for multiplication " \ - "prerequisites. But received X's shape: %s, Y's shape: %s\n" % \ - (x_shape, y_shape) - - if len(y_shape) > 2 and len(x_shape) > 2: - for i, dim_x in enumerate(x_shape[:-2]): - # don't check neg shape - if dim_x < 0 or y_shape[i] < 0: - continue - if dim_x != y_shape[i]: - raise ValueError( - "When the matrix is larger than 2 dimensions, the higher " - "dimensional values of the two matrices need to be equal. " - "But received x_shape[%d] != y_shape[%d]. 
X's shape: %s, " - "Y's shape: %s.\n" % (i, i, x_shape, y_shape)) + check_variable_and_dtype(val, name, ['float32', 'float64'], + 'matmul') __check_input(x, y) - helper = LayerHelper('matmul', **locals()) + helper = LayerHelper('matmul_v2', **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) helper.append_op( - type='matmul', + type='matmul_v2', inputs={'X': x, 'Y': y}, outputs={'Out': out}, @@ -455,11 +454,12 @@ def dot(x, y, name=None): This operator calculates inner product for vectors. .. note:: - Only support 1-d Tensor(vector). + Support 1-d and 2-d Tensor. When it is 2d, the first dimension of this matrix + is the batch dimension, which means that the vectors of multiple batches are dotted. Parameters: - x(Tensor): 1-D ``Tensor``. Its datatype should be ``float32``, ``float64``, ``int32``, ``int64`` - y(Tensor): 1-D ``Tensor``. Its datatype soulde be ``float32``, ``float64``, ``int32``, ``int64`` + x(Tensor): 1-D or 2-D ``Tensor``. Its dtype should be ``float32``, ``float64``, ``int32``, ``int64`` + y(Tensor): 1-D or 2-D ``Tensor``. Its dtype soulde be ``float32``, ``float64``, ``int32``, ``int64`` name(str, optional): Name of the output. Default is None. It's used to print debug info for developers. Details: :ref:`api_guide_Name` Returns: @@ -470,14 +470,13 @@ def dot(x, y, name=None): .. code-block:: python import paddle - import paddle.fluid as fluid import numpy as np paddle.disable_static() x_data = np.random.uniform(0.1, 1, [10]).astype(np.float32) y_data = np.random.uniform(1, 3, [10]).astype(np.float32) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor(x_data) + y = paddle.to_tensor(y_data) z = paddle.dot(x, y) print(z.numpy()) diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py index 18dbeb0c46e8a3416d7d57f92ffa6064510250b3..36b558d597c1ce1333a8f1eec54e2fd2813625e3 100644 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -13,9 +13,11 @@ # limitations under the License. from ..fluid.layer_helper import LayerHelper -from ..fluid.data_feeder import check_type +from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..fluid.layers.layer_function_generator import templatedoc from .. import fluid +from ..fluid.framework import in_dygraph_mode +from paddle.common_ops_import import * # TODO: define logic functions of a tensor from ..fluid.layers import is_empty #DEFINE_ALIAS @@ -91,75 +93,70 @@ def equal_all(x, y, name=None): @templatedoc() -def allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): +def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): """ - :alias_main: paddle.allclose - :alias: paddle.allclose,paddle.tensor.allclose,paddle.tensor.logic.allclose - ${comment} Args: - input(inputtype):{input_comment}. - other(othertype):{other_comment}. - rtol(rtoltype,optional):{rtol_comment}. - atol(atoltype,optional):{atol_comment}. - equal_nan(equalnantype,optional):{equal_nan_comment}. - name(STR, optional): The default value is None. - Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name`. + x(Tensor): ${input_comment}. + y(Tensor): ${other_comment}. + rtol(rtoltype, optional): ${rtol_comment}. + atol(atoltype, optional): ${atol_comment}. + equal_nan(equalnantype, optional): ${equal_nan_comment}. + name (str, optional): Name for the operation. For more information, please + refer to :ref:`api_guide_Name`. Default: None. Returns: - ${out_comment}. 
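Regarding the ``paddle.dot`` change above, which now treats 2-D inputs as a batch of vectors, a hedged sketch of the batched case (the example kept in that docstring only covers 1-D inputs):

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()

    # For 2-D inputs the first dimension is the batch dimension, so each row
    # of x is dotted with the corresponding row of y.
    x = paddle.to_tensor(np.array([[1., 2.], [3., 4.]], dtype=np.float32))
    y = paddle.to_tensor(np.array([[5., 6.], [7., 8.]], dtype=np.float32))
    z = paddle.dot(x, y)
    print(z.numpy())   # per-row inner products: 1*5+2*6 = 17 and 3*7+4*8 = 53
                       # (the result may be reported with shape [2] or [2, 1])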
+ Tensor: ${out_comment}. + + Raises: + TypeError: The data type of ``x`` must be one of float32, float64. + TypeError: The data type of ``y`` must be one of float32, float64. + TypeError: The type of ``rtol`` must be float. + TypeError: The type of ``atol`` must be float. + TypeError: The type of ``equal_nan`` must be bool. - Return Type: - ${out_type} - Examples: .. code-block:: python import paddle - import paddle.fluid as fluid import numpy as np - use_cuda = fluid.core.is_compiled_with_cuda() - - a = fluid.data(name="a", shape=[2], dtype='float32') - b = fluid.data(name="b", shape=[2], dtype='float32') + paddle.disable_static() - result = paddle.allclose(a, b, rtol=1e-05, atol=1e-08, + np_x = np.array([10000., 1e-07]).astype("float32") + np_y = np.array([10000.1, 1e-08]).astype("float32") + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name="ignore_nan") - result_nan = paddle.allclose(a, b, rtol=1e-05, atol=1e-08, + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=True, name="equal_nan") - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - x = np.array([10000., 1e-07]).astype("float32") - y = np.array([10000.1, 1e-08]).astype("float32") - result_v, result_nan_v = exe.run( - feed={'a': x, 'b': y}, - fetch_list=[result, result_nan]) - print(result_v, result_nan_v) - # Output: (array([False]), array([False])) - - x = np.array([10000., 1e-08]).astype("float32") - y = np.array([10000.1, 1e-09]).astype("float32") - result_v, result_nan_v = exe.run( - feed={'a': x, 'b': y}, - fetch_list=[result, result_nan]) - print(result_v, result_nan_v) - # Output: (array([ True]), array([ True])) - - x = np.array([1.0, float('nan')]).astype("float32") - y = np.array([1.0, float('nan')]).astype("float32") - result_v, result_nan_v = exe.run( - feed={'a': x, 'b': y}, - fetch_list=[result, result_nan]) - print(result_v, result_nan_v) - # Output: (array([False]), array([ True])) + np_result2 = result2.numpy() + # [False] + + np_x = np.array([1.0, float('nan')]).astype("float32") + np_y = np.array([1.0, float('nan')]).astype("float32") + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=False, name="ignore_nan") + np_result1 = result1.numpy() + # [False] + result2 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, + equal_nan=True, name="equal_nan") + np_result2 = result2.numpy() + # [True] """ + if in_dygraph_mode(): + return core.ops.allclose(x, y, 'rtol', rtol, 'atol', atol, 'equal_nan', + equal_nan) + + check_variable_and_dtype(x, "input", ['float32', 'float64'], 'allclose') + check_variable_and_dtype(y, "input", ['float32', 'float64'], 'allclose') check_type(rtol, 'rtol', float, 'allclose') check_type(atol, 'atol', float, 'allclose') check_type(equal_nan, 'equal_nan', bool, 'allclose') @@ -167,7 +164,7 @@ def allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): helper = LayerHelper("allclose", **locals()) out = helper.create_variable_for_type_inference(dtype='bool') - inputs = {'Input': input, 'Other': other} + inputs = {'Input': x, 'Other': y} outputs = {'Out': out} attrs = {'rtol': rtol, 'atol': atol, 'equal_nan': equal_nan} helper.append_op( diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 
bffdf15864f01b8432a7dd21ae0bb353220fb6c4..65469759a38087b2919ada6e73aebaaadf93c905 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -14,7 +14,7 @@ from __future__ import print_function -from ..fluid.layers import core, reshape +from ..fluid.layers import core from ..fluid.layer_helper import LayerHelper from ..fluid.framework import Variable, OpProtoHolder, in_dygraph_mode, convert_np_dtype_to_dtype_ from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype @@ -23,17 +23,13 @@ from ..fluid.layers import utils import numpy as np # TODO: define functions to manipulate a tensor from ..fluid.layers import cast #DEFINE_ALIAS -from ..fluid.layers import expand #DEFINE_ALIAS from ..fluid.layers import expand_as #DEFINE_ALIAS -from ..fluid.layers import reshape #DEFINE_ALIAS -from ..fluid.layers import scatter #DEFINE_ALIAS from ..fluid.layers import slice #DEFINE_ALIAS from ..fluid.layers import strided_slice #DEFINE_ALIAS from ..fluid.layers import transpose #DEFINE_ALIAS from ..fluid.layers import unique #DEFINE_ALIAS from ..fluid.layers import unstack #DEFINE_ALIAS -from ..fluid.layers import gather_nd #DEFINE_ALIAS from ..fluid.layers import scatter_nd_add #DEFINE_ALIAS from ..fluid.layers import scatter_nd #DEFINE_ALIAS from ..fluid.layers import shard_index #DEFINE_ALIAS @@ -45,6 +41,7 @@ __all__ = [ 'cast', 'concat', 'expand', + 'broadcast_to', 'expand_as', 'flatten', 'gather', @@ -57,6 +54,7 @@ __all__ = [ 'shard_index', 'slice', 'split', + 'chunk' 'squeeze', 'stack', 'strided_slice', @@ -68,6 +66,7 @@ __all__ = [ 'flip', 'unbind', 'roll', + 'tile', ] @@ -110,9 +109,9 @@ def concat(x, axis=0, name=None): [14, 15, 16]]) in3 = np.array([[21, 22], [23, 24]]) - x1 = paddle.to_variable(in1) - x2 = paddle.to_variable(in2) - x3 = paddle.to_variable(in3) + x1 = paddle.to_tensor(in1) + x2 = paddle.to_tensor(in2) + x3 = paddle.to_tensor(in3) zero = paddle.full(shape=[1], dtype='int32', fill_value=0) # When the axis is negative, the real axis is (axis + Rank(x)) # As follow, axis is -1, Rank(x) is 2, the real axis is 1 @@ -376,7 +375,7 @@ def roll(x, shifts, axis=None, name=None): outputs={'Out': out}, attrs={'axis': axis, 'shifts': shifts}) - out = reshape(out, shape=origin_shape, inplace=True) + out = layers.reshape(out, shape=origin_shape, inplace=True) return out @@ -474,9 +473,6 @@ def stack(x, axis=0, name=None): def split(x, num_or_sections, axis=0, name=None): """ - :alias_main: paddle.split - :alias: paddle.tensor.split, paddle.tensor.manipulation.split - Split the input tensor into multiple sub-Tensors. Args: @@ -506,7 +502,7 @@ def split(x, num_or_sections, axis=0, name=None): paddle.disable_static() # x is a Tensor which shape is [3, 9, 5] x_np = np.random.random([3, 9, 5]).astype("int32") - x = paddle.to_variable(x_np) + x = paddle.to_tensor(x_np) out0, out1, out22 = paddle.split(x, num_or_sections=3, axis=1) # out0.shape [3, 3, 5] @@ -658,50 +654,46 @@ def unsqueeze(x, axis, name=None): return layers.unsqueeze(x, axis, name) -def gather(input, index, overwrite=True): +def gather(x, index, axis=None, name=None): """ - :alias_main: paddle.gather - :alias: paddle.gather,paddle.tensor.gather,paddle.tensor.manipulation.gather **Gather Layer** - Output is obtained by gathering entries of the outer-most dimension - of X indexed by `index` and concatenate them together. - - .. 
math:: - - Out = X[Index] - + Output is obtained by gathering entries of ``axis`` + of ``x`` indexed by ``index`` and concatenate them together. .. code-block:: text Given: - X = [[1, 2], + x = [[1, 2], [3, 4], [5, 6]] - Index = [1, 2] + index = [1, 2] + axis=[0] Then: - Out = [[3, 4], + out = [[3, 4], [5, 6]] Args: - input (Variable): The source input tensor with rank>=1. Supported data type is + x (Tensor): The source input tensor with rank>=1. Supported data type is int32, int64, float32, float64 and uint8 (only for CPU), float16 (only for GPU). - index (Variable): The index input tensor with rank=1. Data type is int32 or int64. - overwrite (bool, optional): The mode that updating the grad when has same index. - If True, use the overwrite mode to update the grad of the same index, - if False, use the accumulate mode to update the grad of the same index. - Default value is True. - - + index (Tensor): The index input tensor with rank=1. Data type is int32 or int64. + axis (Tensor|int, optional): The axis of input to be gathered, it's can be int or a Tensor with data type is int32 or int64. The default value is None, if None, the ``axis`` is 0. + name (str, optional): The default value is None. Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name` . Returns: - output (Variable): The output is a tensor with the same rank as input. + output (Tensor): The output is a tensor with the same rank as ``x``. + + Raises: + TypeError: ``x`` must be a Tensor and the data type of ``x`` must to be one of float16, float32, float64, int32, int64, uint8. + TypeError: ``index`` must be a Tensor and the data type of ``index`` must be int32 or int64. + TypeError: ``axis`` must be a Tensor or int and the data type of ``index`` must be int32 or int64 when it's a Tensor. 
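In addition to the ``axis=0`` example below, the new ``axis`` argument allows gathering along other dimensions; a minimal sketch assuming the behaviour described in the Args section above:

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()

    x = paddle.to_tensor(np.array([[1, 2, 3],
                                   [4, 5, 6]]).astype('int32'))
    index = paddle.to_tensor(np.array([0, 2]).astype('int32'))

    # Gather columns 0 and 2 from every row by passing axis=1.
    out = paddle.gather(x, index, axis=1)
    print(out.numpy())   # expected: [[1 3]
                         #            [4 6]]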
Examples: @@ -709,26 +701,41 @@ def gather(input, index, overwrite=True): import numpy as np import paddle - import paddle.fluid as fluid - - with fluid.dygraph.guard(): - input_1 = np.array([[1,2],[3,4],[5,6]]) - index_1 = np.array([0,1]) - input = fluid.dygraph.to_variable(input_1) - index = fluid.dygraph.to_variable(index_1) - output = paddle.gather(input, index) - # expected output: [[1,2],[3,4]] + paddle.disable_static() + input_1 = np.array([[1,2],[3,4],[5,6]]) + index_1 = np.array([0,1]) + input = paddle.to_tensor(input_1) + index = paddle.to_tensor(index_1) + output = paddle.gather(input, index, axis=0) + # expected output: [[1,2],[3,4]] """ + if axis is None: + axis = 0 + axis_tensor = axis + if not isinstance(axis, Variable): + axis_tensor = fill_constant(shape=[1], dtype='int64', value=axis) + if in_dygraph_mode(): + return core.ops.gather(x, index, axis_tensor) + + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64', 'int32', 'int64', 'uint8'], + 'gather') + check_variable_and_dtype(index, 'index', ['int32', 'int64'], 'gather') + if isinstance(axis, Variable): + check_variable_and_dtype(axis, 'axis', ['int32', 'int64'], 'gather') + else: + check_type(axis, 'axis', (int), 'gather') + helper = LayerHelper('gather', **locals()) dtype = helper.input_dtype() out = helper.create_variable_for_type_inference(dtype) helper.append_op( type="gather", - inputs={"X": input, - "Index": index}, - outputs={"Out": out}, - attrs={'overwrite': overwrite}) + inputs={"X": x, + "Index": index, + "Axis": axis_tensor}, + outputs={"Out": out}) return out @@ -787,3 +794,523 @@ def unbind(input, axis=0): outputs={"Out": outs}, attrs={"axis": axis}) return outs + + +def scatter(x, index, updates, overwrite=True, name=None): + """ + **Scatter Layer** + Output is obtained by updating the input on selected indices based on updates. + + .. code-block:: python + import numpy as np + #input: + x = np.array([[1, 1], [2, 2], [3, 3]]) + index = np.array([2, 1, 0, 1]) + # shape of updates should be the same as x + # shape of updates with dim > 1 should be the same as input + updates = np.array([[1, 1], [2, 2], [3, 3], [4, 4]]) + overwrite = False + # calculation: + if not overwrite: + for i in range(len(index)): + x[index[i]] = np.zeros((2)) + for i in range(len(index)): + if (overwrite): + x[index[i]] = updates[i] + else: + x[index[i]] += updates[i] + # output: + out = np.array([[3, 3], [6, 6], [1, 1]]) + out.shape # [3, 2] + + **NOTICE**: The order in which updates are applied is nondeterministic, + so the output will be nondeterministic if index contains duplicates. + + Args: + x (Tensor): The input N-D Tensor with ndim>=1. Data type can be float32, float64. + index (Tensor): The index 1-D Tensor. Data type can be int32, int64. The length of index cannot exceed updates's length, and the value in index cannot exceed input's length. + updates (Tensor): update input with updates parameter based on index. shape should be the same as input, and dim value with dim > 1 should be the same as input. + overwrite (bool): The mode that updating the output when there are same indices. + If True, use the overwrite mode to update the output of the same index, + if False, use the accumulate mode to update the output of the same index.Default value is True. + name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . + + Returns: + Tensor: The output is a Tensor with the same shape as x. + + Examples: + .. 
code-block:: python + + import paddle + import numpy as np + paddle.disable_static() + + x_data = np.array([[1, 1], [2, 2], [3, 3]]).astype(np.float32) + index_data = np.array([2, 1, 0, 1]).astype(np.int64) + updates_data = np.array([[1, 1], [2, 2], [3, 3], [4, 4]]).astype(np.float32) + + x = paddle.to_tensor(x_data) + index = paddle.to_tensor(index_data) + updates = paddle.to_tensor(updates_data) + + output1 = paddle.scatter(x, index, updates, overwrite=False) + # [[3., 3.], + # [6., 6.], + # [1., 1.]] + + output2 = paddle.scatter(x, index, updates, overwrite=True) + # CPU device: + # [[3., 3.], + # [4., 4.], + # [1., 1.]] + # GPU device maybe have two results because of the repeated numbers in index + # result 1: + # [[3., 3.], + # [4., 4.], + # [1., 1.]] + # result 2: + # [[3., 3.], + # [2., 2.], + # [1., 1.]] + """ + if in_dygraph_mode(): + return core.ops.scatter(x, index, updates, 'overwrite', overwrite) + + check_variable_and_dtype(x, 'dtype', ['float32', 'float64'], 'scatter') + check_type(overwrite, 'overwrite', bool, 'scatter') + helper = LayerHelper('scatter', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type="scatter", + inputs={"X": x, + "Ids": index, + "Updates": updates}, + attrs={'overwrite': overwrite}, + outputs={"Out": out}) + return out + + +def chunk(x, chunks, axis=0, name=None): + """ + Split the input tensor into multiple sub-Tensors. + + Args: + x (Tensor): A N-D Tensor. The data type is bool, float16, float32, float64, int32 or int64. + chunks(int): The number of tensor to be split along the certain axis. + axis (int|Tensor, optional): The axis along which to split, it can be a scalar with type + ``int`` or a ``Tensor`` with shape [1] and data type ``int32`` or ``int64``. + If :math::`axis < 0`, the axis to split along is :math:`rank(x) + axis`. Default is 0. + name (str, optional): The default value is None. Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name` . + Returns: + list(Tensor): The list of segmented Tensors. + Raises: + TypeError: The data type of ``x`` must be one of bool, float16, float32, float64, int32, int64. + TypeError: ``chunks`` is not int. + TypeError: ``axis`` is not int or Tensor. the data type of ``axis`` must be int32 or int64 when it's a Tensor. + Example: + .. code-block:: python + + import numpy as np + import paddle + + paddle.disable_static() + # x is a Tensor which shape is [3, 9, 5] + x_np = np.random.random([3, 9, 5]).astype("int32") + x = paddle.to_tensor(x_np) + + out0, out1, out22 = paddle.chunk(x, chunks=3, axis=1) + # out0.shape [3, 3, 5] + # out1.shape [3, 3, 5] + # out2.shape [3, 3, 5] + + + # axis is negative, the real axis is (rank(x) + axis) which real + # value is 1. + out0, out1, out2 = paddle.chunk(x, chunks=3, axis=-2) + # out0.shape [3, 3, 5] + # out1.shape [3, 3, 5] + # out2.shape [3, 3, 5] + """ + check_type(chunks, 'chunks', (int), 'chunk') + return paddle.fluid.layers.split( + input=x, num_or_sections=chunks, dim=axis, name=name) + + +def tile(x, repeat_times, name=None): + """ + + Construct a new Tensor by repeating ``x`` the number of times given by ``repeat_times``. + After tiling, the value of the i'th dimension of the output is equal to ``x.shape[i]*repeat_times[i]``. + + Both the number of dimensions of ``x`` and the number of elements in ``repeat_times`` should be less than or equal to 6. + + Args: + x (Tensor): The input tensor, its data type should be bool, float32, float64, int32 or int64. 
+ repeat_times (Tensor|tuple|list): The number of repeating times. If repeat_times is a list or tuple, all its elements + should be integers or 1-D Tensors with the data type int32. If repeat_times is a Tensor, it should be an 1-D Tensor with the data type int32. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + N-D Tensor. The data type is the same as ``x``. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + np_data = np.array([1, 2, 3]).astype('int32') + data = paddle.to_tensor(np_data) + out = paddle.tile(data, repeat_times=[2, 1]) + np_out = out.numpy() + # [[1, 2, 3], [1, 2, 3]] + + out = paddle.tile(data, repeat_times=[2, 2]) + np_out = out.numpy() + # [[1, 2, 3, 1, 2, 3], [1, 2, 3, 1, 2, 3]] + + np_repeat_times = np.array([2, 1]).astype("int32") + repeat_times = paddle.to_tensor(np_repeat_times) + out = paddle.tile(data, repeat_times=repeat_times) + np_out = out.numpy() + # [[1, 2, 3], [1, 2, 3]] + """ + check_variable_and_dtype( + x, 'x', ['bool', 'float32', 'float64', 'int32', 'int64'], 'tile') + check_type(repeat_times, 'repeat_times', (list, tuple, Variable), 'tile') + if convert_dtype(x.dtype) == 'bool' and x.stop_gradient == False: + raise ValueError( + "When the date type is bool for the input 'x' of tile op, you " + "must set its stop_gradient to be True by " + "some_var.stop_gradient == True supporting some_var is the input.") + + if in_dygraph_mode(): + return core.ops.tile(x, 'repeat_times', repeat_times) + + helper = LayerHelper('tile', **locals()) + + inputs = {"X": [x]} + attrs = {} + + def get_attr_repeat_times(list_repeat_times): + attrs_repeat_times = [] + for idx, times in enumerate(list_repeat_times): + if isinstance(times, Variable): + attrs_repeat_times.append(-1) + else: + attrs_repeat_times.append(times) + assert times > 0, ( + "All elements in repeat_times must be positive for tile.") + return attrs_repeat_times + + if isinstance(repeat_times, Variable): + repeat_times.stop_gradient = True + inputs['RepeatTimes'] = repeat_times + attrs['repeat_times'] = [-1] + elif isinstance(repeat_times, (list, tuple)): + attrs['repeat_times'] = get_attr_repeat_times(repeat_times) + if utils._contain_var(repeat_times): + inputs['repeat_times_tensor'] = utils._convert_to_tensor_list( + repeat_times) + + dtype = helper.input_dtype(input_param_name='x') + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='tile', inputs=inputs, outputs={'Out': out}, attrs=attrs) + return out + + +def expand_as(x, y, name=None): + """ + + Expand the input tensor ``x`` to the same shape as the input tensor ``y``. + + Both the number of dimensions of ``x`` and ``y`` must be less than or equal to 6, and the number of dimensions of ``y`` must be greather than or equal to that of ``x``. The dimension to expand must have a value of 1. + + Args: + x (Tensor): The input tensor, its data type is bool, float32, float64, int32 or int64. + y (Tensor): The input tensor that gives the shape to expand to. + name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + N-D Tensor: A Tensor with the same shape as ``y``. The data type is the same as ``x``. + + Examples: + .. 
code-block:: python + + import numpy as np + import paddle + + paddle.disable_static() + + np_data_x = np.array([1, 2, 3]).astype('int32') + np_data_y = np.array([[1, 2, 3], [4, 5, 6]]).astype('int32') + data_x = paddle.to_tensor(np_data_x) + data_y = paddle.to_tensor(np_data_y) + out = paddle.expand_as(data_x, data_y) + np_out = out.numpy() + # [[1, 2, 3], [1, 2, 3]] + """ + check_variable_and_dtype( + x, 'x', ['bool', 'float32', 'float64', 'int32', 'int64'], 'expand_as') + check_type(y, 'y', Variable, 'expand_as') + + if convert_dtype(x.dtype) == 'bool' and x.stop_gradient == False: + raise ValueError( + "When the data type of input 'x' for expand_as is bool, " + "you must set its stop_gradient to be False by " + "some_var.stop_gradient = True, supporting " + "some_var as the input 'x'.") + inputs = {"X": [x], "target_tensor": [y]} + + if in_dygraph_mode(): + return core.ops.expand_as_v2(x, y) + + helper = LayerHelper('expand_as', **locals()) + dtype = helper.input_dtype(input_param_name='x') + out = helper.create_variable_for_type_inference(dtype) + helper.append_op(type='expand_as_v2', inputs=inputs, outputs={'Out': out}) + return out + + +def expand(x, shape, name=None): + """ + + Expand the input tensor to a given shape. + + Both the number of dimensions of ``x`` and the number of elements in ``shape`` should be less than or equal to 6. The dimension to expand must have a value 1. + + + Args: + x (Tensor): The input tensor, its data type is bool, float32, float64, int32 or int64. + shape (list|tuple|Tensor): The result shape after expanding. The data type is int32. If shape is a list or tuple, all its elements + should be integers or 1-D Tensors with the data type int32. If shape is a Tensor, it should be an 1-D Tensor with the data type int32. + The value -1 in shape means keeping the corresponding dimension unchanged. + name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . + + Returns: + N-D Tensor: A Tensor with the given shape. The data type is the same as ``x``. + + Examples: + .. 
code-block:: python + + import numpy as np + import paddle + + paddle.disable_static() + np_data = np.array([1, 2, 3]).astype('int32') + data = paddle.to_tensor(np_data) + out = paddle.expand(data, shape=[2, 3]) + out = out.numpy() + # [[1, 2, 3], [1, 2, 3]] + """ + check_variable_and_dtype( + x, 'x', ['bool', 'float32', 'float64', 'int32', 'int64'], 'expand') + check_type(shape, 'shape', (list, tuple, Variable), 'expand') + + inputs = {"X": [x]} + attrs = {} + if convert_dtype(x.dtype) == 'bool' and x.stop_gradient == False: + raise ValueError("When the data type of input 'x' for expand is bool, " + "you must set its stop_gradient to be False by " + "some_var.stop_gradient = True, supporting " + "some_var as the input.") + + if in_dygraph_mode(): + return core.ops.expand_v2(x, 'shape', shape) + + helper = LayerHelper('expand', **locals()) + + def get_attr_expand_shape(list_expand_shape): + attrs_expand_shape = [] + for idx, shape in enumerate(list_expand_shape): + if isinstance(shape, Variable): + attrs_expand_shape.append(-1) + else: + attrs_expand_shape.append(shape) + assert shape > 0 or shape == -1, ( + "All elements in shape of expand must be positive or -1.") + return attrs_expand_shape + + if isinstance(shape, Variable): + shape.stop_gradient = True + inputs['Shape'] = shape + elif isinstance(shape, (list, tuple)): + attrs['shape'] = get_attr_expand_shape(shape) + if utils._contain_var(shape): + inputs['expand_shapes_tensor'] = utils._convert_to_tensor_list( + shape) + + dtype = helper.input_dtype(input_param_name='x') + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='expand_v2', inputs=inputs, outputs={'Out': out}, attrs=attrs) + return out + + +broadcast_to = expand + + +def reshape(x, shape, name=None): + """ + :alias_main: paddle.reshape + :alias: paddle.reshape,paddle.tensor.reshape,paddle.tensor.manipulation.reshape + + This operator changes the shape of ``x`` without changing its data. + + Some tricks exist when specifying the target shape. + + 1. -1 means the value of this dimension is inferred from the total element + number of x and remaining dimensions. Thus one and only one dimension can + be set -1. + + 2. 0 means the actual dimension value is going to be copied from the + corresponding dimension of x. The index of 0s in shape can not exceed + the dimension of x. + + Here are some examples to explain it. + + 1. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape + is [6, 8], the reshape operator will transform x into a 2-D tensor with + shape [6, 8] and leaving x's data unchanged. + + 2. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape + specified is [2, 3, -1, 2], the reshape operator will transform x into a + 4-D tensor with shape [2, 3, 4, 2] and leaving x's data unchanged. In this + case, one dimension of the target shape is set to -1, the value of this + dimension is inferred from the total element number of x and remaining + dimensions. + + 3. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape + is [-1, 0, 3, 2], the reshape operator will transform x into a 4-D tensor + with shape [2, 4, 3, 2] and leaving x's data unchanged. In this case, + besides -1, 0 means the actual dimension value is going to be copied from + the corresponding dimension of x. + + Args: + x(Tensor): An N-D Tensor. The data type is ``float32``, ``float64``, ``int32`` or ``int64``. + shape(list|tuple|Tensor): Define the target shape. At most one dimension of the target shape can be -1. 
+ The data type is ``int32`` . If ``shape`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. + If ``shape`` is an Tensor, it should be an 1-D Tensor . + name(str, optional): The default value is None. Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name` . + + Returns: + Tensor: A reshaped Tensor with the same data type as ``x``. + + Raises: + ValueError: If more than one elements of ``shape`` is -1. + ValueError: If the element of ``shape`` is 0, the corresponding dimension should be less than or equal to the dimension of ``x``. + ValueError: If the elements in ``shape`` is negative except -1. + + Examples: + .. code-block:: python + + import numpy as np + import paddle + + paddle.disable_static() + + data = np.random.random([2, 4, 6]).astype("float32") + x = paddle.to_tensor(data) + + positive_four = paddle.fill_constant([1], "int32", 4) + + out_1 = paddle.reshape(x, [-1, 0, 3, 2]) + # the shape of out_1 is [2,4,3,2]. + + out_2 = paddle.reshape(x, shape=[positive_four, 12]) + # the shape of out_2 is [4, 12]. + + shape_tensor = paddle.to_tensor(np.array([8, 6]).astype("int32")) + out_3 = paddle.reshape(x, shape=shape_tensor) + # the shape of out_2 is [8, 6]. + """ + return paddle.fluid.layers.reshape(x=x, shape=shape, name=name) + + +def gather_nd(x, index, name=None): + """ + + This function is actually a high-dimensional extension of :code:`gather` + and supports for simultaneous indexing by multiple axes. :attr:`index` is a + K-dimensional integer tensor, which is regarded as a (K-1)-dimensional + tensor of :attr:`index` into :attr:`input`, where each element defines + a slice of params: + + .. math:: + + output[(i_0, ..., i_{K-2})] = input[index[(i_0, ..., i_{K-2})]] + + Obviously, :code:`index.shape[-1] <= input.rank` . And, the output tensor has + shape :code:`index.shape[:-1] + input.shape[index.shape[-1]:]` . + + .. code-block:: text + + Given: + x = [[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]] + x.shape = (2, 3, 4) + + * Case 1: + index = [[1]] + + gather_nd(x, index) + = [x[1, :, :]] + = [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]] + + * Case 2: + index = [[0,2]] + + gather_nd(x, index) + = [x[0, 2, :]] + = [8, 9, 10, 11] + + * Case 3: + index = [[1, 2, 3]] + + gather_nd(x, index) + = [x[1, 2, 3]] + = [23] + + Args: + x (Tensor): The input Tensor which it's data type should be bool, float32, float64, int32, int64. + index (Tensor): The index input with rank > 1, index.shape[-1] <= input.rank. + Its dtype should be int32, int64. + name(str, optional): The default value is None. Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name` . + + Returns: + output (Tensor): A tensor with the shape index.shape[:-1] + input.shape[index.shape[-1]:] + + Raises: + TypeError: ``x`` must be a Tensor and the data type of ``x`` must be one of float32, float64, int32 and int64. + TypeError: ``index`` must be a Tensor and the data type of ``index`` must be one of int32 and int64. + + Examples: + + .. 
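A NumPy sketch of the indexing rule quoted above, output[(i_0, ..., i_{K-2})] = x[index[(i_0, ..., i_{K-2})]]; the helper is invented for illustration and reproduces the three documented cases.

# Illustration only: gather_nd expressed as plain NumPy indexing.
import numpy as np

def sketch_gather_nd(x, index):
    index = np.asarray(index)
    flat = index.reshape(-1, index.shape[-1])
    # Each row of the index is a (possibly partial) coordinate into x.
    gathered = np.stack([x[tuple(coord)] for coord in flat])
    return gathered.reshape(index.shape[:-1] + x.shape[index.shape[-1]:])

x = np.arange(24).reshape(2, 3, 4)
print(sketch_gather_nd(x, [[1]]).shape)     # (1, 3, 4): the slice x[1, :, :]   (Case 1)
print(sketch_gather_nd(x, [[0, 2]]))        # [[ 8  9 10 11]]                   (Case 2)
print(sketch_gather_nd(x, [[1, 2, 3]]))     # [23]                              (Case 3)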
code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + np_x = np.array([[[1, 2], [3, 4], [5, 6]], + [[7, 8], [9, 10], [11, 12]]]) + np_index = [[0, 1]] + x = paddle.to_tensor(np_x) + index = paddle.to_tensor(np_index) + + output = paddle.gather_nd(x, index) #[[3, 4]] + + """ + + return paddle.fluid.layers.gather_nd(input=x, index=index, name=name) diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 8827a0dab395db745cc4ee4bd969dff29f125136..85441597cf56c61ef33d552ec6a9f7c5019b4ec8 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -51,18 +51,18 @@ from ..fluid.layers import reduce_sum #DEFINE_ALIAS from ..fluid.layers import round #DEFINE_ALIAS from ..fluid.layers import rsqrt #DEFINE_ALIAS from ..fluid.layers import scale #DEFINE_ALIAS -from ..fluid.layers import sign #DEFINE_ALIAS from ..fluid.layers import square #DEFINE_ALIAS from ..fluid.layers import stanh #DEFINE_ALIAS from ..fluid.layers import atan #DEFINE_ALIAS from ..fluid.layers import erf #DEFINE_ALIAS from ..fluid.layers import sqrt #DEFINE_ALIAS from ..fluid.layers import sin #DEFINE_ALIAS -from ..fluid.layers import tanh #DEFINE_ALIAS from ..fluid.layers import increment #DEFINE_ALIAS from ..fluid.layers import multiplex #DEFINE_ALIAS from ..fluid.layers import sums #DEFINE_ALIAS +from ..fluid import layers +import paddle __all__ = [ 'abs', @@ -83,8 +83,10 @@ __all__ = [ 'floor', 'increment', 'log', + 'logsumexp', 'mul', 'multiplex', + 'prod', 'pow', 'reciprocal', 'reduce_max', @@ -109,7 +111,11 @@ __all__ = [ 'min', 'minimum', 'mm', - 'div', + 'divide', + 'floor_divide', + 'remainder', + 'mod', + 'floor_mod', 'multiply', 'add', 'atan', @@ -119,12 +125,28 @@ __all__ = [ 'erf', 'addcmul', 'addmm', - 'clamp', + 'clip', 'trace', - 'kron' + 'kron', + 'isfinite', + 'isinf', + 'isnan' ] # yapf: enable. +_supported_int_dtype_ = [ + VarDesc.VarType.UINT8, + VarDesc.VarType.INT8, + VarDesc.VarType.INT16, + VarDesc.VarType.INT32, + VarDesc.VarType.INT64, +] + +_supported_float_dtype_ = [ + VarDesc.VarType.FP32, + VarDesc.VarType.FP64, +] + @templatedoc() def pow(input, exponent, name=None): """ @@ -233,246 +255,311 @@ def _elementwise_op(helper): return helper.append_activation(out) -def add(x, y, alpha=1, name=None): +def add(x, y, name=None): """ Examples: - .. code-block:: python + .. code-block:: python import paddle - import paddle.fluid as fluid import numpy as np - def gen_data(): - return { - "x": np.array([2, 3, 4]).astype('float32'), - "y": np.array([1, 5, 2]).astype('float32') - } + paddle.disable_static() + np_x = np.array([2, 3, 4]).astype('float64') + np_y = np.array([1, 5, 2]).astype('float64') + x = paddle.to_variable(np_x) + y = paddle.to_variable(np_y) + z = paddle.add(x, y) + np_z = z.numpy() + print(np_z) # [3., 8., 6. ] - x = fluid.data(name="x", shape=[3], dtype='float32') - y = fluid.data(name="y", shape=[3], dtype='float32') - z1 = paddle.add(x, y) - z2 = paddle.add(x, y, alpha=10) - # z = x + y + """ + op_type = 'elementwise_add' + axis = -1 + if in_dygraph_mode(): + return _elementwise_op_in_dygraph( + x, y, axis=axis, op_name=op_type) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - z_value = exe.run(feed=gen_data(), - fetch_list=[z1.name, z2.name]) + return _elementwise_op(LayerHelper(op_type, **locals())) - print(z_value[0]) # [3., 8., 6.] - print(z_value[1]) # [12. 53. 24.] +def divide(x, y, name=None): + """ + Divide two tensors element-wise. The equation is: - .. code-block:: python + .. 
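Since this patch drops the `alpha` argument from `paddle.add`, a hedged migration sketch may help: the removed `add(x, y, alpha=10)` example above can be reproduced by scaling `y` explicitly (via the `scale` alias imported at the top of this file), which is what the old implementation did internally.

# Illustration only: recovering the removed alpha behaviour of paddle.add.
import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.array([2., 3., 4.], dtype='float32'))
y = paddle.to_tensor(np.array([1., 5., 2.], dtype='float32'))

alpha = 10.0
z = paddle.add(x, paddle.scale(y, scale=alpha))   # old paddle.add(x, y, alpha=10)
print(z.numpy())                                  # [12., 53., 24.], as in the removed example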
math:: + out = x / y - import paddle - import paddle.fluid as fluid - import numpy as np + **Note**: + ``paddle.divide`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . - def gen_data(): - return { - "x": np.ones((2, 3, 4, 5)).astype('float32'), - "y": np.zeros((4, 5)).astype('float32') - } + Args: + x (Tensor): the input tensor, it's data type should be float32, float64, int32, int64. + y (Tensor): the input tensor, it's data type should be float32, float64, int32, int64. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - x = fluid.data(name="x", shape=[2, 3, 4, 5], dtype='float32') - y = fluid.data(name="y", shape=[4, 5], dtype='float32') - z = paddle.add(x, y, name='z') - # z = x + y + Returns: + N-D Tensor. A location into which the result is stored. It's dimension equals with $x$. - place = fluid.CPUPlace() - exe = fluid.Executor(place) + Examples: - z_value = exe.run(feed=gen_data(), - fetch_list=[z.name]) + .. code-block:: python - print(z_value[0]) - print(z_value[0].shape) # z.shape=[2,3,4,5] + import paddle + import numpy as np + paddle.disable_static() - .. code-block:: python + np_x = np.array([2, 3, 4]).astype('float64') + np_y = np.array([1, 5, 2]).astype('float64') + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + z = paddle.divide(x, y) + print(z.numpy()) # [2., 0.6, 2.] - import paddle - import paddle.fluid as fluid - import numpy as np + """ + op_type = 'elementwise_div' + axis = -1 + act = None + if in_dygraph_mode(): + # rule 1 : avoid numpy.ndarray + if isinstance(x, numpy.ndarray) or isinstance(y, numpy.ndarray): + raise TypeError("divide(): arguments must be Tensor or scalar, not numpy.ndarray.") + + # rule 2: both the inputs are not Tensor + elif not isinstance(x, paddle.Tensor) and not isinstance(y, paddle.Tensor): + x = paddle.full(shape=[1], dtype=paddle.get_default_dtype(), fill_value=x) + y = paddle.full(shape=[1], dtype=paddle.get_default_dtype(), fill_value=y) + + # rule 3: both the inputs are Tensor + elif isinstance(x, paddle.Tensor) and isinstance(y, paddle.Tensor): + if y.dtype != x.dtype: + raise TypeError("divide(): argument position 1 and argument position 2 must have the same dtype." 
+ "But x is {}, y is {}".format(x.dtype, y.dtype)) + elif x.dtype in _supported_int_dtype_: + x = x.astype(paddle.get_default_dtype()) + y = y.astype(paddle.get_default_dtype()) + + # rule 4: x is Tensor, y is scalar + elif isinstance(x, paddle.Tensor) and not isinstance(y, paddle.Tensor): + if x.dtype in _supported_int_dtype_: + x = x.astype(paddle.get_default_dtype()) + y = paddle.full(shape=[1], dtype=x.dtype, fill_value=y) + + # rule 5: x is scalar, y is Tensor + elif not isinstance(x, paddle.Tensor) and isinstance(y, paddle.Tensor): + if y.dtype in _supported_int_dtype_: + y = y.astype(paddle.get_default_dtype()) + x = paddle.full(shape=[1], dtype=y.dtype, fill_value=x) - def gen_data(): - return { - "x": np.random.randint(1, 5, size=[2, 3, 4, 5]).astype('float32'), - "y": np.random.randint(1, 5, size=[5]).astype('float32') - } + return _elementwise_op_in_dygraph( + x, y, axis=axis, act=act, op_name=op_type) - x = fluid.data(name="x", shape=[2,3,4,5], dtype='float32') - y = fluid.data(name="y", shape=[5], dtype='float32') - z = paddle.add(x, y) - # z = x / y + # rule 1 : avoid numpy.ndarray + if isinstance(x, numpy.ndarray) or isinstance(y, numpy.ndarray): + raise TypeError("divide(): arguments must be Tensor or scalar, not numpy.ndarray.") + + # rule 2: both the inputs are not Tensor + elif not isinstance(x, Variable) and not isinstance(y, Variable): + x = paddle.fill_constant(shape=[1], dtype=paddle.get_default_dtype(), value=x) + y = paddle.fill_constant(shape=[1], dtype=paddle.get_default_dtype(), value=y) + + # rule 3: both the inputs are Tensor + elif isinstance(x, Variable) and isinstance(y, Variable): + if y.dtype != x.dtype: + raise TypeError("divide(): argument position 1 and argument position 2 must have the same dtype." + "But x is {}, y is {}".format(x.dtype, y.dtype)) + elif x.dtype in _supported_int_dtype_: + x = paddle.cast(x, paddle.get_default_dtype()) + y = paddle.cast(y, paddle.get_default_dtype()) + + # rule 4: x is Tensor, y is scalar + elif isinstance(x, Variable) and not isinstance(y, Variable): + if x.dtype in _supported_int_dtype_: + x = paddle.cast(x, paddle.get_default_dtype()) + y = paddle.fill_constant(shape=[1], dtype=x.dtype, value=y) + + # rule 5: x is scalar, y is Tensor + elif not isinstance(x, Variable) and isinstance(y, Variable): + if y.dtype in _supported_int_dtype_: + y = paddle.cast(y, paddle.get_default_dtype()) + x = paddle.fill_constant(shape=[1], dtype=y.dtype, value=x) - place = fluid.CPUPlace() - exe = fluid.Executor(place) + return _elementwise_op(LayerHelper(op_type, **locals())) - z_value = exe.run(feed=gen_data(), - fetch_list=[z.name]) - print(z_value[0]) - print(z_value[0].shape) # z.shape=[2,3,4,5] +def floor_divide(x, y, name=None): + """ + Floor divide two tensors element-wise. The equation is: - .. code-block:: python + .. math:: + out = x // y - import paddle - import paddle.fluid as fluid - import numpy as np + **Note**: + ``paddle.floor_divide`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . - x = fluid.data(name="x", shape=[3], dtype="float32") - y = fluid.data(name='y', shape=[3], dtype='float32') - z = paddle.add(x, y) + Args: + x (Tensor): the input tensor, it's data type should be int32, int64. + y (Tensor): the input tensor, it's data type should be int32, int64. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. 
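A small sketch of the promotion rules spelled out above: integer tensors handed to `paddle.divide` are cast to the default float dtype before the elementwise division, and a Python scalar operand is materialised as a 1-element tensor and broadcast.

# Illustration only: divide() always performs true division, even on integer inputs.
import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.array([2, 3, 4], dtype='int64'))
y = paddle.to_tensor(np.array([1, 5, 2], dtype='int64'))

print(paddle.divide(x, y).numpy())   # [2.  0.6 2. ]  (rule 3: both ints -> default float dtype)
print(paddle.divide(x, 2).numpy())   # [1.  1.5 2. ]  (rule 4: scalar y wrapped in a 1-element tensor)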
- place = fluid.CPUPlace() - exe = fluid.Executor(place) - data1 = np.array([2, 3, 4], dtype='float32') - data2 = np.array([1, 5, 2], dtype='float32') - z_value = exe.run(feed={'x': data1, - 'y': data2}, - fetch_list=[z]) - print(z_value[0]) # [3. 8. 6.] + Returns: + N-D Tensor. A location into which the result is stored. It's dimension equals with $x$. + Examples: - .. code-block:: python + .. code-block:: python - import paddle - import paddle.fluid as fluid - import numpy as np + import paddle + import numpy as np - with fluid.dygraph.guard(): - np_x = np.array([2, 3, 4]).astype('float64') - np_y = np.array([1, 5, 2]).astype('float64') - x = fluid.dygraph.to_variable(np_x) - y = fluid.dygraph.to_variable(np_y) - z = paddle.add(x, y, alpha=-0.5) - np_z = z.numpy() - print(np_z) # [1.5, 0.5, 3. ] + paddle.disable_static() + + np_x = np.array([2, 3, 8, 7]) + np_y = np.array([1, 5, 3, 3]) + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + z = paddle.floor_divide(x, y) + print(z.numpy()) # [2, 0, 2, 2] """ - op_type = 'elementwise_add' + op_type = 'elementwise_floordiv' axis = -1 - act = None - if alpha != 1: - y = scale(y, scale=alpha) if in_dygraph_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name=op_type) + # rule 1 : avoid numpy.ndarray + if isinstance(x, numpy.ndarray) or isinstance(y, numpy.ndarray): + raise TypeError("floor_divide(): arguments must be Tensor or scalar, not numpy.ndarray.") - return _elementwise_op(LayerHelper(op_type, **locals())) + # rule 2: both the inputs are not Tensor + elif not isinstance(x, paddle.Tensor) and not isinstance(y, paddle.Tensor): + x = paddle.full(shape=[1], dtype=paddle.get_default_dtype(), fill_value=x) + y = paddle.full(shape=[1], dtype=paddle.get_default_dtype(), fill_value=y) + # rule 3: both the inputs are Tensor + elif isinstance(x, paddle.Tensor) and isinstance(y, paddle.Tensor): + if y.dtype != x.dtype: + raise TypeError("floor_divide(): argument position 1 and argument position 2 must have the same dtype." + "But x is {}, y is {}".format(x.dtype, y.dtype)) -def div(x, y, name=None): - """ -Examples: + # rule 4: x is Tensor, y is scalar + elif isinstance(x, paddle.Tensor) and not isinstance(y, paddle.Tensor): + y = paddle.full(shape=[1], dtype=x.dtype, fill_value=y) - .. 
code-block:: python + # rule 5: x is scalar, y is Tensor + elif not isinstance(x, paddle.Tensor) and isinstance(y, paddle.Tensor): + x = paddle.full(shape=[1], dtype=y.dtype, fill_value=x) - import paddle - import paddle.fluid as fluid - import numpy as np + return _elementwise_op_in_dygraph( + x, y, axis=axis, op_name=op_type) - def gen_data(): - return { - "x": np.array([2, 3, 4]).astype('float32'), - "y": np.array([1, 5, 2]).astype('float32') - } + # rule 1 : avoid numpy.ndarray + if isinstance(x, numpy.ndarray) or isinstance(y, numpy.ndarray): + raise TypeError("divide(): arguments must be Tensor or scalar, not numpy.ndarray.") - x = fluid.data(name="x", shape=[3], dtype='float32') - y = fluid.data(name="y", shape=[3], dtype='float32') - z = paddle.div(x, y) - # z = x / y + # rule 2: both the inputs are not Tensor + elif not isinstance(x, Variable) and not isinstance(y, Variable): + x = paddle.fill_constant(shape=[1], dtype=paddle.get_default_dtype(), value=x) + y = paddle.fill_constant(shape=[1], dtype=paddle.get_default_dtype(), value=y) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - z_value = exe.run(feed=gen_data(), - fetch_list=[z.name]) + # rule 3: both the inputs are Tensor + elif isinstance(x, Variable) and isinstance(y, Variable): + if y.dtype != x.dtype: + raise TypeError("divide(): argument position 1 and argument position 2 must have the same dtype." + "But x is {}, y is {}".format(x.dtype, y.dtype)) - print(z_value) # [2., 0.6, 2.] + # rule 4: x is Tensor, y is scalar + elif isinstance(x, Variable) and not isinstance(y, Variable): + y = paddle.fill_constant(shape=[1], dtype=x.dtype, value=y) + # rule 5: x is scalar, y is Tensor + elif not isinstance(x, Variable) and isinstance(y, Variable): + x = paddle.fill_constant(shape=[1], dtype=y.dtype, value=x) - .. code-block:: python + return _elementwise_op(LayerHelper(op_type, **locals())) - import paddle - import paddle.fluid as fluid - import numpy as np - def gen_data(): - return { - "x": np.ones((2, 3, 4, 5)).astype('float32'), - "y": np.zeros((4, 5)).astype('float32') - } +def remainder(x, y, name=None): + """ + Mod two tensors element-wise. The equation is: - x = fluid.data(name="x", shape=[2, 3, 4, 5], dtype='float32') - y = fluid.data(name="y", shape=[4, 5], dtype='float32') - z = paddle.div(x, y, name='z') - # z = x / y + .. math:: + out = x \% y - place = fluid.CPUPlace() - exe = fluid.Executor(place) + **Note**: + ``paddle.remainder`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . - z_value = exe.run(feed=gen_data(), - fetch_list=[z.name]) + Args: + x (Tensor): the input tensor, it's data type should be int32, int64. + y (Tensor): the input tensor, it's data type should be int32, int64. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - print(z_value[0]) - print(z_value[0].shape) # z.shape=[2,3,4,5] + Returns: + N-D Tensor. A location into which the result is stored. It's dimension equals with $x$. + Examples: - .. code-block:: python + .. 
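A quick sketch relating `floor_divide` (above) and `remainder` (defined just below) on the non-negative integer inputs used in their examples; behaviour for negative operands is not asserted here.

# Illustration only: for these inputs, x == y * floor_divide(x, y) + remainder(x, y).
import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.array([2, 3, 8, 7]))
y = paddle.to_tensor(np.array([1, 5, 3, 3]))

q = paddle.floor_divide(x, y)     # [2, 0, 2, 2]
r = paddle.remainder(x, y)        # [0, 3, 2, 1]
print((y * q + r).numpy())        # [2, 3, 8, 7], i.e. x is recovered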
code-block:: python - import paddle - import paddle.fluid as fluid - import numpy as np + import paddle + import numpy as np - def gen_data(): - return { - "x": np.random.randint(1, 5, size=[2, 3, 4, 5]).astype('float32'), - "y": np.random.randint(1, 5, size=[5]).astype('float32') - } + paddle.disable_static() - x = fluid.data(name="x", shape=[2,3,4,5], dtype='float32') - y = fluid.data(name="y", shape=[5], dtype='float32') - z = paddle.div(x, y) - # z = x / y + np_x = np.array([2, 3, 8, 7]) + np_y = np.array([1, 5, 3, 3]) + x = paddle.to_tensor(np_x) + y = paddle.to_tensor(np_y) + z = paddle.remainder(x, y) + print(z.numpy()) # [0, 3, 2, 1] - place = fluid.CPUPlace() - exe = fluid.Executor(place) + """ + op_type = 'elementwise_mod' + axis = -1 + if in_dygraph_mode(): + # rule 1 : avoid numpy.ndarray + if isinstance(x, numpy.ndarray) or isinstance(y, numpy.ndarray): + raise TypeError("remainder(): arguments must be Tensor or scalar, not numpy.ndarray.") - z_value = exe.run(feed=gen_data(), - fetch_list=[z.name]) - print(z_value[0]) - print(z_value[0].shape) # z.shape=[2,3,4,5] + elif not isinstance(x, paddle.Tensor): + raise TypeError("remainder(): arguments position 1 must be Tensor, not {}".format(type(x))) + # rule 3: both the inputs are Tensor + elif isinstance(y, paddle.Tensor): + if y.dtype != x.dtype: + raise TypeError("remainder(): argument position 1 and argument position 2 must have the same dtype." + "But x is {}, y is {}".format(x.dtype, y.dtype)) - .. code-block:: python + # rule 4: x is Tensor, y is scalar + elif not isinstance(y, paddle.Tensor): + y = paddle.full(shape=[1], dtype=x.dtype, fill_value=y) - import paddle - import paddle.fluid as fluid - import numpy as np + return _elementwise_op_in_dygraph( + x, y, axis=axis, op_name=op_type) - with fluid.dygraph.guard(fluid.CPUPlace()): - np_x = np.array([2, 3, 4]).astype('float64') - np_y = np.array([1, 5, 2]).astype('float64') - x = fluid.dygraph.to_variable(np_x) - y = fluid.dygraph.to_variable(np_y) - z = paddle.div(x, y) - np_z = z.numpy() - print(np_z) # [2., 0.6, 2.] + # rule 1 : avoid numpy.ndarray + if isinstance(x, numpy.ndarray) or isinstance(y, numpy.ndarray): + raise TypeError("remainder(): arguments must be Tensor or scalar, not numpy.ndarray.") - """ - op_type = 'elementwise_div' - axis = -1 - act = None - if in_dygraph_mode(): - return _elementwise_op_in_dygraph( - x, y, axis=axis, act=act, op_name=op_type) + elif not isinstance(x, Variable): + raise TypeError("remainder(): arguments position 1 must be Tensor, not {}".format(type(x))) + + # rule 3: both the inputs are Tensor + elif isinstance(y, Variable): + if y.dtype != x.dtype: + raise TypeError("remainder(): argument position 1 and argument position 2 must have the same dtype." 
+ "But x is {}, y is {}".format(x.dtype, y.dtype)) + + # rule 4: x is Tensor, y is scalar + elif not isinstance(y, paddle.Tensor): + y = paddle.fill_constant(shape=[1], dtype=x.dtype, value=y) return _elementwise_op(LayerHelper(op_type, **locals())) +mod = remainder #DEFINE_ALIAS +floor_mod = remainder #DEFINE_ALIAS + + def multiply(x, y, axis=-1, name=None): """ :alias_main: paddle.multiply @@ -614,106 +701,105 @@ Examples: for func in [ add, - div, maximum, minimum, multiply ]: proto_dict = {'add': 'elementwise_add', 'div': 'elementwise_div', 'maximum': 'elementwise_max', 'minimum': 'elementwise_min', 'multiply': 'elementwise_mul'} op_proto = OpProtoHolder.instance().get_op_proto(proto_dict[func.__name__]) - if func.__name__ in ['add']: - alias_main = ':alias_main: paddle.%(func)s' % {'func': func.__name__} - alias = ':alias: paddle.%(func)s, paddle.tensor.%(func)s, paddle.tensor.math.%(func)s' % {'func': func.__name__} - - additional_args_lines = [ - "alpha (int|float, optional): The alpha factor of the input. Default is 1. If alpha is not 1, the equation becomes Out = X + alpha * Y.", - "name (string, optional): Name of the output. \ - Default is None. It's used to print debug info for developers. Details: \ - :ref:`api_guide_Name` " - ] - else: - additional_args_lines = [ - "name (string, optional): Name of the output. \ - Default is None. It's used to print debug info for developers. Details: \ - :ref:`api_guide_Name` " - ] - func.__doc__ = alias_main + """\n""" + alias + """\n""" + _generate_doc_string_( + additional_args_lines = [ + "name (string, optional): Name of the output. \ + Default is None. It's used to print debug info for developers. Details: \ + :ref:`api_guide_Name` " + ] + + func.__doc__ = _generate_doc_string_( op_proto, additional_args_lines=additional_args_lines, skip_attrs_set={"x_data_format", "y_data_format", "axis", "use_quantizer", "mkldnn_data_type", "Scale_x", "Scale_y", "Scale_out" }) + """\n""" + str(func.__doc__) -def sum(input, dim=None, dtype=None, keep_dim=False, name=None): - """ - :alias_main: paddle.sum - :alias: paddle.sum,paddle.tensor.sum,paddle.tensor.math.sum +def sum(x, axis=None, dtype=None, keepdim=False, name=None): + """ Computes the sum of tensor elements over the given dimension. Args: - input (Variable): The input variable which is a Tensor, the data type is float32, - float64, int32, int64. - dim (list|int, optional): The dimensions along which the sum is performed. If - :attr:`None`, sum all elements of :attr:`input` and return a + x (Tensor): An N-D Tensor, the data type is float32, float64, int32 or int64. + axis (int|list|tuple, optional): The dimensions along which the sum is performed. If + :attr:`None`, sum all elements of :attr:`x` and return a Tensor variable with a single element, otherwise must be in the - range :math:`[-rank(input), rank(input))`. If :math:`dim[i] < 0`, - the dimension to reduce is :math:`rank + dim[i]`. - dtype(str, optional): The dtype of output tensor. The default value is None, the dtype - of output is the same as input tensor. - keep_dim (bool, optional): Whether to reserve the reduced dimension in the - output Tensor. The result tensor will have one fewer dimension - than the :attr:`input` unless :attr:`keep_dim` is true, default + range :math:`[-rank(x), rank(x))`. If :math:`axis[i] < 0`, + the dimension to reduce is :math:`rank + axis[i]`. + dtype (str, optional): The dtype of output Tensor. The default value is None, the dtype + of output is the same as input Tensor `x`. 
+ keepdim (bool, optional): Whether to reserve the reduced dimension in the + output Tensor. The result Tensor will have one fewer dimension + than the :attr:`x` unless :attr:`keepdim` is true, default value is False. - name(str, optional): The default value is None. Normally there is no need for + name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` Returns: - Variable: Tensor, results of summation operation on the specified dim of input tensor, - it's data type is the same as input's Tensor. + Tensor: Results of summation operation on the specified axis of input Tensor `x`, + it's data type is the same as `x`. Raises: - ValueError, the :attr:`dtype` must be float64 or int64. + ValueError: The :attr:`dtype` must be float64 or int64. + TypeError: The type of :attr:`axis` must be int, list or tuple. Examples: .. code-block:: python + import numpy as np import paddle - import paddle.fluid as fluid + paddle.disable_static() + # x is a Tensor variable with following elements: # [[0.2, 0.3, 0.5, 0.9] # [0.1, 0.2, 0.6, 0.7]] # Each example is followed by the corresponding output tensor. - x = fluid.data(name='x', shape=[2, 4], dtype='float32') + x_data = np.array([[0.2, 0.3, 0.5, 0.9],[0.1, 0.2, 0.6, 0.7]]).astype('float32') + x = paddle.to_variable(x_data) out1 = paddle.sum(x) # [3.5] - out2 = paddle.sum(x, dim=0) # [0.3, 0.5, 1.1, 1.6] - out3 = paddle.sum(x, dim=-1) # [1.9, 1.6] - out4 = paddle.sum(x, dim=1, keep_dim=True) # [[1.9], [1.6]] + out2 = paddle.sum(x, axis=0) # [0.3, 0.5, 1.1, 1.6] + out3 = paddle.sum(x, axis=-1) # [1.9, 1.6] + out4 = paddle.sum(x, axis=1, keepdim=True) # [[1.9], [1.6]] # y is a Tensor variable with shape [2, 2, 2] and elements as below: # [[[1, 2], [3, 4]], # [[5, 6], [7, 8]]] # Each example is followed by the corresponding output tensor. 
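A NumPy cross-check of the documented outputs above, including the new `keepdim` spelling; the values are taken from the docstring example.

# Illustration only: paddle.sum agrees with numpy.sum on the documented example.
import numpy as np
import paddle

paddle.disable_static()
x_np = np.array([[0.2, 0.3, 0.5, 0.9], [0.1, 0.2, 0.6, 0.7]], dtype='float32')
x = paddle.to_tensor(x_np)

print(paddle.sum(x).numpy())                                          # [3.5]
print(paddle.sum(x, axis=1, keepdim=True).numpy())                    # [[1.9], [1.6]]
print(np.allclose(paddle.sum(x, axis=0).numpy(), x_np.sum(axis=0)))   # True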
- y = fluid.data(name='y', shape=[2, 2, 2], dtype='float32') - out5 = paddle.sum(y, dim=[1, 2]) # [10, 26] - out6 = paddle.sum(y, dim=[0, 1]) # [16, 20] - + y_data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype('float32') + y = paddle.to_variable(y_data) + out5 = paddle.sum(y, axis=[1, 2]) # [10, 26] + out6 = paddle.sum(y, axis=[0, 1]) # [16, 20] """ - if dim is not None and not isinstance(dim, list): - dim = [dim] + if axis is not None and not isinstance(axis, (list, tuple)): + axis = [axis] + + if not axis: + reduce_all_flag = True + else: + if len(axis) == len(x.shape): + reduce_all_flag = True + else: + reduce_all_flag = False + attrs = { - 'dim': dim if dim != None and dim != [] else [0], - 'keep_dim': keep_dim, - 'reduce_all': True if dim == None or dim == [] else False, + 'dim': axis if axis != None and axis != [] and axis != () else [0], + 'keep_dim': keepdim, + 'reduce_all': reduce_all_flag } dtype_flag = False if dtype is not None: if dtype in ['float64', 'int64']: - if (convert_dtype(input.dtype) == "float32" and dtype == "float64") or \ - (convert_dtype(input.dtype) == "int32" and dtype == "int64"): + if (convert_dtype(x.dtype) == "float32" and dtype == "float64") or \ + (convert_dtype(x.dtype) == "int32" and dtype == "int64"): attrs.update({ - 'in_dtype': input.dtype, + 'in_dtype': x.dtype, 'out_dtype': convert_np_dtype_to_dtype_(dtype) }) dtype_flag = True @@ -723,27 +809,28 @@ def sum(input, dim=None, dtype=None, keep_dim=False, name=None): format(dtype)) if in_dygraph_mode(): - reduce_all = True if dim == None or dim == [] else False - dim = dim if dim != None and dim != [] else [0] + axis = axis if axis != None and axis != [] else [0] if dtype_flag: - return core.ops.reduce_sum(input, 'dim', dim, 'keep_dim', keep_dim, - 'reduce_all', reduce_all, 'in_dtype', - input.dtype, 'out_dtype', + return core.ops.reduce_sum(x, 'dim', axis, 'keep_dim', keepdim, + 'reduce_all', reduce_all_flag, 'in_dtype', + x.dtype, 'out_dtype', convert_np_dtype_to_dtype_(dtype)) else: - return core.ops.reduce_sum(input, 'dim', dim, 'keep_dim', keep_dim, - 'reduce_all', reduce_all) + return core.ops.reduce_sum(x, 'dim', axis, 'keep_dim', keepdim, + 'reduce_all', reduce_all_flag) check_variable_and_dtype( - input, 'input', ['float32', 'float64', 'int32', 'int64'], 'reduce_sum') + x, 'x', ['float32', 'float64', 'int32', 'int64'], 'sum') + check_type(axis, 'axis', (int, list, tuple, type(None)), 'sum') + helper = LayerHelper('sum', **locals()) if dtype_flag: out = helper.create_variable_for_type_inference( dtype=convert_np_dtype_to_dtype_(dtype)) else: - out = helper.create_variable_for_type_inference(dtype=input.dtype) + out = helper.create_variable_for_type_inference(dtype=x.dtype) helper.append_op( type='reduce_sum', - inputs={'X': input}, + inputs={'X': x}, outputs={'Out': out}, attrs=attrs) return out @@ -1035,69 +1122,73 @@ def addmm(input, x, y, beta=1.0, alpha=1.0, name=None): return out -def logsumexp(x, dim=None, keepdim=False, name=None): +def logsumexp(x, axis=None, keepdim=False, name=None): """ - :alias_main: paddle.logsumexp - :alias: paddle.logsumexp,paddle.tensor.logsumexp,paddle.tensor.math.logsumexp - - This operator calculates the log of the sum of exponentials of the input Tensor. + This OP calculates the log of the sum of exponentials of ``x`` along ``axis`` . .. math:: logsumexp(x) = \log\sum exp(x) - - Parameters: - x (Variable): Input LoDTensor or Tensor. Must be one of the following types: float32, float64. 
- dim (list|int, optional): The dimensions along which the sum is performed. If :attr:`None`, - sum all elements of :attr:`input` and return a Tensor variable with a single element, - otherwise must be in the range :math:`[-rank(input), rank(input))`. If :math:`dim[i] < 0`, - the dimension to reduce is :math:`rank + dim[i]`. - keep_dim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. - The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` - is true, default value is False. - name (str, optional): The default value is None. Normally there is no need for user to - set this property. For more information, please refer to :ref:`api_guide_Name` + Args: + x (Tensor): The input Tensor with data type float32, float64. + axis (int|list|tuple, optional): The axis along which to perform + logsumexp calculations. ``axis`` should be int, list(int) or + tuple(int). If ``axis`` is a list/tuple of dimension(s), logsumexp + is calculated along all element(s) of ``axis`` . ``axis`` or + element(s) of ``axis`` should be in range [-D, D), where D is the + dimensions of ``x`` . If ``axis`` or element(s) of ``axis`` is + less than 0, it works the same way as :math:`axis + D` . If + ``axis`` is None, logsumexp is calculated along all elements of + ``x``. Default is None. + keepdim (bool, optional): Whether to reserve the reduced dimension(s) + in the output Tensor. If ``keep_dim`` is True, the dimensions of + the output Tensor is the same as ``x`` except in the reduced + dimensions(it is of size 1 in this case). Otherwise, the shape of + the output Tensor is squeezed in ``axis`` . Default is False. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. Returns: - Variable: The calcuated result Tensor/LoDTensor. + Tensor, results of logsumexp along ``axis`` of ``x``, with the same data + type as ``x``. Examples: .. code-block:: python import paddle - import paddle.fluid as fluid import numpy as np - with fluid.dygraph.guard(): - np_x = np.random.uniform(0.1, 1, [10]).astype(np.float32) - x = fluid.dygraph.to_variable(np_x) - print(paddle.logsumexp(x).numpy()) - - .. 
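For background on why this patch replaces the removed exp/reduce_sum/log composition with a dedicated `logsumexp` op: a fused form can use the usual max-shift identity, which stays finite where the naive composition overflows. The NumPy reference below illustrates that identity only; it is not the operator's actual kernel.

# Illustration only: the numerically stable log-sum-exp identity.
import numpy as np

def naive_logsumexp(x):
    return np.log(np.sum(np.exp(x)))           # overflows for large inputs

def stable_logsumexp(x):
    m = np.max(x)
    return m + np.log(np.sum(np.exp(x - m)))   # max-shifted, stays finite

x = np.array([1000.0, 1000.0])
print(naive_logsumexp(x))    # inf (exp(1000) overflows float64)
print(stable_logsumexp(x))   # 1000.6931471805599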
code-block:: python - - import paddle - import paddle.fluid as fluid - import numpy as np + paddle.disable_static() - with fluid.dygraph.guard(): - np_x = np.random.uniform(0.1, 1, [2, 3, 4]).astype(np.float32) - x = fluid.dygraph.to_variable(np_x) - print(paddle.logsumexp(x, dim=1).numpy()) - print(paddle.logsumexp(x, dim=[0, 2]).numpy()) + x = np.array([[-1.5, 0., 2.], [3., 1.2, -2.4]]) + x = paddle.to_tensor(x) + out1 = paddle.logsumexp(x) # [3.4691226] + out2 = paddle.logsumexp(x, 1) # [2.15317821, 3.15684602] """ - op_type = 'logsumexp' - assert x is not None, 'x cannot be None in {}'.format(op_type) + if isinstance(axis, int): + axis = [axis] + reduce_all = True if axis is None \ + or len(axis)==0 \ + or len(axis) == len(x.shape) else False + if axis is None or len(axis) == 0: + axis = [0] - # reduce_sum does not support float16 - check_variable_and_dtype(x, 'x', ['float32', 'float64'], op_type) - - exp_out = layers.exp(x) - sum_out = layers.reduce_sum(exp_out, dim, keepdim) + if in_dygraph_mode(): + return core.ops.logsumexp(x, 'dim', axis, 'keep_dim', keepdim, + 'reduce_all', reduce_all) - return layers.log(sum_out, name) + check_variable_and_dtype(x, 'x', + ['float32', 'float64'], + 'logsumexp') + helper = LayerHelper('logsumexp', **locals()) + attrs = {'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all} + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='logsumexp', inputs={'X': x}, outputs={'Out': out}, attrs=attrs) + return out def inverse(x, name=None): @@ -1427,14 +1518,14 @@ def addcmul(input, tensor1, tensor2, value=1.0, name=None): return out -def clamp(input, min=None, max=None, name=None): +def clip(x, min=None, max=None, name=None): """ - :alias_main: paddle.clamp - :alias: paddle.clamp,paddle.tensor.clamp,paddle.tensor.math.clamp + :alias_main: paddle.clip + :alias: paddle.clip,paddle.tensor.clip,paddle.tensor.math.clip - **clampe layer** + **clip layer** - This operator clamps all elements in input into the range [ min, max ] and return + This operator clip all elements in input into the range [ min, max ] and return a resulting tensor as the following equation: .. math:: @@ -1442,38 +1533,35 @@ def clamp(input, min=None, max=None, name=None): Out = MIN(MAX(x, min), max) Args: - input (Variable): An input N-D Tensor or LoDTensor - with data type float32, float64. - min (float32|Variable): The lower bound with type ``float32`` or a ``Tensor`` + x (Tensor): An N-D Tensor with data type float32 or float64. + min (float32|Tensor): The lower bound with type ``float32`` or a ``Tensor`` with shape [1] and type ``int32``, ``float32``, ``float64``. - max (float32|Variable): The upper bound with type ``float32`` or a ``Tensor`` + max (float32|Tensor): The upper bound with type ``float32`` or a ``Tensor`` with shape [1] and type ``int32``, ``float32``, ``float64``. name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. Returns: - Variable: A Tensor or LodTensor with the same data type and data shape as input's. + Tensor: A Tensor with the same data type and data shape as input. Examples: .. 
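A small usage sketch of the renamed `clip` API described above, including the one-sided forms where only `min` or only `max` is given; the two-sided values follow the docstring example.

# Illustration only: two-sided and one-sided clipping.
import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.array([[1.2, 3.5], [4.5, 6.4]], dtype='float32'))

print(paddle.clip(x, min=3.5, max=5.0).numpy())   # [[3.5 3.5] [4.5 5. ]]
print(paddle.clip(x, min=2.5).numpy())            # [[2.5 3.5] [4.5 6.4]]
print(paddle.clip(x, max=4.0).numpy())            # [[1.2 3.5] [4.  4. ]]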
code-block:: python import paddle - import paddle.fluid as fluid import numpy as np - in1 = np.array([[1.2,3.5], - [4.5,6.4]]).astype('float32') - with fluid.dygraph.guard(): - x1 = fluid.dygraph.to_variable(in1) - out1 = paddle.tensor.clamp(x1, min=3.5, max=5.0) - out2 = paddle.tensor.clamp(x1, min=2.5) - print(out1.numpy()) - # [[3.5, 3.5] - # [4.5, 5.0]] - print(out2.numpy()) - # [[2.5, 3.5] - # [[4.5, 6.4] + paddle.disable_static() + x = np.array([[1.2,3.5], [4.5,6.4]]).astype('float32') + x1 = paddle.to_variable(x) + out1 = paddle.clip(x1, min=3.5, max=5.0) + out2 = paddle.clip(x1, min=2.5) + print(out1.numpy()) + # [[3.5, 3.5] + # [4.5, 5.0]] + print(out2.numpy()) + # [[2.5, 3.5] + # [[4.5, 6.4] """ assert min is not None or max is not None, "either min or max should be defined." @@ -1481,20 +1569,22 @@ def clamp(input, min=None, max=None, name=None): if in_dygraph_mode(): min = sys.float_info.min if min is None else min max = sys.float_info.max if max is None else max - return core.ops.clip(input, "min", min, "max", max) + return core.ops.clip(x, "min", min, "max", max) if min is not None: - check_type(min, 'min', (float, Variable), 'clamp') + check_type(min, 'min', (float, int, Variable), 'clip') if isinstance(min, Variable): check_dtype(min.dtype, 'min', ['float32', 'float64', 'int32'], - 'clamp', '(When the type of min in clamp is Variable.)') + 'clip', '(When the type of min in clip is Variable.)') if max is not None: - check_type(max, 'max', (float, Variable), 'clamp') + check_type(max, 'max', (float, int, Variable), 'clip') if isinstance(max, Variable): check_dtype(max.dtype, 'max', ['float32', 'float64', 'int32'], - 'clamp', '(When the type of max in clamp is Variable.)') + 'clip', '(When the type of max in clip is Variable.)') - inputs = {'X': input} + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'clip') + + inputs = {'X': x} attrs = {'min': sys.float_info.min, 'max': sys.float_info.max} if isinstance(min, Variable): @@ -1509,9 +1599,9 @@ def clamp(input, min=None, max=None, name=None): elif max is not None: attrs['max'] = max - helper = LayerHelper('clamp', **locals()) + helper = LayerHelper('clip', **locals()) output = helper.create_variable_for_type_inference( - dtype=helper.input_dtype()) + dtype=helper.input_dtype()) helper.append_op( type='clip', inputs=inputs, outputs={'Out': [output]}, attrs=attrs) @@ -1735,3 +1825,254 @@ def cumsum(x, axis=None, dtype=None, name=None): kwargs[name] = val _cum_sum_ = generate_layer_fn('cumsum') return _cum_sum_(**kwargs) + +def isfinite(x, name=None): + """ + + Return whether every element of input tensor is finite number or not. + + Args: + x (Tensor): The input tensor, it's data type should be float16, float32, float64, int32, int64. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + `Tensor`, the bool result which shows every element of `x` whether it is finite number or not. + + Examples: + .. 
code-block:: python + + import paddle + import numpy as np + paddle.disable_static() + x_np = np.array([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')]) + x = paddle.to_tensor(x_np) + out = paddle.tensor.isfinite(x) + print(out.numpy()) # [False True True False True False False] + """ + if in_dygraph_mode(): + return core.ops.isfinite_v2(x) + helper = LayerHelper("isfinite_v2", **locals()) + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64', 'int32', 'int64'], 'isfinite') + out = helper.create_variable_for_type_inference('bool') + helper.append_op(type="isfinite_v2", inputs={"X": x}, outputs={"Out": out}) + return out + +def isinf(x, name=None): + """ + + Return whether every element of input tensor is `+/-INF` or not. + + Args: + x (Tensor): The input tensor, it's data type should be float16, float32, float64, int32, int64. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + `Tensor`, the bool result which shows every element of `x` whether it is `+/-INF` or not. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + paddle.disable_static() + x_np = np.array([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')]) + x = paddle.to_tensor(x_np) + out = paddle.tensor.isinf(x) + print(out.numpy()) # [ True False False True False False False] + """ + if in_dygraph_mode(): + return core.ops.isinf_v2(x) + helper = LayerHelper("isinf_v2", **locals()) + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64', 'int32', 'int64'], 'isinf') + out = helper.create_variable_for_type_inference(dtype='bool') + helper.append_op(type="isinf_v2", inputs={"X": x}, outputs={"Out": out}) + return out + +def isnan(x, name=None): + """ + + Return whether every element of input tensor is `NaN` or not. + + Args: + x (Tensor): The input tensor, it's data type should be float16, float32, float64, int32, int64. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + `Tensor`, the bool result which shows every element of `x` whether it is `NaN` or not. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + paddle.disable_static() + x_np = np.array([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')]) + x = paddle.to_tensor(x_np) + out = paddle.tensor.isnan(x) + print(out.numpy()) # [False False False False False True True] + """ + if in_dygraph_mode(): + return core.ops.isnan_v2(x) + helper = LayerHelper("isnan_v2", **locals()) + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64', 'int32', 'int64'], 'isnan') + out = helper.create_variable_for_type_inference(dtype='bool') + helper.append_op(type="isnan_v2", inputs={"X": x}, outputs={"Out": out}) + return out + + +def prod(x, axis=None, keepdim=False, dtype=None, name=None): + """ + Compute the product of tensor elements over the given axis. + + Args: + x(Tensor): The input tensor, its data type should be float32, float64, int32, int64. + axis(int|list|tuple, optional): The axis along which the product is computed. If :attr:`None`, + multiply all elements of `x` and return a Tensor with a single element, + otherwise must be in the range :math:`[-x.ndim, x.ndim)`. If :math:`axis[i]<0`, + the axis to reduce is :math:`x.ndim + axis[i]`. Default is None. 
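A NumPy cross-check of the three predicates added above, on the same input the docstring examples use.

# Illustration only: the new predicates agree with their NumPy counterparts.
import numpy as np
import paddle

paddle.disable_static()
x_np = np.array([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')])
x = paddle.to_tensor(x_np)

print(np.array_equal(paddle.tensor.isfinite(x).numpy(), np.isfinite(x_np)))   # True
print(np.array_equal(paddle.tensor.isinf(x).numpy(), np.isinf(x_np)))         # True
print(np.array_equal(paddle.tensor.isnan(x).numpy(), np.isnan(x_np)))         # True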
+ dtype(str|np.dtype, optional): The desired date type of returned tensor, can be float32, float64, + int32, int64. If specified, the input tensor is casted to dtype before operator performed. + This is very useful for avoiding data type overflows. The default value is None, the dtype + of output is the same as input Tensor `x`. + keepdim(bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result + tensor will have one fewer dimension than the input unless `keepdim` is true. Default is False. + name(string, optional): The default value is None. Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name` . + + Returns: + Tensor, result of product on the specified dim of input tensor. + + Raises: + ValueError: The :attr:`dtype` must be float32, float64, int32 or int64. + TypeError: The type of :attr:`axis` must be int, list or tuple. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + # the axis is a int element + data_x = np.array([[0.2, 0.3, 0.5, 0.9], + [0.1, 0.2, 0.6, 0.7]]).astype(np.float32) + x = paddle.to_tensor(data_x) + out1 = paddle.prod(x) + print(out1.numpy()) + # [0.0002268] + + out2 = paddle.prod(x, -1) + print(out2.numpy()) + # [0.027 0.0084] + + out3 = paddle.prod(x, 0) + print(out3.numpy()) + # [0.02 0.06 0.3 0.63] + print(out3.numpy().dtype) + # float32 + + out4 = paddle.prod(x, 0, keepdim=True) + print(out4.numpy()) + # [[0.02 0.06 0.3 0.63]] + + out5 = paddle.prod(x, 0, dtype='int64') + print(out5.numpy()) + # [0 0 0 0] + print(out5.numpy().dtype) + # int64 + + # the axis is list + data_y = np.array([[[1.0, 2.0], [3.0, 4.0]], + [[5.0, 6.0], [7.0, 8.0]]]) + y = paddle.to_tensor(data_y) + out6 = paddle.prod(y, [0, 1]) + print(out6.numpy()) + # [105. 384.] + + out7 = paddle.prod(y, (1, 2)) + print(out7.numpy()) + # [ 24. 1680.] + + """ + if dtype is not None: + check_dtype(dtype, 'dtype', ['float32', 'float64', 'int32', 'int64'], 'prod') + if x.dtype != convert_np_dtype_to_dtype_(dtype): + x = layers.cast(x, dtype) + + return layers.reduce_prod(input=x, dim=axis, keep_dim=keepdim, name=name) + + +def sign(x, name=None): + """ + This OP returns sign of every element in `x`: 1 for positive, -1 for negative and 0 for zero. + + Args: + x(Tensor): The input tensor. The data type can be float16, float32 or float64. + name (str, optional): The default value is None. Normally there is no need for user to + set this property. For more information, please refer to :ref:`api_guide_Name` + + Returns: + Tensor: The output sign tensor with identical shape and data type to the input :attr:`x`. + + Examples: + .. code-block:: python + + import numpy as np + import paddle + + data = np.array([3.0, 0.0, -2.0, 1.7], dtype='float32') + paddle.disable_static() + x = paddle.to_tensor(data) + out = paddle.sign(x=x) + print(out) # [1.0, 0.0, -1.0, 1.0] + """ + if in_dygraph_mode(): + return core.ops.sign(x) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'sign') + helper = LayerHelper("sign", **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op(type='sign', inputs={'X': [x]}, outputs={'Out': [out]}) + + return out + + +def tanh(x, name=None): + """ + Tanh Activation Operator. + + .. math:: + out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} + + Args: + x (Tensor): Input of Tanh operator, an N-D Tensor, with data type float32, float64 or float16. 
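A brief sketch of why the `dtype` argument on `prod` matters for integer inputs, as the docstring above suggests: a 32-bit accumulator wraps around, while casting to int64 first keeps the exact product. The wrapped value shown comes from NumPy with an explicit int32 accumulator; only the int64 call mirrors the documented paddle usage.

# Illustration only: avoiding integer overflow in prod via dtype.
import numpy as np
import paddle

paddle.disable_static()
data = np.full([40], 2, dtype='int32')            # product is 2**40, too big for int32

print(np.prod(data, dtype='int32'))               # 0: wrapped around in 32-bit arithmetic
print(np.prod(data, dtype='int64'))               # 1099511627776 == 2**40

x = paddle.to_tensor(data)
print(paddle.prod(x, dtype='int64').numpy())      # [1099511627776], input cast before reducing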
+ name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Output of Tanh operator, a Tensor with same data type and shape as input. + + Examples: + + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x_data = np.array([-0.4, -0.2, 0.1, 0.3]) + x = paddle.to_tensor(x_data) + out = paddle.tanh(x) + print(out.numpy()) + # [-0.37994896 -0.19737532 0.09966799 0.29131261] + """ + if in_dygraph_mode(): + return core.ops.tanh(x) + + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'tanh') + helper = LayerHelper('tanh', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op(type='tanh', inputs={'X': x}, outputs={'Out': out}) + return out diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index d26003fd826cfb3f3905b6aeea3e9492fab39cea..005e7beefe6877530b0a3c89d3bc8bfeabebc59d 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -21,22 +21,412 @@ from ..fluid.framework import device_guard, in_dygraph_mode, _varbase_creator, V from ..fluid.layers.layer_function_generator import templatedoc from ..fluid.layer_helper import LayerHelper from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype -from ..fluid.layers import utils, uniform_random, gaussian_random +from ..fluid.layers import utils from ..fluid.layers.tensor import fill_constant +import paddle +import warnings from ..fluid.io import shuffle #DEFINE_ALIAS __all__ = [ - # 'gaussin', - # 'uniform', + 'bernoulli', + 'standard_normal', + 'normal', + 'uniform', 'shuffle', 'randn', 'rand', 'randint', - 'randperm' + 'randperm', ] +def bernoulli(x, name=None): + """ + + This OP returns a Tensor filled with random binary(0 or 1) number from a Bernoulli distribution. + The input ``x`` is a tensor with probabilities for generating the random binary number. + Each element in ``x`` should be in [0, 1], and the out is generated by: + + .. math:: + + out_i ~ Bernoulli (x_i) + + Args: + x(Tensor): A tensor with probabilities for generating the random binary number. The data type + should be float32, float64. + name(str, optional): The default value is None. Normally there is no + need for user to set this property. For more information, please + refer to :ref:`api_guide_Name`. + Returns: + Tensor: A Tensor filled with random binary number with the same shape and dtype as ``x``. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = paddle.rand([2, 3]) + print(x.numpy()) + # [[0.11272584 0.3890902 0.7730957 ] + # [0.10351662 0.8510418 0.63806665]] + + out = paddle.bernoulli(x) + print(out.numpy()) + # [[0. 0. 1.] + # [0. 0. 1.]] + + """ + + if in_dygraph_mode(): + return core.ops.bernoulli(x) + + check_variable_and_dtype(x, "x", ["float32", "float64"], "bernoulli") + + helper = LayerHelper("randint", **locals()) + out = helper.create_variable_for_type_inference( + dtype=x.dtype) # maybe set out to int32 ? + helper.append_op( + type='bernoulli', inputs={"X": x}, outputs={'Out': out}, attrs={}) + return out + + +def gaussian_random(shape, mean=0.0, std=1.0, dtype='float32', name=None): + """ + This OP returns a Tensor filled with random values sampled from a Gaussian + distribution, with ``shape`` and ``dtype``. + + Args: + shape(list|tuple|Tensor): The shape of the output Tensor. 
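A quick statistical sanity check for `paddle.bernoulli`: averaged over many draws, the empirical frequency of 1s should sit close to the input probabilities (the draws are random, so only approximate agreement is expected). Tiling the probabilities with `paddle.expand` relies on the expand API added earlier in this patch.

# Illustration only: bernoulli samples average out to the input probabilities.
import numpy as np
import paddle

paddle.disable_static()
probs = paddle.to_tensor(np.array([[0.1], [0.5], [0.9]], dtype='float32'))
p = paddle.expand(probs, shape=[3, 10000])    # repeat each probability 10000 times

samples = paddle.bernoulli(p)                 # independent 0/1 draws with P(1) = p
print(samples.numpy().mean(axis=1))           # roughly [0.1, 0.5, 0.9]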
If ``shape`` + is a list or tuple, the elements of it should be integers or Tensors + (with the shape [1], and the data type int32 or int64). If ``shape`` + is a Tensor, it should be a 1-D Tensor(with the data type int32 or + int64). + mean(float|int, optional): Mean of the output tensor, default is 0.0. + std(float|int, optional): Standard deviation of the output tensor, default + is 1.0. + seed(int, optional): ${seed_comment} + dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of + the output Tensor. Supported data types: float32, float64. + Default is float32. + name(str, optional): The default value is None. Normally there is no + need for user to set this property. For more information, please + refer to :ref:`api_guide_Name`. + + Returns: + Tensor: A Tensor filled with random values sampled from a Gaussian + distribution, with ``shape`` and ``dtype``. + """ + if not isinstance(dtype, core.VarDesc.VarType): + dtype = convert_np_dtype_to_dtype_(dtype) + seed = 0 + op_type_for_check = 'gaussian_random/standard_normal/randn/normal' + + if in_dygraph_mode(): + shape = utils._convert_shape_to_list(shape) + return core.ops.gaussian_random('shape', shape, 'mean', + float(mean), 'std', + float(std), 'seed', seed, 'dtype', + dtype) + + check_type(shape, 'shape', (list, tuple, Variable), op_type_for_check) + check_dtype(dtype, 'dtype', ['float32', 'float64'], op_type_for_check) + + inputs = {} + attrs = { + 'mean': mean, + 'std': std, + 'seed': seed, + 'dtype': dtype, + 'use_mkldnn': False + } + utils._get_shape_tensor_inputs( + inputs=inputs, attrs=attrs, shape=shape, op_type=op_type_for_check) + + helper = LayerHelper('gaussian_random', **locals()) + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='gaussian_random', + inputs=inputs, + outputs={'Out': out}, + attrs=attrs) + out.stop_gradient = True + return out + + +def standard_normal(shape, dtype=None, name=None): + """ + This OP returns a Tensor filled with random values sampled from a standard + normal distribution with mean 0 and standard deviation 1, with ``shape`` + and ``dtype``. + + Args: + shape(list|tuple|Tensor): The shape of the output Tensor. If ``shape`` + is a list or tuple, the elements of it should be integers or Tensors + (with the shape [1], and the data type int32 or int64). If ``shape`` + is a Tensor, it should be a 1-D Tensor(with the data type int32 or + int64). + dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of the + output tensor. Supported data types: float32, float64. If ``dytpe`` + is None, the data type is float32. Default is None. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor: A Tensor filled with random values sampled from a standard + normal distribution with mean 0 and standard deviation 1, with + ``shape`` and ``dtype``. + + Raises: + TypeError: If ``shape`` is not list, tuple, Tensor. + TypeError: If ``dtype`` is not float32, float64. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + # example 1: attr shape is a list which doesn't contain Tensor. + result_1 = paddle.standard_normal(shape=[2, 3]) + # [[-2.923464 , 0.11934398, -0.51249987], # random + # [ 0.39632758, 0.08177969, 0.2692008 ]] # random + + # example 2: attr shape is a list which contains Tensor. 
+ dim_1 = paddle.fill_constant([1], "int64", 2) + dim_2 = paddle.fill_constant([1], "int32", 3) + result_2 = paddle.standard_normal(shape=[dim_1, dim_2, 2]) + # [[[-2.8852394 , -0.25898588], # random + # [-0.47420555, 0.17683524], # random + # [-0.7989969 , 0.00754541]], # random + # [[ 0.85201347, 0.32320443], # random + # [ 1.1399018 , 0.48336947], # random + # [ 0.8086993 , 0.6868893 ]]] # random + + # example 3: attr shape is a Tensor, the data type must be int64 or int32. + var_shape = paddle.to_tensor(np.array([2, 3])) + result_3 = paddle.standard_normal(var_shape) + # [[-2.878077 , 0.17099959, 0.05111201] # random + # [-0.3761474, -1.044801 , 1.1870178 ]] # random + + """ + if dtype is None: + dtype = 'float32' + + return gaussian_random( + shape=shape, mean=0.0, std=1.0, dtype=dtype, name=name) + + +randn = standard_normal + + +def normal(mean=0.0, std=1.0, shape=None, name=None): + """ + This OP returns a Tensor filled with random values sampled from a normal + distribution with ``mean`` and ``std`` (standard deviation) . + + If ``mean`` is a Tensor, the output Tensor has the same shape and data type as ``mean``. + If ``mean`` is not a Tensor and ``std`` is a Tensor, the output Tensor has the same shape and data type as ``std``. + If ``mean`` and ``std`` are not a Tensor, the output Tensor has the same shape as ``shape``, with data type float32. + + If ``mean`` and ``std`` are Tensor, the num of elements of ``mean`` and ``std`` should be the same. + + Args: + mean (float|Tensor, optional): The mean of the output Tensor's normal distribution. + If ``mean`` is float, all elements of the output Tensor shared the same mean. + If ``mean`` is a Tensor(data type supports float32, float64), it has per-element means. + Default is 0.0 + std (float|Tensor, optional): The standard deviation of the output Tensor's normal distribution. + If ``std`` is float, all elements of the output Tensor shared the same standard deviation. + If ``std`` is a Tensor(data type supports float32, float64), it has per-element standard deviations. + Defaule is 1.0 + shape (list|tuple|Tensor, optional): The shape of the output Tensor. If ``shape`` + is a list or tuple, the elements of it should be integers or Tensors + (with the shape [1], and the data type int32 or int64). If ``shape`` + is a Tensor, it should be a 1-D Tensor(with the data type int32 or + int64). If ``mean`` or ``std`` is a Tensor, the shape of the output + Tensor is the same as ``mean`` or ``std`` , attr ``shape`` is ignored. + Default is None + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + A Tensor filled with random values sampled from a normal distribution with ``mean`` and ``std`` . + + Examples: + .. 
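A hedged sketch of the documented behaviour of `normal`: with scalar `mean`/`std` it draws a tensor of the requested `shape`, and with a tensor `mean` the output takes that tensor's shape, one draw per element. Sample moments are random, so only approximate agreement is expected.

# Illustration only: sample moments and output shapes of paddle.normal.
import numpy as np
import paddle

paddle.disable_static()
vals = paddle.normal(mean=2.0, std=3.0, shape=[100000]).numpy()
print(vals.mean(), vals.std())                 # roughly 2.0 and 3.0

mean_tensor = paddle.to_tensor(np.array([1.0, 2.0, 3.0], dtype='float32'))
print(paddle.normal(mean=mean_tensor).shape)   # [3]: one draw per element of mean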
code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + out1 = paddle.normal(shape=[2, 3]) + # [[ 0.17501129 0.32364586 1.561118 ] # random + # [-1.7232178 1.1545963 -0.76156676]] # random + + mean_tensor = paddle.to_tensor(np.array([1.0, 2.0, 3.0])) + out2 = paddle.normal(mean=mean_tensor) + # [ 0.18644847 -1.19434458 3.93694787] # random + + std_tensor = paddle.to_tensor(np.array([1.0, 2.0, 3.0])) + out3 = paddle.normal(mean=mean_tensor, std=std_tensor) + # [1.00780561 3.78457445 5.81058198] # random + + """ + if not in_dygraph_mode(): + check_type(mean, 'mean', (int, float, Variable), 'normal') + check_type(std, 'std', (int, float, Variable), 'normal') + if isinstance(mean, Variable): + check_dtype( + mean.dtype, 'mean', ['float32', 'float64'], 'normal', + "If mean is Tensor, it's data type only support float32, float64." + ) + if isinstance(std, Variable): + check_dtype( + std.dtype, 'std', ['float32', 'float64'], 'normal', + "If std is Tensor, it's data type only support float32, float64." + ) + if shape is not None: + if isinstance(shape, (list, tuple)): + for item in shape: + check_type(item, 'shape', (int), 'normal', + 'Elements of shape should be int.') + elif isinstance(shape, Variable): + check_dtype(shape.dtype, 'shape', ['int32', 'int64'], 'normal') + else: + assert TypeError( + 'If mean and std are all not Tensor, shape should be list, tuple, Tensor.' + ) + + if isinstance(mean, Variable): + if isinstance(std, Variable): + if std.dtype != mean.dtype: + std = paddle.cast(std, mean.dtype) + mean_shape = paddle.shape(mean) + std = paddle.reshape(std, mean_shape) + else: + std = float(std) + out = standard_normal(paddle.shape(mean), mean.dtype, name) + elif isinstance(std, Variable): + mean = float(mean) + out = standard_normal(paddle.shape(std), std.dtype, name) + else: + return gaussian_random(shape=shape, mean=mean, std=std, name=name) + + out = out * std + mean + if not in_dygraph_mode(): + out.stop_grediant = True + return out + + +def uniform(shape, dtype='float32', min=-1.0, max=1.0, seed=0, name=None): + """ + This OP returns a Tensor filled with random values sampled from a uniform + distribution in the range [``min``, ``max``), with ``shape`` and ``dtype``. + + Examples: + :: + Input: + shape = [1, 2] + Output: + result=[[0.8505902, 0.8397286]] + + Args: + shape(list|tuple|Tensor): The shape of the output Tensor. If ``shape`` + is a list or tuple, the elements of it should be integers or Tensors + (with the shape [1], and the data type int32 or int64). If ``shape`` + is a Tensor, it should be a 1-D Tensor(with the data type int32 or + int64). + dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of + the output Tensor. Supported data types: float32, float64. + Default is float32. + min(float|int, optional): The lower bound on the range of random values + to generate, ``min`` is included in the range. Default is -1.0. + max(float|int, optional): The upper bound on the range of random values + to generate, ``max`` is excluded in the range. Default is 1.0. + seed(int, optional): Random seed used for generating samples. 0 means + use a seed generated by the system. Note that if seed is not 0, + this operator will always generate the same random numbers every + time. Default is 0. + name(str, optional): The default value is None. Normally there is no + need for user to set this property. For more information, please + refer to :ref:`api_guide_Name`. 
+ + Returns: + Tensor: A Tensor filled with random values sampled from a uniform + distribution in the range [``min``, ``max``), with ``shape`` and ``dtype``. + + Raises: + TypeError: If ``shape`` is not list, tuple, Tensor. + TypeError: If ``dtype`` is not float32, float64. + + Examples: + .. code-block:: python + + import numpy as np + import paddle + + paddle.disable_static() + + # example 1: + # attr shape is a list which doesn't contain Tensor. + result_1 = paddle.tensor.random.uniform(shape=[3, 4]) + # [[ 0.84524226, 0.6921872, 0.56528175, 0.71690357], + # [-0.34646994, -0.45116323, -0.09902662, -0.11397249], + # [ 0.433519, 0.39483607, -0.8660099, 0.83664286]] + + # example 2: + # attr shape is a list which contains Tensor. + dim_1 = paddle.fill_constant([1], "int64", 2) + dim_2 = paddle.fill_constant([1], "int32", 3) + result_2 = paddle.tensor.random.uniform(shape=[dim_1, dim_2]) + # [[-0.9951253, 0.30757582, 0.9899647 ], + # [ 0.5864527, 0.6607096, -0.8886161 ]] + + # example 3: + # attr shape is a Tensor, the data type must be int64 or int32. + shape = np.array([2, 3]) + shape_tensor = paddle.to_tensor(shape) + result_3 = paddle.tensor.random.uniform(shape_tensor) + # if shape_tensor's value is [2, 3] + # result_3 is: + # [[-0.8517412, -0.4006908, 0.2551912 ], + # [ 0.3364414, 0.36278176, -0.16085452]] + + paddle.enable_static() + + """ + if not isinstance(dtype, core.VarDesc.VarType): + dtype = convert_np_dtype_to_dtype_(dtype) + + if in_dygraph_mode(): + shape = utils._convert_shape_to_list(shape) + return core.ops.uniform_random('shape', shape, 'min', + float(min), 'max', + float(max), 'seed', seed, 'dtype', dtype) + + check_type(shape, 'shape', (list, tuple, Variable), 'uniform_random/rand') + check_dtype(dtype, 'dtype', ('float32', 'float64'), 'uniform_random/rand') + + inputs = dict() + attrs = {'seed': seed, 'min': min, 'max': max, 'dtype': dtype} + utils._get_shape_tensor_inputs( + inputs=inputs, attrs=attrs, shape=shape, op_type='uniform_random/rand') + + helper = LayerHelper("uniform_random", **locals()) + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type="uniform_random", inputs=inputs, attrs=attrs, + outputs={"Out": out}) + return out + + def randint(low=0, high=None, shape=[1], dtype=None, name=None): """ :alias_main: paddle.randint @@ -78,40 +468,40 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): Examples: .. code-block:: python - import paddle - import numpy as np - - paddle.disable_static() - - # example 1: - # attr shape is a list which doesn't contain Tensor. - result_1 = paddle.randint(low=-5, high=5, shape=[3]) - # [0, -3, 2] - - # example 2: - # attr shape is a list which contains Tensor. - dim_1 = paddle.fill_constant([1], "int64", 2) - dim_2 = paddle.fill_constant([1], "int32", 3) - result_2 = paddle.randint(low=-5, high=5, shape=[dim_1, dim_2], dtype="int32") - # [[0, -1, -3], - # [4, -2, 0]] - - # example 3: - # attr shape is a Tensor - var_shape = paddle.to_variable(np.array([3])) - result_3 = paddle.randint(low=-5, high=5, shape=var_shape) - # [-2, 2, 3] - - # example 4: - # data type is int32 - result_4 = paddle.randint(low=-5, high=5, shape=[3], dtype='int32') - # [-5, 4, -4] - - # example 5: - # Input only one parameter - # low=0, high=10, shape=[1], dtype='int64' - result_5 = paddle.randint(10) - # [7] + import paddle + import numpy as np + + paddle.disable_static() + + # example 1: + # attr shape is a list which doesn't contain Tensor. 
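``uniform`` draws from the half-open range [min, max); this is equivalent to the affine map min + (max - min) * U[0, 1). A quick NumPy illustration of that mapping (for exposition only, not paddle code):

    import numpy as np

    np.random.seed(0)
    lo, hi = -1.0, 1.0
    u = np.random.random_sample((3, 4))    # U[0, 1)
    out = lo + (hi - lo) * u               # U[lo, hi), i.e. uniform(min=lo, max=hi)
    assert out.min() >= lo and out.max() < hi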
+ result_1 = paddle.randint(low=-5, high=5, shape=[3]) + # [0, -3, 2] # random + + # example 2: + # attr shape is a list which contains Tensor. + dim_1 = paddle.fill_constant([1], "int64", 2) + dim_2 = paddle.fill_constant([1], "int32", 3) + result_2 = paddle.randint(low=-5, high=5, shape=[dim_1, dim_2], dtype="int32") + # [[0, -1, -3], # random + # [4, -2, 0]] # random + + # example 3: + # attr shape is a Tensor + var_shape = paddle.to_variable(np.array([3])) + result_3 = paddle.randint(low=-5, high=5, shape=var_shape) + # [-2, 2, 3] # random + + # example 4: + # data type is int32 + result_4 = paddle.randint(low=-5, high=5, shape=[3], dtype='int32') + # [-5, 4, -4] # random + + # example 5: + # Input only one parameter + # low=0, high=10, shape=[1], dtype='int64' + result_5 = paddle.randint(10) + # [7] # random """ if high is None: @@ -150,77 +540,6 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): return out -def randn(shape, dtype=None, name=None): - """ - :alias_main: paddle.randn - :alias: paddle.tensor.randn, paddle.tensor.random.randn - - This OP returns a Tensor filled with random values sampled from a normal - distribution with mean 0 and standard deviation 1 (also called the standard - normal distribution), with ``shape`` and ``dtype``. - - Args: - shape(list|tuple|Tensor): The shape of the output Tensor. If ``shape`` - is a list or tuple, the elements of it should be integers or Tensors - (with the shape [1], and the data type int32 or int64). If ``shape`` - is a Tensor, it should be a 1-D Tensor(with the data type int32 or - int64). - dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of the - output tensor. Supported data types: float32, float64. If ``dytpe`` - is None, the data type is float32. Default is None. - name(str, optional): The default value is None. Normally there is no - need for user to set this property. For more information, please - refer to :ref:`api_guide_Name`. - - Returns: - Tensor: A Tensor filled with random values sampled from a normal - distribution with mean 0 and standard deviation 1 (also called the - standard normal distribution), with ``shape`` and ``dtype``. - - Raises: - TypeError: If ``shape`` is not list, tuple, Tensor. - TypeError: If ``dtype`` is not float32, float64. - - Examples: - .. code-block:: python - - import paddle - import numpy as np - - paddle.disable_static() - - # example 1: attr shape is a list which doesn't contain Tensor. - result_1 = paddle.randn(shape=[2, 3]) - # [[-2.923464 , 0.11934398, -0.51249987], - # [ 0.39632758, 0.08177969, 0.2692008 ]] - - # example 2: attr shape is a list which contains Tensor. - dim_1 = paddle.fill_constant([1], "int64", 2) - dim_2 = paddle.fill_constant([1], "int32", 3) - result_2 = paddle.randn(shape=[dim_1, dim_2, 2]) - # [[[-2.8852394 , -0.25898588], - # [-0.47420555, 0.17683524], - # [-0.7989969 , 0.00754541]], - # [[ 0.85201347, 0.32320443], - # [ 1.1399018 , 0.48336947], - # [ 0.8086993 , 0.6868893 ]]] - - # example 3: attr shape is a Tensor, the data type must be int64 or int32. 
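As example 5 above notes, when only one bound is given ``randint`` treats it as the exclusive upper bound and uses ``low=0``. A small dygraph-style check (a sketch assuming the ``paddle.disable_static`` flow used throughout these examples):

    import paddle

    paddle.disable_static()
    out = paddle.randint(10, shape=[100])        # same as randint(low=0, high=10, shape=[100])
    vals = out.numpy()
    assert vals.min() >= 0 and vals.max() < 10   # values drawn from [0, 10)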
- var_shape = paddle.to_variable(np.array([2, 3])) - result_3 = paddle.randn(var_shape) - # [[-2.878077 , 0.17099959, 0.05111201] - # [-0.3761474, -1.044801 , 1.1870178 ]] - - """ - if dtype is None: - dtype = 'float32' - - out = gaussian_random( - shape=shape, mean=0.0, std=1.0, seed=0, dtype=dtype, name=name) - out.stop_gradient = True - return out - - @templatedoc() def randperm(n, dtype="int64", name=None): """ @@ -250,15 +569,15 @@ def randperm(n, dtype="int64", name=None): Examples: .. code-block:: python - import paddle + import paddle - paddle.disable_static() + paddle.disable_static() - result_1 = paddle.randperm(5) - # [4, 1, 2, 3, 0] + result_1 = paddle.randperm(5) + # [4, 1, 2, 3, 0] # random - result_2 = paddle.randperm(7, 'int32') - # [1, 6, 2, 0, 4, 3, 5] + result_2 = paddle.randperm(7, 'int32') + # [1, 6, 2, 0, 4, 3, 5] # random """ if not isinstance(dtype, core.VarDesc.VarType): @@ -322,36 +641,36 @@ def rand(shape, dtype=None, name=None): Examples: .. code-block:: python - import paddle - import numpy as np - - paddle.disable_static() - # example 1: attr shape is a list which doesn't contain Tensor. - result_1 = paddle.rand(shape=[2, 3]) - # [[0.451152 , 0.55825245, 0.403311 ], - # [0.22550228, 0.22106001, 0.7877319 ]] - - # example 2: attr shape is a list which contains Tensor. - dim_1 = paddle.fill_constant([1], "int64", 2) - dim_2 = paddle.fill_constant([1], "int32", 3) - result_2 = paddle.rand(shape=[dim_1, dim_2, 2]) - # [[[0.8879919 , 0.25788337], - # [0.28826773, 0.9712097 ], - # [0.26438272, 0.01796806]], - # [[0.33633623, 0.28654453], - # [0.79109055, 0.7305809 ], - # [0.870881 , 0.2984597 ]]] - - # example 3: attr shape is a Tensor, the data type must be int64 or int32. - var_shape = paddle.to_variable(np.array([2, 3])) - result_3 = paddle.rand(var_shape) - # [[0.22920267, 0.841956 , 0.05981819], - # [0.4836288 , 0.24573246, 0.7516129 ]] + import paddle + import numpy as np + + paddle.disable_static() + # example 1: attr shape is a list which doesn't contain Tensor. + result_1 = paddle.rand(shape=[2, 3]) + # [[0.451152 , 0.55825245, 0.403311 ], # random + # [0.22550228, 0.22106001, 0.7877319 ]] # random + + # example 2: attr shape is a list which contains Tensor. + dim_1 = paddle.fill_constant([1], "int64", 2) + dim_2 = paddle.fill_constant([1], "int32", 3) + result_2 = paddle.rand(shape=[dim_1, dim_2, 2]) + # [[[0.8879919 , 0.25788337], # random + # [0.28826773, 0.9712097 ], # random + # [0.26438272, 0.01796806]], # random + # [[0.33633623, 0.28654453], # random + # [0.79109055, 0.7305809 ], # random + # [0.870881 , 0.2984597 ]]] # random + + # example 3: attr shape is a Tensor, the data type must be int64 or int32. 
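``randperm(n)`` above returns a random permutation of ``0 .. n-1``, so sorting the result always recovers ``range(n)``. A dygraph-style usage sketch (assuming the API shown in the randperm example):

    import paddle

    paddle.disable_static()
    perm = paddle.randperm(7, 'int32')   # e.g. [1, 6, 2, 0, 4, 3, 5]  # random
    assert sorted(perm.numpy().tolist()) == list(range(7))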
+ var_shape = paddle.to_variable(np.array([2, 3])) + result_3 = paddle.rand(var_shape) + # [[0.22920267, 0.841956 , 0.05981819], # random + # [0.4836288 , 0.24573246, 0.7516129 ]] # random """ if dtype is None: dtype = 'float32' - out = uniform_random(shape, dtype, min=0.0, max=1.0, name=name) + out = uniform(shape, dtype, min=0.0, max=1.0, name=name) out.stop_gradient = True return out diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py index 0f8381d824027670250f4b59607b4275e43b5e22..91ad3bfa9cc1babd22cf9419ede33ae26f0dc900 100644 --- a/python/paddle/tensor/search.py +++ b/python/paddle/tensor/search.py @@ -29,13 +29,13 @@ __all__ = [ 'argsort', 'has_inf', 'has_nan', - # 'masked_select', + 'masked_select', 'topk', 'where', 'index_select', 'nonzero', 'sort', - 'index_sample' + 'index_sample', ] from paddle.common_ops_import import * @@ -125,95 +125,168 @@ def argsort(x, axis=-1, descending=False, name=None): return ids -def argmax(input, axis=None, dtype=None, out=None, keepdims=False, name=None): +def argmax(x, axis=None, dtype=None, keepdim=False, name=None): """ - :alias_main: paddle.argmax - :alias: paddle.argmax,paddle.tensor.argmax,paddle.tensor.search.argmax - This OP computes the indices of the max elements of the input tensor's element along the provided axis. Args: - input(Variable): An input N-D Tensor with type float32, float64, int16, + x(Tensor): An input N-D Tensor with type float32, float64, int16, int32, int64, uint8. axis(int, optional): Axis to compute indices along. The effective range - is [-R, R), where R is Rank(input). when axis<0, it works the same way - as axis+R. Default is None, it will use the last dim to select indices of max value. - dtype(np.dtype|core.VarDesc.VarType|str): Data type of the output tensor which can + is [-R, R), where R is x.ndim. when axis < 0, it works the same way + as axis + R. Default is None, the input `x` will be into the flatten tensor, and selecting the min value index. + dtype(str): Data type of the output tensor which can be int32, int64. The default value is None, and it will return the int64 indices. - out(Variable, optional): Optional output which can be any created - Variable that meets the requirements to store the result of operation. - if out is None, a new Varibale will be create to store the result. Defalut is None. - keepdims(bool, optional): Keep the axis that do the select max. + keepdim(bool, optional): Keep the axis that selecting max. The defalut value is False. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. Returns: - Variable: A Tensor with data type int64. + Tensor, return the tensor of `int32` if set :attr:`dtype` is `int32`, otherwise return the tensor of `int64` Examples: .. 
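The hunk above rewires ``paddle.rand`` to call the new ``uniform`` with ``min=0.0, max=1.0``, so ``rand`` and ``uniform`` over [0, 1) are the same distribution. A minimal sketch (assuming the dygraph API used in these examples):

    import paddle

    paddle.disable_static()
    a = paddle.rand(shape=[2, 3])                                       # values in [0, 1)
    b = paddle.tensor.random.uniform(shape=[2, 3], min=0.0, max=1.0)    # same distribution
    assert a.numpy().shape == b.numpy().shape == (2, 3)
    assert (a.numpy() >= 0).all() and (a.numpy() < 1).all()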
code-block:: python - import paddle - import paddle.fluid as fluid import numpy as np + import paddle - in1 = np.array([[[5,8,9,5], - [0,0,1,7], - [6,9,2,4]], - [[5,2,4,2], - [4,7,7,9], - [1,7,0,6]]]) - with fluid.dygraph.guard(): - x = fluid.dygraph.to_variable(in1) - out1 = paddle.argmax(input=x, axis=-1) - out2 = paddle.argmax(input=x, axis=0) - out3 = paddle.argmax(input=x, axis=1) - out4 = paddle.argmax(input=x, axis=2) - out5 = paddle.argmax(input=x, axis=2, keepdims=True) - print(out1.numpy()) - # [[2 3 1] - # [0 3 1]] - print(out2.numpy()) - # [[0 0 0 0] - # [1 1 1 1] - # [0 0 0 1]] - print(out3.numpy()) - # [[2 2 0 1] - # [0 1 1 1]] - print(out4.numpy()) - # [[2 3 1] - # [0 3 1]] - print(out5.numpy()) - #array([[[2], - # [3], - # [1]], - # [[0], - # [3], - # [1]]]) + paddle.disable_static() + data = np.array([[5,8,9,5], + [0,0,1,7], + [6,9,2,4]]) + x = paddle.to_variable(data) + out1 = paddle.argmax(x) + print(out1.numpy()) # 2 + out2 = paddle.argmax(x, axis=1) + print(out2.numpy()) + # [2 3 1] + out3 = paddle.argmax(x, axis=-1) + print(out3.numpy()) + # [2 3 1] """ - helper = LayerHelper("arg_max", **locals()) + flatten = False + if axis is None: + flatten = True + axis = 0 + + if in_dygraph_mode(): + if dtype != None: + var_dtype = convert_np_dtype_to_dtype_(dtype) + out = core.ops.arg_max(x, 'axis', axis, 'dtype', var_dtype, + 'keepdim', keepdim, 'flatten', flatten) + else: + out = core.ops.arg_max(x, 'axis', axis, 'keepdim', keepdim, + 'flatten', flatten) + return out + + helper = LayerHelper("argmax", **locals()) + check_variable_and_dtype( + x, 'x', ['float32', 'float64', 'int16', 'int32', 'int64', 'uint8'], + 'paddle.argmax') var_dtype = None attrs = {} if dtype is not None: - check_dtype(dtype, 'create data type', ['int32', 'int64'], 'arg_max') + if dtype not in ['int32', 'int64']: + raise ValueError( + "The value of 'dtype' in argmax op must be int32, int64, but received of {}". + format(dtype)) var_dtype = convert_np_dtype_to_dtype_(dtype) attrs["dtype"] = var_dtype else: var_dtype = VarDesc.VarType.INT64 - if out is None: - out = helper.create_variable_for_type_inference(var_dtype) + + out = helper.create_variable_for_type_inference(var_dtype) + attrs['keepdims'] = keepdim + attrs['axis'] = axis + attrs['flatten'] = flatten + helper.append_op( + type='arg_max', inputs={'X': x}, outputs={'Out': [out]}, attrs=attrs) + out.stop_gradient = True + return out + + +def argmin(x, axis=None, dtype=None, keepdim=False, name=None): + """ + This OP computes the indices of the min elements of the input tensor's + element along the provided axis. + + Args: + x(Tensor): An input N-D Tensor with type float32, float64, int16, + int32, int64, uint8. + axis(int, optional): Axis to compute indices along. The effective range + is [-R, R), where R is x.ndim. when axis < 0, it works the same way + as axis + R. Default is None, the input `x` will be into the flatten tensor, and selecting the min value index. + dtype(str): Data type of the output tensor which can + be int32, int64. The default value is None, and it will + return the int64 indices. + keepdim(bool, optional): Keep the axis that selecting min. The defalut value is False. + name(str, optional): The default value is None. Normally there is no + need for user to set this property. For more information, please + refer to :ref:`api_guide_Name`. + + Returns: + Tensor, return the tensor of `int32` if set :attr:`dtype` is `int32`, otherwise return the tensor of `int64` + + Examples: + .. 
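With ``axis=None`` the ``flatten`` attribute above makes ``arg_max`` index into the flattened tensor, which is why ``paddle.argmax(x)`` returns a single index in the example. The NumPy analogue, using the same data as the docstring:

    import numpy as np

    data = np.array([[5, 8, 9, 5],
                     [0, 0, 1, 7],
                     [6, 9, 2, 4]])
    assert int(np.argmax(data)) == 2                      # flattened, like axis=None
    assert np.argmax(data, axis=1).tolist() == [2, 3, 1]  # matches paddle.argmax(x, axis=1)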
code-block:: python + + import numpy as np + import paddle + + paddle.disable_static() + data = np.array([[5,8,9,5], + [0,0,1,7], + [6,9,2,4]]) + x = paddle.to_variable(data) + out1 = paddle.argmin(x) + print(out1.numpy()) # 4 + out2 = paddle.argmin(x, axis=1) + print(out2.numpy()) + # [0 0 2] + out3 = paddle.argmin(x, axis=-1) + print(out3.numpy()) + # [0 0 2] + """ + flatten = False if axis is None: - axis = -1 - attrs['keepdims'] = keepdims + flatten = True + axis = 0 + + if in_dygraph_mode(): + if dtype != None: + var_dtype = convert_np_dtype_to_dtype_(dtype) + out = core.ops.arg_min(x, 'axis', axis, 'dtype', var_dtype, + 'keepdim', keepdim, 'flatten', flatten) + else: + out = core.ops.arg_min(x, 'axis', axis, 'keepdim', keepdim, + 'flatten', flatten) + return out + + helper = LayerHelper("argmin", **locals()) + check_variable_and_dtype( + x, 'x', ['float32', 'float64', 'int16', 'int32', 'int64', 'uint8'], + 'paddle.argmin') + var_dtype = None + attrs = {} + if dtype is not None: + if dtype not in ['int32', 'int64']: + raise ValueError( + "The value of 'dtype' in argmin op must be int32, int64, but received of {}". + format(dtype)) + var_dtype = convert_np_dtype_to_dtype_(dtype) + attrs["dtype"] = var_dtype + else: + var_dtype = VarDesc.VarType.INT64 + + out = helper.create_variable_for_type_inference(var_dtype) + attrs['keepdims'] = keepdim attrs['axis'] = axis + attrs['flatten'] = flatten helper.append_op( - type='arg_max', - inputs={'X': input}, - outputs={'Out': [out]}, - attrs=attrs) + type='arg_min', inputs={'X': x}, outputs={'Out': [out]}, attrs=attrs) out.stop_gradient = True return out @@ -255,8 +328,8 @@ def index_select(x, index, axis=0, name=None): [9.0, 10.0, 11.0, 12.0]]) data_index = np.array([0, 1, 1]).astype('int32') - x = paddle.to_variable(data) - index = paddle.to_variable(data_index) + x = paddle.to_tensor(data) + index = paddle.to_tensor(data_index) out_z1 = paddle.index_select(x=x, index=index) #[[1. 2. 3. 4.] # [5. 6. 7. 8.] @@ -629,3 +702,57 @@ def index_sample(x, index): 'Index': index}, outputs={'Out': out}) return out + + +def masked_select(x, mask, name=None): + """ + This OP Returns a new 1-D tensor which indexes the input tensor according to the ``mask`` + which is a tensor with data type of bool. + + Args: + x (Tensor): The input Tensor, the data type can be int32, int64, float32, float64. + mask (Tensor): The Tensor containing the binary mask to index with, it's data type is bool. + name(str, optional): The default value is None. Normally there is no + need for user to set this property. For more information, please + refer to :ref:`api_guide_Name`. + + Returns: A 1-D Tensor which is the same data type as ``x``. + + Raises: + TypeError: ``x`` must be a Tensor and the data type of ``x`` must be one of float32, float64, int32 and int64. + TypeError: ``mask`` must be a Tensor and the data type of ``mask`` must be bool. + + Examples: + + .. 
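``argmin`` mirrors the flatten behaviour: with ``axis=None`` it returns one index into the flattened tensor. The NumPy analogue for the docstring data:

    import numpy as np

    data = np.array([[5, 8, 9, 5],
                     [0, 0, 1, 7],
                     [6, 9, 2, 4]])
    assert int(np.argmin(data)) == 4                      # flattened, like axis=None
    assert np.argmin(data, axis=1).tolist() == [0, 0, 2]  # matches paddle.argmin(x, axis=1)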
code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + data = np.array([[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0]]).astype('float32') + + mask_data = np.array([[True, False, False, False], + [True, True, False, False], + [True, False, False, False]]).astype('bool') + x = paddle.to_tensor(data) + mask = paddle.to_tensor(mask_data) + out = paddle.masked_select(x, mask) + #[1.0 5.0 6.0 9.0] + """ + + if in_dygraph_mode(): + return core.ops.masked_select(x, mask) + + helper = LayerHelper("masked_select", **locals()) + check_variable_and_dtype(x, 'x', ['float32', 'float64', 'int32', 'int64'], + 'paddle.tensor.search.mask_select') + check_variable_and_dtype(mask, 'mask', ['bool'], + 'paddle.tensor.search.masked_select') + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='masked_select', inputs={'X': x, + 'Mask': mask}, outputs={'Y': out}) + return out diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index 7d22a0be5b0a9a2088f22535c6e2e56f7dc1f959..91676a6316b81a1998b9b48fb9ea7fcba6d67c25 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -15,9 +15,10 @@ # TODO: define statistical functions of a tensor from ..fluid.layers import reduce_mean #DEFINE_ALIAS -__all__ = ['mean', 'reduce_mean', 'std', 'var'] +__all__ = ['mean', 'reduce_mean', 'std', 'var', 'numel'] import numpy as np +from ..fluid.framework import Variable from ..fluid.layer_helper import LayerHelper from ..fluid.framework import core, in_dygraph_mode from ..fluid import layers @@ -31,8 +32,7 @@ def mean(x, axis=None, keepdim=False, name=None): Computes the mean of the input tensor's elements along ``axis``. Args: - x (Tensor): The input Tensor with data type float32, float64, int32, - int64. + x (Tensor): The input Tensor with data type float32, float64. axis (int|list|tuple, optional): The axis along which to perform mean calculations. ``axis`` should be int, list(int) or tuple(int). If ``axis`` is a list/tuple of dimension(s), mean is calculated along @@ -40,9 +40,9 @@ def mean(x, axis=None, keepdim=False, name=None): should be in range [-D, D), where D is the dimensions of ``x`` . If ``axis`` or element(s) of ``axis`` is less than 0, it works the same way as :math:`axis + D` . If ``axis`` is None, mean is - calculated along all elements of ``x``. Default is None. + calculated over all elements of ``x``. Default is None. keepdim (bool, optional): Whether to reserve the reduced dimension(s) - in the output Tensor. If ``keep_dim`` is True, the dimensions of + in the output Tensor. If ``keepdim`` is True, the dimensions of the output Tensor is the same as ``x`` except in the reduced dimensions(it is of size 1 in this case). Otherwise, the shape of the output Tensor is squeezed in ``axis`` . Default is False. 
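``masked_select`` above gathers the elements where ``mask`` is True into a 1-D tensor, i.e. NumPy boolean indexing. The same data as its example:

    import numpy as np

    data = np.array([[1.0, 2.0, 3.0, 4.0],
                     [5.0, 6.0, 7.0, 8.0],
                     [9.0, 10.0, 11.0, 12.0]], dtype='float32')
    mask = np.array([[True, False, False, False],
                     [True, True, False, False],
                     [True, False, False, False]])
    assert data[mask].tolist() == [1.0, 5.0, 6.0, 9.0]   # row-major order, flattened to 1-D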
@@ -67,7 +67,7 @@ def mean(x, axis=None, keepdim=False, name=None): [[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]], 'float32') - x = paddle.to_variable(x) + x = paddle.to_tensor(x) out1 = paddle.mean(x) # [12.5] out2 = paddle.mean(x, axis=-1) @@ -96,9 +96,12 @@ def mean(x, axis=None, keepdim=False, name=None): return core.ops.reduce_mean(x, 'dim', axis, 'keep_dim', keepdim, 'reduce_all', reduce_all) - check_variable_and_dtype(x, 'x/input', - ['float32', 'float64', 'int32', 'int64'], + check_variable_and_dtype(x, 'x/input', ['float32', 'float64'], 'mean/reduce_mean') + check_type(axis, 'axis/dim', (int, list, tuple), 'mean/reduce_mean') + if isinstance(axis, (list, tuple)): + for item in axis: + check_type(item, 'elements of axis/dim', (int), 'mean/reduce_mean') helper = LayerHelper('mean', **locals()) attrs = {'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all} @@ -108,139 +111,155 @@ def mean(x, axis=None, keepdim=False, name=None): return out -def var(input, axis=None, keepdim=False, unbiased=True, out=None, name=None): +def var(x, axis=None, unbiased=True, keepdim=False, name=None): """ - :alias_main: paddle.var - :alias: paddle.var,paddle.tensor.var,paddle.tensor.stat.var - - Computes the variance of the input Variable's elements along the specified - axis. + Computes the variance of ``x`` along ``axis`` . Args: - input (Variable): The input Variable to be computed variance, with data - type float32 and float64 supported. - axis (list|int, optional): The axis along which the variance is computed. - If `None`, compute the variance over all elements of :attr:`input` - and return a Variable with a single element, otherwise it must be in - the range :math:`[-rank(input), rank(input))`. If :math:`axis[i] < 0`, - the axis to compute is :math:`rank(input) + axis[i]`. - keepdim (bool, optional): Whether to reserve the reduced dimensions in - the output Variable. The dimensions in :attr:`axis` will be squeezed - and the result Variable will have :attr:`len(axis)` fewer dimensions - than the :attr:`input` unless :attr:`keepdim` is true, default False. - unbiased (bool, optional): Whether to compute variance via the unbiased - estimator, in which the divisor used in the computation is - :math:`N - 1`, where :math:`N` represents the number of elements - along :attr:`axis`, otherwise the divisor is :math:`N`. Default True. - out (Variable, optional): Alternate output Variable to store the result - variance. Default None. - name (str, optional): The name for this layer. Normally there is no - need for user to set this property. For more information, please - refer to :ref:`api_guide_Name`. Default None. + x (Tensor): The input Tensor with data type float32, float64. + axis (int|list|tuple, optional): The axis along which to perform + variance calculations. ``axis`` should be int, list(int) or + tuple(int). If ``axis`` is a list/tuple of dimension(s), variance + is calculated along all element(s) of ``axis`` . ``axis`` or + element(s) of ``axis`` should be in range [-D, D), where D is the + dimensions of ``x`` . If ``axis`` or element(s) of ``axis`` is less + than 0, it works the same way as :math:`axis + D` . If ``axis`` is + None, variance is calculated over all elements of ``x``. Default + is None. + unbiased (bool, optional): Whether to use the unbiased estimation. If + ``unbiased`` is True, the divisor used in the computation is + :math:`N - 1`, where :math:`N` represents the number of elements + along ``axis`` , otherwise the divisor is :math:`N`. Default is True. 
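``mean`` reduces over all elements when ``axis`` is None and keeps reduced dims of size 1 when ``keepdim=True``. A NumPy check against the docstring example above (x holds 1..24 in shape [2, 3, 4]):

    import numpy as np

    x = np.arange(1, 25, dtype='float32').reshape(2, 3, 4)
    assert float(x.mean()) == 12.5                            # mean over all elements
    assert x.mean(axis=-1, keepdims=True).shape == (2, 3, 1)  # keepdim keeps the reduced axis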
+ keepdim (bool, optional): Whether to reserve the reduced dimension(s) + in the output Tensor. If ``keepdim`` is True, the dimensions of + the output Tensor is the same as ``x`` except in the reduced + dimensions(it is of size 1 in this case). Otherwise, the shape of + the output Tensor is squeezed in ``axis`` . Default is False. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. Returns: - Variable: The result variance with the same dtype as :attr:`input`. - If :attr:`out = None`, returns a new Variable containing the - variance, otherwise returns a reference to the output Variable. + Tensor, results of variance along ``axis`` of ``x``, with the same data + type as ``x``. Examples: .. code-block:: python - import numpy as np import paddle - import paddle.fluid.dygraph as dg - - a = np.array([[1.0, 2.0], [3.0, 4.0]]).astype("float32") - with dg.guard(): - data = dg.to_variable(a) - variance = paddle.var(data, axis=[1]) - print(variance.numpy()) - # [0.5 0.5] + import numpy as np + + paddle.disable_static() + + x = np.array([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) + x = paddle.to_tensor(x) + out1 = paddle.var(x) + # [2.66666667] + out2 = paddle.var(x, axis=1) + # [1. 4.33333333] """ - dtype = convert_dtype(input.dtype) - if dtype not in ["float32", "float64"]: - raise ValueError("Layer tensor.var() only supports floating-point " - "dtypes, but received {}.".format(dtype)) - rank = len(input.shape) - axes = axis if axis != None and axis != [] else range(rank) - axes = [e if e >= 0 else e + rank for e in axes] - inp_shape = input.shape if in_dygraph_mode() else layers.shape(input) - mean = layers.reduce_mean(input, dim=axis, keep_dim=True, name=name) - tmp = layers.reduce_mean( - (input - mean)**2, dim=axis, keep_dim=keepdim, name=name) + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'var') + u = mean(x, axis, True, name) + out = paddle.sum((x - u)**2, axis, keepdim=keepdim, name=name) + + n = paddle.cast(paddle.numel(x), x.dtype) \ + / paddle.cast(paddle.numel(out), x.dtype) if unbiased: - n = 1 - for i in axes: - n *= inp_shape[i] - if not in_dygraph_mode(): - n = layers.cast(n, dtype) - zero_const = layers.fill_constant(shape=[1], dtype=dtype, value=0.0) - factor = where(n > 1.0, n / (n - 1.0), zero_const) - else: - factor = n / (n - 1.0) if n > 1.0 else 0.0 - tmp *= factor - if out: - layers.assign(input=tmp, output=out) - return out - else: - return tmp - - -def std(input, axis=None, keepdim=False, unbiased=True, out=None, name=None): + one_const = paddle.ones([1], x.dtype) + n = where(n > one_const, n - 1., one_const) + out /= n + return out + + +def std(x, axis=None, unbiased=True, keepdim=False, name=None): + """ + Computes the standard-deviation of ``x`` along ``axis`` . + + Args: + x (Tensor): The input Tensor with data type float32, float64. + axis (int|list|tuple, optional): The axis along which to perform + standard-deviation calculations. ``axis`` should be int, list(int) + or tuple(int). If ``axis`` is a list/tuple of dimension(s), + standard-deviation is calculated along all element(s) of ``axis`` . + ``axis`` or element(s) of ``axis`` should be in range [-D, D), + where D is the dimensions of ``x`` . If ``axis`` or element(s) of + ``axis`` is less than 0, it works the same way as :math:`axis + D` . + If ``axis`` is None, standard-deviation is calculated over all + elements of ``x``. Default is None. 
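By default ``unbiased=True``, so the sum of squared deviations is divided by N - 1 (clamped to 1), as the ``where`` call in the ``var`` body above implements. NumPy's ``ddof=1`` reproduces the docstring numbers:

    import numpy as np

    x = np.array([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
    assert np.isclose(x.var(ddof=1), 2.66666667)                  # paddle.var(x)
    assert np.allclose(x.var(axis=1, ddof=1), [1.0, 4.33333333])  # paddle.var(x, axis=1)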
+ unbiased (bool, optional): Whether to use the unbiased estimation. If + ``unbiased`` is True, the standard-deviation is calculated via the + unbiased estimator. If ``unbiased`` is True, the divisor used in + the computation is :math:`N - 1`, where :math:`N` represents the + number of elements along ``axis`` , otherwise the divisor is + :math:`N`. Default is True. + keepdim (bool, optional): Whether to reserve the reduced dimension(s) + in the output Tensor. If ``keepdim`` is True, the dimensions of + the output Tensor is the same as ``x`` except in the reduced + dimensions(it is of size 1 in this case). Otherwise, the shape of + the output Tensor is squeezed in ``axis`` . Default is False. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor, results of standard-deviation along ``axis`` of ``x``, with the + same data type as ``x``. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + + paddle.disable_static() + + x = np.array([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) + x = paddle.to_tensor(x) + out1 = paddle.std(x) + # [1.63299316] + out2 = paddle.std(x, axis=1) + # [1. 2.081666] """ - :alias_main: paddle.std - :alias: paddle.std,paddle.tensor.std,paddle.tensor.stat.std + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'std') - Computes the standard-deviation of the input Variable's elements along the specified - axis. + out = var(**locals()) + return paddle.sqrt(out) + + +def numel(x, name=None): + """ + Returns the number of elements for a tensor, which is a int64 Tensor with shape [1] in static mode + or a scalar value in imperative mode Args: - input (Variable): The input Variable to be computed standard-deviation, with data - type float32 and float64 supported. - axis (list|int, optional): The axis along which the standard-deviation is computed. - If `None`, compute the standard-deviation over all elements of :attr:`input` - and return a Variable with a single element, otherwise it must be in - the range :math:`[-rank(input), rank(input))`. If :math:`axis[i] < 0`, - the axis to compute is :math:`rank(input) + axis[i]`. - keepdim (bool, optional): Whether to reserve the reduced dimensions in - the output Variable. The dimensions in :attr:`axis` will be squeezed - and the result Variable will have :attr:`len(axis)` fewer dimensions - than the :attr:`input` unless :attr:`keepdim` is true, default False. - unbiased (bool, optional): Whether to compute standard-deviation via the unbiased - estimator, in which the divisor used in the computation is - :math:`N - 1`, where :math:`N` represents the number of elements - along :attr:`axis`, otherwise the divisor is :math:`N`. Default True. - out (Variable, optional): Alternate output Variable to store the result - standard-deviation . Default None. - name (str, optional): The name for this layer. Normally there is no - need for user to set this property. For more information, please - refer to :ref:`api_guide_Name`. Default None. + x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64. Returns: - Variable: The result standard-deviation with the same dtype as :attr:`input`. - If :attr:`out = None`, returns a new Variable containing the - standard-deviation , otherwise returns a reference to the output Variable. + Tensor: The number of elements for the input Tensor. 
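``std`` is implemented above as ``sqrt(var(...))``, so the unbiased N - 1 estimator carries over. Checking the docstring values with NumPy:

    import numpy as np

    x = np.array([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
    assert np.isclose(np.sqrt(x.var(ddof=1)), 1.63299316)                # paddle.std(x)
    assert np.allclose(np.sqrt(x.var(axis=1, ddof=1)), [1.0, 2.081666])  # paddle.std(x, axis=1)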
+ + Raises: + TypeError: ``x`` must be a Tensor and the data type of ``x`` must be one of bool, float16, float32, float64, int32, int64. + + Examples: .. code-block:: python import paddle - import paddle.fluid as fluid - # x is a Tensor variable with following elements: - # [[0.2, 0.3, 0.5, 0.9] - # [0.1, 0.2, 0.6, 0.7]] - # Each example is followed by the corresponding output tensor. - x = fluid.data(name='x', shape=[2, 4], dtype='float32') - paddle.std(x) # [0.28252685] - paddle.std(x, axis=[0]) # [0.0707107, 0.07071075, 0.07071064, 0.1414217] - paddle.std(x, axis=[-1]) # [0.30956957, 0.29439208] + + paddle.disable_static() + x = paddle.full(shape=[4, 5, 7], fill_value=0, dtype='int32') + numel = paddle.numel(x) # 140 + + """ - check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'std') - - tmp = var(input, axis=axis, keepdim=keepdim, unbiased=unbiased, name=name) - tmp = layers.sqrt(tmp) - if out is not None: - layers.assign(input=tmp, output=out) - return out - else: - return tmp + if in_dygraph_mode(): + return core.ops.size(x) + + if not isinstance(x, Variable): + raise TypeError("x must be a Tensor in numel") + helper = LayerHelper('numel', **locals()) + out = helper.create_variable_for_type_inference( + dtype=core.VarDesc.VarType.INT64) + helper.append_op(type='size', inputs={'Input': x}, outputs={'Out': out}) + return out diff --git a/python/paddle/tests/CMakeLists.txt b/python/paddle/tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..79bec8c4ad34d682895250bc29b1fddb3a569bd4 --- /dev/null +++ b/python/paddle/tests/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/tests/test_metrics.py b/python/paddle/tests/test_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..2272a81b3f602ec46972c9d4620ded9680e2ff5f --- /dev/null +++ b/python/paddle/tests/test_metrics.py @@ -0,0 +1,275 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import print_function + +import os +import unittest +import numpy as np + +import paddle +import paddle.fluid as fluid + +from paddle.incubate.hapi.utils import to_list + + +def accuracy(pred, label, topk=(1, )): + maxk = max(topk) + pred = np.argsort(pred)[:, ::-1][:, :maxk] + correct = (pred == np.repeat(label, maxk, 1)) + + batch_size = label.shape[0] + res = [] + for k in topk: + correct_k = correct[:, :k].sum() + res.append(float(correct_k) / batch_size) + return res + + +def convert_to_one_hot(y, C): + oh = np.random.choice(np.arange(C), C, replace=False).astype('float32') / C + oh = np.tile(oh[np.newaxis, :], (y.shape[0], 1)) + for i in range(y.shape[0]): + oh[i, int(y[i])] = 1. 
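    # Each row of `oh` starts as the same random permutation of arange(C)/C (all
    # values < 1); the true-label column is then overwritten with 1.0, so argmax per
    # row recovers `y` while the other columns act as lower-scoring distractors for
    # the top-k accuracy checks.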
+ return oh + + +class TestAccuracy(unittest.TestCase): + def test_acc(self): + paddle.disable_static() + + x = paddle.to_tensor( + np.array([[0.1, 0.2, 0.3, 0.4], [0.1, 0.4, 0.3, 0.2], + [0.1, 0.2, 0.4, 0.3], [0.1, 0.2, 0.3, 0.4]])) + y = paddle.to_tensor(np.array([[0], [1], [2], [3]])) + + m = paddle.metric.Accuracy(name='my_acc') + + # check name + self.assertEqual(m.name(), ['my_acc']) + + correct = m.compute(x, y) + # check results + self.assertEqual(m.update(correct), 0.75) + self.assertEqual(m.accumulate(), 0.75) + + x = paddle.to_tensor( + np.array([[0.1, 0.2, 0.3, 0.4], [0.1, 0.3, 0.4, 0.2], + [0.1, 0.2, 0.4, 0.3], [0.1, 0.2, 0.3, 0.4]])) + y = paddle.to_tensor(np.array([[0], [1], [2], [3]])) + correct = m.compute(x, y) + # check results + self.assertEqual(m.update(correct), 0.5) + self.assertEqual(m.accumulate(), 0.625) + + # check reset + m.reset() + self.assertEqual(m.total[0], 0.0) + self.assertEqual(m.count[0], 0.0) + paddle.enable_static() + + +class TestAccuracyDynamic(unittest.TestCase): + def setUp(self): + self.topk = (1, ) + self.class_num = 5 + self.sample_num = 1000 + self.name = None + + def random_pred_label(self): + label = np.random.randint(0, self.class_num, + (self.sample_num, 1)).astype('int64') + pred = np.random.randint(0, self.class_num, + (self.sample_num, 1)).astype('int32') + pred_one_hot = convert_to_one_hot(pred, self.class_num) + pred_one_hot = pred_one_hot.astype('float32') + + return label, pred_one_hot + + def test_main(self): + with fluid.dygraph.guard(fluid.CPUPlace()): + acc = paddle.metric.Accuracy(topk=self.topk, name=self.name) + for _ in range(10): + label, pred = self.random_pred_label() + label_var = paddle.to_tensor(label) + pred_var = paddle.to_tensor(pred) + state = to_list(acc.compute(pred_var, label_var)) + acc.update(* [s.numpy() for s in state]) + res_m = acc.accumulate() + res_f = accuracy(pred, label, self.topk) + assert np.all(np.isclose(np.array(res_m, dtype='float64'), + np.array(res_f, dtype='float64'), rtol=1e-3)), \ + "Accuracy precision error: {} != {}".format(res_m, res_f) + acc.reset() + assert np.sum(acc.total) == 0 + assert np.sum(acc.count) == 0 + + +class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic): + def setUp(self): + self.topk = (1, 5) + self.class_num = 10 + self.sample_num = 1000 + self.name = "accuracy" + + +class TestAccuracyStatic(TestAccuracyDynamic): + def test_main(self): + main_prog = fluid.Program() + startup_prog = fluid.Program() + main_prog.random_seed = 1024 + startup_prog.random_seed = 1024 + with fluid.program_guard(main_prog, startup_prog): + pred = fluid.data( + name='pred', shape=[None, self.class_num], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + acc = paddle.metric.Accuracy(topk=self.topk, name=self.name) + state = acc.compute(pred, label) + + exe = fluid.Executor(fluid.CPUPlace()) + compiled_main_prog = fluid.CompiledProgram(main_prog) + + for _ in range(10): + label, pred = self.random_pred_label() + state_ret = exe.run(compiled_main_prog, + feed={'pred': pred, + 'label': label}, + fetch_list=[s.name for s in to_list(state)], + return_numpy=True) + acc.update(*state_ret) + res_m = acc.accumulate() + res_f = accuracy(pred, label, self.topk) + assert np.all(np.isclose(np.array(res_m), np.array(res_f), rtol=1e-3)), \ + "Accuracy precision error: {} != {}".format(res_m, res_f) + acc.reset() + assert np.sum(acc.total) == 0 + assert np.sum(acc.count) == 0 + + +class TestAccuracyStaticMultiTopk(TestAccuracyStatic): + def setUp(self): + self.topk = (1, 5) 
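        # Reuses TestAccuracyStatic.test_main: top-1/top-5 accuracy is computed through
        # exe.run on a compiled program and cross-checked against the NumPy helper.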
+ self.class_num = 10 + self.sample_num = 100 + self.name = "accuracy" + + +class TestPrecision(unittest.TestCase): + def test_1d(self): + paddle.disable_static() + + x = np.array([0.1, 0.5, 0.6, 0.7]) + y = np.array([1, 0, 1, 1]) + + m = paddle.metric.Precision() + m.update(x, y) + r = m.accumulate() + self.assertAlmostEqual(r, 2. / 3.) + + x = paddle.to_tensor(np.array([0.1, 0.5, 0.6, 0.7, 0.2])) + y = paddle.to_tensor(np.array([1, 0, 1, 1, 1])) + m.update(x, y) + r = m.accumulate() + self.assertAlmostEqual(r, 4. / 6.) + + paddle.enable_static() + + def test_2d(self): + paddle.disable_static() + + x = np.array([0.1, 0.5, 0.6, 0.7]).reshape(-1, 1) + y = np.array([1, 0, 1, 1]).reshape(-1, 1) + + m = paddle.metric.Precision() + m.update(x, y) + r = m.accumulate() + self.assertAlmostEqual(r, 2. / 3.) + + x = np.array([0.1, 0.5, 0.6, 0.7, 0.2]).reshape(-1, 1) + y = np.array([1, 0, 1, 1, 1]).reshape(-1, 1) + m.update(x, y) + r = m.accumulate() + self.assertAlmostEqual(r, 4. / 6.) + + # check reset + m.reset() + self.assertEqual(m.tp, 0.0) + self.assertEqual(m.fp, 0.0) + self.assertEqual(m.accumulate(), 0.0) + + paddle.enable_static() + + +class TestRecall(unittest.TestCase): + def test_1d(self): + paddle.disable_static() + + x = np.array([0.1, 0.5, 0.6, 0.7]) + y = np.array([1, 0, 1, 1]) + + m = paddle.metric.Recall() + m.update(x, y) + r = m.accumulate() + self.assertAlmostEqual(r, 2. / 3.) + + x = paddle.to_tensor(np.array([0.1, 0.5, 0.6, 0.7])) + y = paddle.to_tensor(np.array([1, 0, 0, 1])) + m.update(x, y) + r = m.accumulate() + self.assertAlmostEqual(r, 3. / 5.) + + # check reset + m.reset() + self.assertEqual(m.tp, 0.0) + self.assertEqual(m.fn, 0.0) + self.assertEqual(m.accumulate(), 0.0) + paddle.enable_static() + + +class TestAuc(unittest.TestCase): + def test_auc_numpy(self): + paddle.disable_static() + x = np.array([[0.78, 0.22], [0.62, 0.38], [0.55, 0.45], [0.30, 0.70], + [0.14, 0.86], [0.59, 0.41], [0.91, 0.08], [0.16, 0.84]]) + y = np.array([[0], [1], [1], [0], [1], [0], [0], [1]]) + m = paddle.metric.Auc() + m.update(x, y) + r = m.accumulate() + self.assertAlmostEqual(r, 0.8125) + + m.reset() + self.assertEqual(m.accumulate(), 0.0) + + paddle.enable_static() + + def test_auc_tensor(self): + paddle.disable_static() + x = paddle.to_tensor( + np.array([[0.78, 0.22], [0.62, 0.38], [0.55, 0.45], [0.30, 0.70], + [0.14, 0.86], [0.59, 0.41], [0.91, 0.08], [0.16, 0.84]])) + y = paddle.to_tensor(np.array([[0], [1], [1], [0], [1], [0], [0], [1]])) + m = paddle.metric.Auc() + m.update(x, y) + r = m.accumulate() + self.assertAlmostEqual(r, 0.8125) + + m.reset() + self.assertEqual(m.accumulate(), 0.0) + + paddle.enable_static() + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/utils/deprecated.py b/python/paddle/utils/deprecated.py index 27621c2d872a6d10ec3259312abe318fef5b334b..ed0d84d729f0deb12d62095b3a36bac88c6c7be1 100644 --- a/python/paddle/utils/deprecated.py +++ b/python/paddle/utils/deprecated.py @@ -19,6 +19,14 @@ import warnings import functools import paddle +# NOTE(zhiqiu): Since python 3.2, DeprecationWarning is ignored by default, +# and since python 3.7, it is once again shown by default when triggered directly by code in __main__. +# See details: https://docs.python.org/3/library/warnings.html#default-warning-filter +# The following line set DeprecationWarning to show once, which is expected to work in python 3.2 -> 3.6 +# However, doing this could introduce one samll side effect, i.e., the DeprecationWarning which is not issued by @deprecated. 
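The note above (and the ``warnings.simplefilter('default', DeprecationWarning)`` call that follows) makes each deprecation warning print once per call site instead of being silenced. A standalone illustration of the 'default' action, independent of paddle:

    import warnings

    warnings.simplefilter('default', DeprecationWarning)

    def old_api():
        warnings.warn("old_api is deprecated", DeprecationWarning, stacklevel=2)

    for _ in range(3):
        old_api()   # the warning is printed on the first iteration only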
+# The side effect is acceptable, and we will find better way to do this if we could. +warnings.simplefilter('default', DeprecationWarning) + def deprecated(update_to="", since="", reason=""): """Decorate a function to signify its deprecation. @@ -54,7 +62,7 @@ def deprecated(update_to="", since="", reason=""): "paddle." ), 'Argument update_to must start with "paddle.", your value is "{}"'.format( update_to) - msg += ' Use "{}" instead.'.format(_update_to) + msg += ' Please use "{}" instead.'.format(_update_to) if len(_reason) > 0: msg += "\n reason: {}".format(_reason) @@ -70,11 +78,8 @@ def deprecated(update_to="", since="", reason=""): v_since = [int(i) for i in _since.split(".")] v_since += [0] * (4 - len(v_since)) if paddle.__version__ == "0.0.0" or _since == "" or v_current >= v_since: - warnings.simplefilter('always', - DeprecationWarning) # turn off filter warnings.warn(msg, category=DeprecationWarning, stacklevel=2) - warnings.simplefilter('default', - DeprecationWarning) # reset filter + return func(*args, **kwargs) return wrapper diff --git a/python/requirements.txt b/python/requirements.txt index 13a1c9a9d638daf6a78f52d9d66fcf3f15b74c37..28c84c1d630a66077a737a48bb8a26200ec48f0b 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,12 +1,13 @@ +opencv-python<=4.2.0.32 requests>=2.20.0 -numpy>=1.12, <=1.16.4 ; python_version<"3.5" -numpy>=1.12 ; python_version>="3.5" +numpy>=1.13, <=1.16.4 ; python_version<"3.5" +numpy>=1.13 ; python_version>="3.5" protobuf>=3.1.0 -gast>=0.3.3 +gast==0.3.3 matplotlib<=2.2.4 ; python_version<"3.6" scipy>=0.19.0, <=1.2.1 ; python_version<"3.5" nltk>=3.2.2, <=3.4 ; python_version<"3.5" -matplotlib ; python_version>="3.6" +matplotlib<=3.2.1 ; python_version>="3.6" scipy<=1.3.1 ; python_version=="3.5" scipy ; python_version>"3.5" nltk ; python_version>="3.5" diff --git a/python/setup.py.in b/python/setup.py.in index 29bc68444e1e6348218a3ada3233333c2b9f1cde..5b206296bd641bf909115d1c580518afe85a37b6 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -201,6 +201,7 @@ packages=['paddle', 'paddle.nn.functional', 'paddle.nn.layer', 'paddle.nn.initializer', + 'paddle.nn.utils', 'paddle.metric', 'paddle.static', 'paddle.static.nn', @@ -302,6 +303,23 @@ if '${WITH_MKLDNN}' == 'ON': else: package_data['paddle.libs']+=['mkldnn.dll'] +if '${WITH_XPU}' == 'ON': + # only change rpath in Release mode, + if '${CMAKE_BUILD_TYPE}' == 'Release': + if os.name != 'nt': + if "@APPLE@" == "1": + command = "install_name_tool -id \"@loader_path/\" ${XPU_API_LIB}" + else: + command = "patchelf --set-rpath '$ORIGIN/' ${XPU_API_LIB}" + if os.system(command) != 0: + raise Exception("patch ${XPU_API_LIB} failed, command: %s" % command) + shutil.copy('${XPU_API_LIB}', libs_path) + shutil.copy('${XPU_RT_LIB}', libs_path) + shutil.copy('${XPU_SIM_LIB}', libs_path) + package_data['paddle.libs']+=['${XPU_API_LIB_NAME}', + '${XPU_RT_LIB_NAME}', + '${XPU_SIM_LIB_NAME}'] + # copy libfuild_framework.so to libs if os.name != 'nt' and sys.platform != 'darwin': paddle_framework_lib='${FLUID_FRAMEWORK_SHARED_LIB}' diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index e2f37bb9aa2e8ed49948e37026a2a87469d6245b..2c575e4abf1beed039d3293821b8df356d4e9295 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -19,7 +19,8 @@ API_FILES=("CMakeLists.txt" "paddle/fluid/framework/ir/node.h" "paddle/fluid/framework/ir/graph.h" "paddle/fluid/framework/framework.proto" - "python/paddle/fleet/__init__.py" + 
"python/paddle/distributed/__init" + "python/paddle/distributed/fleet/__init__.py" "python/requirements.txt" "python/paddle/fluid/__init__.py" "python/paddle/fluid/compiler.py" @@ -118,7 +119,10 @@ for API_FILE in ${API_FILES[*]}; do elif [ "${API_FILE}" == "tools/wlist.json" ];then echo_line="You must have one TPM (jzhang533) approval for the api whitelist for the tools/wlist.json.\n" check_approval 1 29231 - elif [ "${API_FILE}" == "python/paddle/fleet/__init__.py" ]; then + elif [ "${API_FILE}" == "python/paddle/distributed/fleet/__init__.py" ]; then + echo_line="You must have (guru4elephant,raindrops2sea) approval for ${API_FILE} changes " + check_approval 1 35550832 38231817 + elif [ "${API_FILE}" == "python/paddle/distributed/__init__.py" ]; then echo_line="You must have (guru4elephant,raindrops2sea) approval for ${API_FILE} changes " check_approval 1 35550832 38231817 else diff --git a/tools/dockerfile/Dockerfile.ubuntu b/tools/dockerfile/Dockerfile.ubuntu index f424d676f70b127d84469bd70d9e7161a93f7bba..aa547fe74163fdbe08c1266817b7f1903e541ad5 100644 --- a/tools/dockerfile/Dockerfile.ubuntu +++ b/tools/dockerfile/Dockerfile.ubuntu @@ -219,4 +219,11 @@ RUN wget -q http://mirrors.kernel.org/ubuntu/pool/universe/p/patchelf/patchelf_0 RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config CMD source ~/.bashrc +# ccache 3.7.9 +RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \ + tar xf ccache-3.7.9.tar.gz && mkdir /usr/local/ccache-3.7.9 && cd ccache-3.7.9 && \ + ./configure -prefix=/usr/local/ccache-3.7.9 && \ + make -j8 && make install && \ + ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache + EXPOSE 22 diff --git a/tools/wlist.json b/tools/wlist.json index 6989882504eded7c56851e6e9351cef9b4975137..6a0360fbcd9d06885d8dff7044e0cce63fa7d92c 100644 --- a/tools/wlist.json +++ b/tools/wlist.json @@ -63,7 +63,6 @@ "Compressor", "Compressor.config", "Compressor.run", - "run_check", "HDFSClient.upload", "HDFSClient.download", "HDFSClient.is_exist", @@ -107,12 +106,27 @@ "Metric.update", "Metric.accumulate", "Metric.name", - "Metric.add_metric_op", + "Metric.compute", "Accuracy.reset", "Accuracy.update", "Accuracy.accumulate", "Accuracy.name", - "Accuracy.add_metric_op", + "Accuracy.compute", + "Precision.reset", + "Precision.update", + "Precision.accumulate", + "Precision.name", + "Precision.compute", + "Recall.reset", + "Recall.update", + "Recall.accumulate", + "Recall.name", + "Recall.compute", + "Auc.reset", + "Auc.update", + "Auc.accumulate", + "Auc.name", + "Auc.compute", "Callback.set_params", "Callback.on_train_begin", "Callback.on_train_end",