diff --git a/.gitignore b/.gitignore index 7480bd53a403d74932d56409fdb0a9dd7bb6b9d6..020d3f0c303f7d850f4ec9c0efe58ab2d57dce2e 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,3 @@ cmake_install.cmake paddle/.timestamp python/paddlepaddle.egg-info/ paddle/pybind/pybind.h -python/paddle/v2/framework/tests/tmp/* diff --git a/CMakeLists.txt b/CMakeLists.txt index fd3582a1bca199d62d19550ffdd1efe9db520fa7..65164b8472b902be8b0b9d5fb99807d012b8a666 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,8 +36,7 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) -option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) -option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND}) +option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND}) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) @@ -82,10 +81,8 @@ if(ANDROID OR IOS) "Disable PYTHON when cross-compiling for Android and iOS" FORCE) set(WITH_RDMA OFF CACHE STRING "Disable RDMA when cross-compiling for Android and iOS" FORCE) - set(WITH_MKLDNN OFF CACHE STRING - "Disable MKLDNN when cross-compiling for Android and iOS" FORCE) - set(WITH_MKLML OFF CACHE STRING - "Disable MKLML package when cross-compiling for Android and iOS" FORCE) + set(WITH_MKL OFF CACHE STRING + "Disable MKL when cross-compiling for Android and iOS" FORCE) # Compile PaddlePaddle mobile inference library if (NOT WITH_C_API) @@ -111,6 +108,14 @@ else() set(THIRD_PARTY_BUILD_TYPE Release) endif() +set(WITH_MKLML ${WITH_MKL}) +if (WITH_MKL AND AVX2_FOUND) + set(WITH_MKLDNN ON) +else() + message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN") + 
set(WITH_MKLDNN OFF) +endif() + ######################################################################################## include(external/mklml) # download mklml package @@ -158,14 +163,15 @@ set(EXTERNAL_LIBS ) if(WITH_GPU) - list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) - if(NOT WITH_DSO) - list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY}) - endif(NOT WITH_DSO) + include(cuda) endif(WITH_GPU) +if(WITH_MKLML) + list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB}) +endif() + if(WITH_MKLDNN) - list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB}) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB}) endif() if(USE_NNPACK) diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md index 040f5ffa41968cbf93a817faa1db86c18956341e..16c2390fd31bf1c79f29735fb98180d3f7302eb2 100644 --- a/benchmark/IntelOptimizedPaddle.md +++ b/benchmark/IntelOptimizedPaddle.md @@ -12,11 +12,11 @@ Machine: System: CentOS release 6.3 (Final), Docker 1.12.1. -PaddlePaddle: paddlepaddle/paddle:latest (TODO: will rerun after 0.11.0) - -- MKL-DNN tag v0.10 -- MKLML 2018.0.20170720 +PaddlePaddle: paddlepaddle/paddle:latest (for MKLML and MKL-DNN), paddlepaddle/paddle:latest-openblas (for OpenBLAS) +- MKL-DNN tag v0.11 +- MKLML 2018.0.1.20171007 - OpenBLAS v0.2.20 +(TODO: will rerun after 0.11.0) On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively. 
@@ -31,17 +31,37 @@ Input image size - 3 * 224 * 224, Time: images/second | BatchSize | 64 | 128 | 256 | |--------------|-------| -----| --------| -| OpenBLAS | 7.82 | 8.62 | 10.34 | -| MKLML | 11.02 | 12.86 | 15.33 | -| MKL-DNN | 27.69 | 28.8 | 29.27 | +| OpenBLAS | 7.80 | 9.00 | 10.80 | +| MKLML | 12.12 | 13.70 | 16.18 | +| MKL-DNN | 28.46 | 29.83 | 30.44 | + + +chart on batch size 128 +TBD + + - ResNet-50 + +| BatchSize | 64 | 128 | 256 | +|--------------|-------| ------| -------| +| OpenBLAS | 25.22 | 25.68 | 27.12 | +| MKLML | 32.52 | 31.89 | 33.12 | +| MKL-DNN | 81.69 | 82.35 | 84.08 | chart on batch size 128 TBD - - ResNet - GoogLeNet +| BatchSize | 64 | 128 | 256 | +|--------------|-------| ------| -------| +| OpenBLAS | 89.52 | 96.97 | 108.25 | +| MKLML | 128.46| 137.89| 158.63 | +| MKL-DNN     | 250.46| 264.83| 269.50 | + +chart on batch size 128 +TBD + ### Laptop TBD ### Desktop diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py index bc893bab98c4d2e07c62fbd012d51a0939db4766..a88ecac67d9e677f14f6dc24ba9a337b1245243f 100644 --- a/benchmark/paddle/image/googlenet.py +++ b/benchmark/paddle/image/googlenet.py @@ -5,6 +5,7 @@ height = 224 width = 224 num_class = 1000 batch_size = get_config_arg('batch_size', int, 128) +use_gpu = get_config_arg('use_gpu', bool, True) args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} define_py_data_sources2( @@ -16,6 +17,8 @@ settings( learning_method=MomentumOptimizer(0.9), regularization=L2Regularization(0.0005 * batch_size)) +conv_projection = conv_projection if use_gpu else img_conv_layer + def inception2(name, input, channels, \ filter1, filter3R, filter3, @@ -138,7 +141,7 @@ def inception(name, input, channels, \ cat = concat_layer( name=name, input=[cov1, cov3, cov5, covprj], - bias_attr=True, + bias_attr=True if use_gpu else False, act=ReluActivation()) return cat diff --git a/benchmark/paddle/image/run_mkldnn.sh b/benchmark/paddle/image/run_mkldnn.sh 
index a4527e04968cf8c8c3c31d16f50bc3e28381f6d8..f768f6c29a84b40f917e0ccfde4d8c15f65c818b 100755 --- a/benchmark/paddle/image/run_mkldnn.sh +++ b/benchmark/paddle/image/run_mkldnn.sh @@ -1,9 +1,7 @@ set -e function train() { - unset OMP_NUM_THREADS MKL_NUM_THREADS - export OMP_DYNAMIC="FALSE" - export KMP_AFFINITY="granularity=fine,compact,0,0" + unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY topology=$1 layer_num=$2 bs=$3 @@ -14,8 +12,6 @@ function train() { elif [ $4 == "False" ]; then thread=`nproc` # each trainer_count use only 1 core to avoid conflict - export OMP_NUM_THREADS=1 - export MKL_NUM_THREADS=1 log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log" else echo "Wrong input $3, use True or False." @@ -44,6 +40,7 @@ fi for use_mkldnn in True False; do for batchsize in 64 128 256; do train vgg 19 $batchsize $use_mkldnn - train resnet 50 $batchsize $use_mkldnn + train resnet 50 $batchsize $use_mkldnn + train googlenet v1 $batchsize $use_mkldnn done done diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 24ddb24399dabeec9b8e5faf36be3eb21f420111..e550ec285668ea25757eeee9e7c5dc48fc9d339d 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -76,27 +76,14 @@ else() include_directories(${CUDA_TOOLKIT_INCLUDE}) endif(NOT WITH_GPU) -if(WITH_MKLDNN) - add_definitions(-DPADDLE_USE_MKLDNN) - if (WITH_MKLML AND MKLDNN_IOMP_DIR) - message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}") - set(OPENMP_FLAGS "-fopenmp") - set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) - set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") - else() - find_package(OpenMP) - if(OPENMP_FOUND) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - else() - message(WARNING "Can not find OpenMP." 
- "Some performance features in MKLDNN may not be available") - endif() - endif() - -endif(WITH_MKLDNN) +if (WITH_MKLML AND MKLML_IOMP_LIB) + message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}") + set(OPENMP_FLAGS "-fopenmp") + set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") +endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}") diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index 310450f7d009dc0cdae9c0079a96445af8ec8f95..d3f5bf6852b3b295f3b5806b0577a880b0ce6ba6 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") # Set the architecture for iOS if(NOT DEFINED IOS_ARCH) if(IOS_PLATFORM STREQUAL "OS") - # FIXME(liuyiqun): support "armv7;armv7s;arm64" future - set(IOS_ARCH "arm64") + set(IOS_ARCH "armv7;armv7s;arm64") elseif(IOS_PLATFORM STREQUAL "SIMULATOR") - # FIXME(liuyiqun): support "i386;x86_64" future - set(IOS_ARCH "x86_64") + set(IOS_ARCH "i386;x86_64") endif() endif() set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") @@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_ # Hidden visibilty is required for cxx on iOS set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") -set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") +set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first") diff --git a/cmake/cuda.cmake 
b/cmake/cuda.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..6bea7cf3022242ce48cc882915f7e71810937283
--- /dev/null
+++ b/cmake/cuda.cmake
@@ -0,0 +1,207 @@
+if(NOT WITH_GPU)
+  return()
+endif()
+
+# Compute capabilities to build for, as space-separated strings. The suffixed
+# lists are substituted below when CUDA 7.x or 8.x is detected.
+set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
+set(paddle_known_gpu_archs7 "30 35 50 52")
+set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
+
+######################################################################################
+# A function for automatic detection of GPUs installed (if autodetection is enabled)
+# Usage:
+#   detect_installed_gpus(out_variable)
+# Arguments:
+#   out_variable - name of the variable in the caller's scope that receives the
+#                  detected compute capabilities, or ${paddle_known_gpu_archs}
+#                  when detection fails.
+# A successful probe is stored in the CUDA_gpu_detect_output cache entry and is
+# not repeated on later configure runs.
+function(detect_installed_gpus out_variable)
+  if(NOT CUDA_gpu_detect_output)
+    set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)
+
+    # <cstdio> declares std::printf, which the probe uses to print one
+    # "major.minor" pair per visible device.
+    file(WRITE ${cufile} ""
+      "#include <cstdio>\n"
+      "int main() {\n"
+      "  int count = 0;\n"
+      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
+      "  if (count == 0) return -1;\n"
+      "  for (int device = 0; device < count; ++device) {\n"
+      "    cudaDeviceProp prop;\n"
+      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
+      "      std::printf(\"%d.%d \", prop.major, prop.minor);\n"
+      "  }\n"
+      "  return 0;\n"
+      "}\n")
+
+    # Compile and run the probe with nvcc in one step (--run).
+    execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "-ccbin=${CUDA_HOST_COMPILER}"
+                    "--run" "${cufile}"
+                    WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
+                    RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
+                    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+    if(nvcc_res EQUAL 0)
+      # only keep the last line of nvcc_out
+      string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
+      string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
+      list(GET nvcc_out -1 nvcc_out)
+      # Express 2.1 in BIN(PTX) form so it is built as sm_21 from compute_20.
+      string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
+      set(CUDA_gpu_detect_output ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_installed_gpus tool" FORCE)
+    endif()
+  endif()
+
+  if(NOT CUDA_gpu_detect_output)
+    message(STATUS "Automatic GPU detection failed. Building for all known architectures.")
+    set(${out_variable} ${paddle_known_gpu_archs} PARENT_SCOPE)
+  else()
+    set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
+  endif()
+endfunction()
+
+
+########################################################################
+# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
+# Usage:
+#   select_nvcc_arch_flags(out_variable)
+# Arguments:
+#   out_variable - name of the variable in the caller's scope that receives the
+#                  list of -gencode flags; ${out_variable}_readable receives a
+#                  space-separated summary of the selected sm_/compute_ targets.
+function(select_nvcc_arch_flags out_variable)
+  # List of arch names. "Volta" must be listed here, otherwise the validation
+  # below rejects it and its branch further down can never be reached.
+  set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual")
+  set(archs_name_default "All")
+  if(NOT CMAKE_CROSSCOMPILING)
+    list(APPEND archs_names "Auto")
+  endif()
+
+  # set CUDA_ARCH_NAME strings (so it will be seen as a drop-down in CMake-Gui)
+  set(CUDA_ARCH_NAME ${archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.")
+  set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names} )
+  mark_as_advanced(CUDA_ARCH_NAME)
+
+  # verify CUDA_ARCH_NAME value
+  if(NOT ";${archs_names};" MATCHES ";${CUDA_ARCH_NAME};")
+    string(REPLACE ";" ", " archs_names "${archs_names}")
+    message(FATAL_ERROR "Only ${archs_names} architeture names are supported.")
+  endif()
+
+  if("${CUDA_ARCH_NAME}" STREQUAL "Manual")
+    set(CUDA_ARCH_BIN ${paddle_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
+    set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
+    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
+  else()
+    unset(CUDA_ARCH_BIN CACHE)
+    unset(CUDA_ARCH_PTX CACHE)
+  endif()
+
+  if("${CUDA_ARCH_NAME}" STREQUAL "Kepler")
+    set(cuda_arch_bin "30 35")
+  elseif("${CUDA_ARCH_NAME}" STREQUAL "Maxwell")
+    set(cuda_arch_bin "50")
+  elseif("${CUDA_ARCH_NAME}" STREQUAL "Pascal")
+    set(cuda_arch_bin "60 61")
+  elseif("${CUDA_ARCH_NAME}" STREQUAL "Volta")
+    set(cuda_arch_bin "70")
+  elseif("${CUDA_ARCH_NAME}" STREQUAL "All")
+    set(cuda_arch_bin ${paddle_known_gpu_archs})
+  elseif("${CUDA_ARCH_NAME}" STREQUAL "Auto")
+    detect_installed_gpus(cuda_arch_bin)
+  else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
+    set(cuda_arch_bin ${CUDA_ARCH_BIN})
+  endif()
+
+  # remove dots and convert to lists
+  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
+  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${CUDA_ARCH_PTX}")
+  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
+  string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")
+  list(REMOVE_DUPLICATES cuda_arch_bin)
+  list(REMOVE_DUPLICATES cuda_arch_ptx)
+
+  set(nvcc_flags "")
+  set(nvcc_archs_readable "")
+
+  # Tell NVCC to add binaries for the specified GPUs
+  foreach(arch ${cuda_arch_bin})
+    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
+      # User explicitly specified PTX for the concrete BIN
+      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
+      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
+    else()
+      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
+      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
+      list(APPEND nvcc_archs_readable sm_${arch})
+    endif()
+  endforeach()
+
+  # Tell NVCC to add PTX intermediate code for the specified architectures
+  foreach(arch ${cuda_arch_ptx})
+    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
+    list(APPEND nvcc_archs_readable compute_${arch})
+  endforeach()
+
+  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
+  set(${out_variable} ${nvcc_flags} PARENT_SCOPE)
+  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
+endfunction()
+
+message(STATUS "CUDA detected: " ${CUDA_VERSION})
+# Pick the architecture list matching the detected CUDA version.
+if(CUDA_VERSION VERSION_LESS 7.0)
+  # No dedicated list for toolkits older than 7.0; this assignment leaves the default unchanged.
+  set(paddle_known_gpu_archs ${paddle_known_gpu_archs})
+elseif(CUDA_VERSION VERSION_LESS 8.0) # CUDA 7.x
+  set(paddle_known_gpu_archs ${paddle_known_gpu_archs7})
+  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
+  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
+elseif(CUDA_VERSION VERSION_LESS 9.0) # CUDA 8.x
+  set(paddle_known_gpu_archs ${paddle_known_gpu_archs8})
+  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
+  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
+  # CUDA 8 may complain that sm_20 is no longer supported. Suppress the
+  # warning for now.
+  list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets")
+endif()
+
+include_directories(${CUDA_INCLUDE_DIRS})
+list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
+if(NOT WITH_DSO)
+  list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
+endif()
+
+# setting nvcc arch flags
+select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
+list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
+message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}")
+
+# Set C++11 support
+set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+
+# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
+# So, don't set these flags here.
+list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
+list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
+list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
+# Set :expt-relaxed-constexpr to suppress Eigen warnings
+list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")
+
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+  list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG})
+elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
+  list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
+elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
+  list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
+elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
+  list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_MINSIZEREL})
+endif()
+
+mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
+mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake
index 5a06825beb73e85d8a55b7b578b187bee2c4340c..fc52d339d7a336b44c97f2e0a9fc8d6604854365 100644
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake @@ -40,10 +40,9 @@ INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) IF(${CBLAS_PROVIDER} STREQUAL "MKLML") SET(MKLDNN_DEPENDS ${MKLML_PROJECT}) - SET(MKLDNN_MKLROOT ${MKLML_ROOT}) - SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB}) - SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR}) - MESSAGE(STATUS "Build MKLDNN with ${MKLDNN_MKLROOT}") + MESSAGE(STATUS "Build MKLDNN with MKLML ${MKLML_ROOT}") +ELSE() + MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN") ENDIF() SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} -Wno-error=strict-overflow") @@ -57,15 +56,16 @@ ExternalProject_Add( PREFIX ${MKLDNN_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} - CMAKE_ARGS -DMKLROOT=${MKLDNN_MKLROOT} + CMAKE_ARGS -DMKLROOT=${MKLML_ROOT} CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CFLAG} CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} - -DMKLROOT:PATH=${MKLDNN_MKLROOT} + -DMKLROOT:PATH=${MKLML_ROOT} ) ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) -MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIB}") +MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}") +add_definitions(-DPADDLE_USE_MKLDNN) LIST(APPEND external_project_dependencies mkldnn) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 05d83ad58ef8485d36829e7aeede79f625cfdc43..4c4f59656dae68739f2f07f3febd510e727fe2dd 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -29,7 +29,7 @@ IF(NOT ${CBLAS_FOUND}) "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE FILEPATH "openblas library." 
FORCE) - SET(OPENBLAS_CC "${CMAKE_C_COMPILER}") + SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") IF(CMAKE_CROSSCOMPILING) SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER}) @@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND}) SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) ENDIF() ELSEIF(IOS) - # FIXME(liuyiqun): support multiple architectures - SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") - SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") - IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7") - SET(OPENBLAS_CC "${OPENBLAS_CC} -arch armv7") - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) - ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") + SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64") SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX}) + ELSE() + MESSAGE(FATAL_ERROR "OpenBLAS only support arm64 architectures on iOS. " + "You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead.") ENDIF() ELSEIF(RPI) # use hardfp @@ -98,7 +97,7 @@ IF(NOT ${CBLAS_FOUND}) ENDIF() INSTALL(CODE "execute_process( COMMAND ${CMAKE_COMMAND} -E copy_directory ${CBLAS_INSTALL_DIR}/lib - destination ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR} + ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR} )" ) INSTALL(CODE "MESSAGE(STATUS \"Installing: \" diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 8bd058222880b4df3b08da09c02f9fe7f1d0ee66..a8e1aca49c97df256b1269c286b0bce7732fa932 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+IF(MOBILE_INFERENCE) + return() +ENDIF() + INCLUDE(ExternalProject) SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 4593ae6180b6d7deb61d897eb634b17ac0bb1683..2b125cef6aa8d1021afe8a7a0d232d84d36be4bc 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -149,58 +149,3 @@ endforeach() foreach(flag ${GPU_COMMON_FLAGS}) safe_set_nvflag(${flag}) endforeach() - - -set(CUDA_PROPAGATE_HOST_FLAGS OFF) - -# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc. -# So, don't set these flags here. -LIST(APPEND CUDA_NVCC_FLAGS -std=c++11) -LIST(APPEND CUDA_NVCC_FLAGS --use_fast_math) - -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - LIST(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG}) -elseif(CMAKE_BUILD_TYPE STREQUAL "Release") - LIST(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE}) -elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - LIST(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) -elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") - LIST(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_MINSIZEREL}) -endif() - -function(specify_cuda_arch cuda_version cuda_arch) - if(${cuda_version} VERSION_GREATER "8.0") - foreach(capability 61 62) - if(${cuda_arch} STREQUAL ${capability}) - list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}") - endif() - endforeach() - elseif(${cuda_version} VERSION_GREATER "7.0" and ${cuda_arch} STREQUAL "53") - list(APPEND __arch_flags " -gencode arch=compute_${cuda_arch},code=sm_${cuda_arch}") - endif() -endfunction() - -# Common gpu architectures: Kepler, Maxwell -foreach(capability 30 35 50) - list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}") -endforeach() - -if (CUDA_VERSION VERSION_GREATER "7.0" OR CUDA_VERSION VERSION_EQUAL "7.0") - list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52") -endif() - -# Modern gpu architectures: Pascal -if (CUDA_VERSION VERSION_GREATER "8.0" OR CUDA_VERSION 
VERSION_EQUAL "8.0") - list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60") - list(APPEND CUDA_NVCC_FLAGS --expt-relaxed-constexpr) -endif() - -# Custom gpu architecture -set(CUDA_ARCH) - -if(CUDA_ARCH) - specify_cuda_arch(${CUDA_VERSION} ${CUDA_ARCH}) -endif() - -set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS}) - diff --git a/cmake/util.cmake b/cmake/util.cmake index 117ab7f49cdf4a568cd203b2b17767643d0b2d50..ad905ab55ba3537054fa5b30b5fca4d83c406702 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -115,8 +115,8 @@ function(link_paddle_exe TARGET_NAME) target_link_libraries(${TARGET_NAME} log) endif(ANDROID) - if(WITH_MKLDNN AND WITH_MKLML AND MKLDNN_IOMP_DIR) - target_link_libraries(${TARGET_NAME} "-L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed") + if(WITH_MKLML AND MKLML_LIB_DIR AND MKLML_IOMP_LIB) + target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") endif() add_dependencies(${TARGET_NAME} ${external_project_dependencies}) diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 203506d7ab84e5a5be2232b077eac2d433a99766..d4d182f6692e09b3e40f3620b77d9a0f20ec5af3 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -335,6 +335,16 @@ bilinear_interp .. autoclass:: paddle.v2.layer.bilinear_interp :noindex: +dot_prod +--------- +.. autoclass:: paddle.v2.layer.dot_prod + :noindex: + +out_prod +-------- +.. autoclass:: paddle.v2.layer.out_prod + :noindex: + power ----- .. autoclass:: paddle.v2.layer.power @@ -372,6 +382,11 @@ cos_sim .. autoclass:: paddle.v2.layer.cos_sim :noindex: +l2_distance +----------- +.. autoclass:: paddle.v2.layer.l2_distance + :noindex: + trans ----- .. 
autoclass:: paddle.v2.layer.trans diff --git a/doc/design/evaluator.md b/doc/design/evaluator.md new file mode 100644 index 0000000000000000000000000000000000000000..a62d75ffef14962aec8c7587e172d78dfe0cb4be --- /dev/null +++ b/doc/design/evaluator.md @@ -0,0 +1,58 @@ +## Evaluator Design + +### The Problem + +During training or serving, we provide the evaluation function to measure the model performance, e.g., accuracy, precision. In the operator based framework design, the data go through the network pipeline batch by batch. As a result, inside the operator, we can only calculate the metrics of one mini-batch. We need to provide a mechanism to calculate the metrics over every N passes/batches, as the user requires. + +### Evaluator Design +Currently, every operation is expressed in the graph. We divide the evaluator process into three steps. + +1. Initialize the metric state and add it into the block. + +2. Calculate the statistic of the metric state in every mini-batch. The single operator is only responsible for calculating necessary statistics for one mini-batch. For example, the accuracy operator only processes one mini-batch of data each time it runs. + + +3. Merge the mini-batch statistics to form the evaluation result for multiple mini-batches. When it comes to distributed training/Multi-GPU training, aggregate the value from different devices. + +### Implementation +This design is illustrated with the Python API. +Each metric operator needs to calculate the metric statistics and return the batch-aware states; the Python side is responsible for accumulating the states for each pass. + + +```python +class Evaluator(object): + """ + Evaluator Base class. + """ + def __init__(self, name, **kwargs): + """ + Different evaluators may have different metric states. E.g., Accuracy needs two variables, total and right sample counts. + Auc needs four variables, `true_positives`, + `true_negatives`, `false_positives` and `false_negatives`. 
So every evaluator should create its needed variables and append to main_program + + The initialization of Evaluator should be responsible for: + create metric states and append to the main_program + """ + pass + + def _update_ops(self, input, label, **kwargs): + """ + Add mini-batch evaluator calculation operators to the main_program. + Add increment operator to accumulate the metric states. + """ + + + def reset(self, executor, reset_program=None): + """ + Reset metric states at the beginning of each pass/user specified batch number. + Execute the reset_program to reset the states. + """ + + + def eval(self, executor, eval_program=None): + """ + Merge the mini-batch statistics to form the evaluation result for multiple mini-batches. + Execute the eval_program and return the result. + """ + return eval_result +``` diff --git a/doc/design/mkldnn/README.MD b/doc/design/mkldnn/README.MD index 16236763a73770f3fe5eadf67645765d0456f875..ec6d4681836e189f46dbb9b915a237dc15cda7cf 100644 --- a/doc/design/mkldnn/README.MD +++ b/doc/design/mkldnn/README.MD @@ -36,13 +36,13 @@ Figure 1. PaddlePaddle on IA. 
我们把集成方案大致分为了如下几个方面。 ### CMake -我们会在`CMakeLists.txt`中会添加`WITH_MKLDNN`的选项,当设置这个值为`ON`的时候会启用编译MKL-DNN功能。同时会自动开启OpenMP用于提高MKL-DNN的性能。 +我们会在`CMakeLists.txt`中会给用户添加一个`WITH_MKL`的开关,他是负责`WITH_MKLML`和`WITH_MKLDNN`的总开关。 -同时,我们会引入`WITH_MKLML`选项,用于选择是否使用MKL-DNN自带的MKLML安装包。这个安装包可以独立于MKL-DNN使用,但是建议在开启MKL-DNN的同时也打开MKLML的开关,这样才能发挥最好的性能。 +当打开`WITH_MKL`时,会开启MKLML的功能,作为PaddlePaddle的CBLAS和LAPACK库,同时会开启Intel OpenMP用于提高MKLML的性能。 如果系统支持AVX2指令集及以上,同时会开启MKL-DNN功能。 -所以,我们会在`cmake/external`目录新建`mkldnn.cmake`和`mklml.cmake`文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中。 +当关闭`WITH_MKL`时,MKLML和MKL-DNN功能会同时关闭。 -**备注**:当`WITH_MKLML=ON`的时候,会优先使用这个包作为PaddlePaddle的CBLAS和LAPACK库,所以会稍微改动`cmake/cblas.cmake`中的逻辑。 +所以,我们会在`cmake/external`目录新建`mkldnn.cmake`和`mklml.cmake`文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中。 ### Layers 所有MKL-DNN相关的C++ layers,都会按照PaddlePaddle的目录结构存放在 diff --git a/doc/howto/dev/write_docs_cn.rst b/doc/howto/dev/write_docs_cn.rst index 731a63f945c29ba78538b3d71289b234e569354d..61f3a223547b352cf7929615cf3682b29b9a738f 100644 --- a/doc/howto/dev/write_docs_cn.rst +++ b/doc/howto/dev/write_docs_cn.rst @@ -34,7 +34,7 @@ PaddlePaddle的文档构建有两种方式。 cd TO_YOUR_PADDLE_CLONE_PATH mkdir -p build cd build - cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON + cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON make gen_proto_py make paddle_docs paddle_docs_cn diff --git a/doc/mobile/cross_compiling_for_android_cn.md b/doc/mobile/cross_compiling_for_android_cn.md index 882066f23714f7ab3bba9199b5fa5ff2325ce849..424d7718c64438496cf0895397babd5408e1ca02 100644 --- a/doc/mobile/cross_compiling_for_android_cn.md +++ b/doc/mobile/cross_compiling_for_android_cn.md @@ -1,4 +1,4 @@ -# 构建Android平台上的PaddlePaddle库 +# Android平台编译指南 用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库: - 基于Docker容器的编译方式 diff --git a/doc/mobile/cross_compiling_for_ios_cn.md b/doc/mobile/cross_compiling_for_ios_cn.md index cda636a67de712e072f4cc7ad859dda75211eaa8..9da48e7f2119ce901fbb3abab73400df27be16d2 100644 --- a/doc/mobile/cross_compiling_for_ios_cn.md +++ b/doc/mobile/cross_compiling_for_ios_cn.md @@ -1,4 +1,4 @@ -# 构建iOS平台上的PaddlePaddle库 +# iOS平台编译指南 交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。 ## 准备交叉编译环境 @@ -25,7 +25,7 @@ iOS平台可选配置参数: - `IOS_PLATFORM`,可设置为`OS/SIMULATOR`,默认值为`OS`。 - `OS`,构建目标为`arm`架构的iPhone或者iPad等物理设备。 - `SIMULATOR`,构建目标为`x86`架构的模拟器平台。 -- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示: +- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示,默认编译所有架构: @@ -41,11 +41,11 @@ iOS平台可选配置参数: - + - +
OSarmv7, armv7s, arm64 (默认)armv7, armv7s, arm64
SIMULATORi386, x86_64 (默认)i386, x86_64
@@ -66,7 +66,7 @@ iOS平台可选配置参数: ```bash cmake -DCMAKE_SYSTEM_NAME=iOS \ -DIOS_PLATFORM=OS \ - -DIOS_ARCH="arm64" \ + -DIOS_ARCH="armv7;arm64" \ -DIOS_ENABLE_BITCODE=ON \ -DIOS_USE_VECLIB_FOR_BLAS=ON \ -DCMAKE_INSTALL_PREFIX=your/path/to/install \ @@ -112,6 +112,6 @@ $ make install - `lib`目录,其中包含PaddlePaddle的C-API静态库 - `third_party`目录,其中包含所依赖的所有第三方库 -注意,不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用`lipo`工具将多个静态库合并成一个支持多个架构的fat库。 +注意,如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用`lipo`工具合并fat库。 自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。 diff --git a/doc/mobile/cross_compiling_for_raspberry_cn.md b/doc/mobile/cross_compiling_for_raspberry_cn.md index 6e983645faaed1f67edaeeb82ddbef9cef6bb85f..f8ef9dc8031613831437745995268f3abc392f5b 100644 --- a/doc/mobile/cross_compiling_for_raspberry_cn.md +++ b/doc/mobile/cross_compiling_for_raspberry_cn.md @@ -1,4 +1,4 @@ -# 构建Raspberry Pi平台上的PaddlePaddle库 +# Raspberry Pi平台编译指南 通常有两个方法来构建基于 Rasspberry Pi 的版本: diff --git a/paddle/capi/Main.cpp b/paddle/capi/Main.cpp index 78c43949dfe325d0e1a6ba10ae51cb7b858f6c52..bb8249a5511c089ec2f2263ff4cc290f0a5a8fce 100644 --- a/paddle/capi/Main.cpp +++ b/paddle/capi/Main.cpp @@ -29,6 +29,9 @@ static void initPaddle(int argc, char** argv) { extern "C" { paddle_error paddle_init(int argc, char** argv) { + static bool isInit = false; + if (isInit) return kPD_NO_ERROR; + std::vector realArgv; realArgv.reserve(argc + 1); realArgv.push_back(strdup("")); @@ -37,6 +40,7 @@ paddle_error paddle_init(int argc, char** argv) { } initPaddle(argc + 1, realArgv.data()); free(realArgv[0]); + isInit = true; return kPD_NO_ERROR; } } diff --git a/paddle/capi/examples/model_inference/dense/main.c b/paddle/capi/examples/model_inference/dense/main.c index 876af2aa7615c098d225b56ce2ea0b1529a6e3c6..5eeaf7e31fac7c9ed0b9269e74a7e467bde155ef 100644 --- a/paddle/capi/examples/model_inference/dense/main.c +++ b/paddle/capi/examples/model_inference/dense/main.c @@ -1,5 +1,6 @@ #include #include + #include 
"../common/common.h" #define CONFIG_BIN "./trainer_config.bin" @@ -27,20 +28,19 @@ int main() { CHECK(paddle_arguments_resize(in_args, 1)); // Create input matrix. - paddle_matrix mat = paddle_matrix_create(/* sample_num */ 10, + paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1, /* size */ 784, /* useGPU */ false); srand(time(0)); - std::vector input; - input.resize(784 * 10); + paddle_real* array; + + // Get First row. + CHECK(paddle_matrix_get_row(mat, 0, &array)); - for (int i = 0; i < input.size(); ++i) { - input[i] = rand() / ((float)RAND_MAX); + for (int i = 0; i < 784; ++i) { + array[i] = rand() / ((float)RAND_MAX); } - - // Set value for the input matrix - CHECK(paddle_matrix_set_value(mat, input.data())); CHECK(paddle_arguments_set_value(in_args, 0, mat)); @@ -53,17 +53,18 @@ int main() { CHECK(paddle_arguments_get_value(out_args, 0, prob)); - std::std::vector result; - int height; - int width; + uint64_t height; + uint64_t width; - CHECK(paddle_matrix_get_shape(prob, &height, &width); - result.resize(height * width); - CHECK(paddle_matrix_get_value(prob, result.data())); + CHECK(paddle_matrix_get_shape(prob, &height, &width)); + CHECK(paddle_matrix_get_row(prob, 0, &array)); - printf("Prob: "); + printf("Prob: \n"); for (int i = 0; i < height * width; ++i) { - printf("%.2f ", result[i]); + printf("%.4f ", array[i]); + if ((i + 1) % width == 0) { + printf("\n"); + } } printf("\n"); diff --git a/paddle/cuda/include/hl_gpu.h b/paddle/cuda/include/hl_gpu.h index ede2670882ee2b93f610a2261a4ecc1784bc2d0c..4ab8de80d1c7be0f8e3eb848955373dd5e21bc18 100644 --- a/paddle/cuda/include/hl_gpu.h +++ b/paddle/cuda/include/hl_gpu.h @@ -25,7 +25,9 @@ limitations under the License. 
*/ #include "hl_matrix.h" #include "hl_sequence.h" #include "hl_sparse.h" +#ifndef PADDLE_MOBILE_INFERENCE #include "hl_warpctc_wrap.h" +#endif #ifdef HPPL_STUB_FUNC #include "stub/hl_aggregate_stub.h" diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 1afc5242081e7f7b12527a15d29421cebeb3d3b8..c08e844847737b1172f6453767cc7f5e7b1a2bda 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -38,9 +38,9 @@ py_proto_compile(framework_py_proto SRCS framework.proto) add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) add_custom_command(TARGET framework_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto - COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto/ - COMMENT "Copy generated python proto into directory paddle/v2/framework/proto." + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/proto + COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/proto/ + COMMENT "Copy generated python proto into directory paddle/v2/fluid/proto." 
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) cc_library(backward SRCS backward.cc DEPS net_op) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index b3b9c45ded95ce2e735b8898d47760956dcacdce..b9018ecdba8303fd6b37c87edd99e192aa604228 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -270,6 +270,19 @@ static bool AllGradInSet(const std::vector& names, return false; } } + if (VLOG_IS_ON(10)) { + std::ostringstream sout; + sout << "All input {"; + for (auto& name : names) { + sout << name << ","; + } + sout << "} is in {"; + for (auto& name : set) { + sout << name << ","; + } + sout << "}"; + VLOG(10) << sout.str(); + } return true; } @@ -290,14 +303,12 @@ static void CreateGradVarInBlock( auto ops = block_desc->AllOps(); for (size_t op_index = grad_op_start_index; op_index < ops.size(); ++op_index) { - bool need_infer_shape = false; std::unordered_set new_vars; ForEachVarName(ops[op_index]->Outputs(), [&](const std::string& grad_var_name) { if (block_desc->HasVar(grad_var_name)) { return false; } - need_infer_shape = true; auto var = block_desc->Var(grad_var_name); new_vars.insert(var->Name()); auto it = param_name_map.find(grad_var_name); @@ -311,23 +322,21 @@ static void CreateGradVarInBlock( grad_record.op_idx_ = static_cast(op_index); return false; /* not break */ }); - if (need_infer_shape) { - ops[op_index]->InferVarType(block_desc); - for (auto& arg : ops[op_index]->OutputArgumentNames()) { - if (new_vars.find(arg) == new_vars.end()) { - continue; - } - auto pname = FwdName(arg); - auto* param = block_desc->FindVarRecursive(pname); - auto* grad = block_desc->FindVar(arg); - if (param == nullptr) { - grad->SetDataType(DataType::FP32); - } else { - grad->SetDataType(param->GetDataType()); - } + ops[op_index]->InferVarType(block_desc); + for (auto& arg : ops[op_index]->OutputArgumentNames()) { + if (new_vars.find(arg) == new_vars.end()) { + continue; + } + auto pname = FwdName(arg); + auto* param = 
block_desc->FindVarRecursive(pname); + auto* grad = block_desc->FindVar(arg); + if (param == nullptr) { + grad->SetDataType(DataType::FP32); + } else { + grad->SetDataType(param->GetDataType()); } - ops[op_index]->InferShape(*block_desc); } + ops[op_index]->InferShape(*block_desc); } } @@ -387,6 +396,7 @@ std::vector> MakeBlockBackward( ProgramDescBind& program_desc, int block_idx, std::unordered_set* no_grad_vars, std::unordered_map* grad_to_var) { + VLOG(5) << "MakeBlockBackward"; BlockDescBind* cur_block = program_desc.MutableBlock(block_idx); std::vector op_descs = cur_block->AllOps(); std::unordered_map> dup_out_ops; @@ -394,9 +404,10 @@ std::vector> MakeBlockBackward( std::vector> backward_descs; for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { + VLOG(5) << "Making backward " << (*it)->Type() << " op"; std::vector> op_grads; - if ((*it)->Type() == "recurrent") { + if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") { int step_block_idx = (*it)->GetBlockAttr("step_block"); BlockDescBind* backward_block = CreateStepBlock( program_desc, no_grad_vars, grad_to_var, step_block_idx); @@ -410,6 +421,15 @@ std::vector> MakeBlockBackward( op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var); } + if (VLOG_IS_ON(10)) { + std::ostringstream sout; + sout << "Made "; + for (auto& op_grad : op_grads) { + sout << op_grad->Type() << " "; + } + VLOG(10) << sout.str(); + } + for (const auto& desc : op_grads) { for (const std::string& out_name : desc->OutputArgumentNames()) { if (out_name.find("@GRAD") == std::string::npos) { @@ -425,6 +445,8 @@ std::vector> MakeBlockBackward( op_grads.begin(), op_grads.end(), std::back_inserter(backward_descs), [](std::unique_ptr& ptr) { return std::move(ptr); }); } + + VLOG(5) << "Appending Sums"; // Check whether some variables are written more than once std::list>> pending_sum_ops; for (const auto& dup : dup_out_ops) { @@ -432,16 +454,22 @@ std::vector> MakeBlockBackward( const std::vector dup_op = dup.second; if 
(out_name != kEmptyVarName && dup_op.size() > 1) { std::vector sum_op_inputs; + std::string next_g_name = out_name; for (size_t i = 0; i < dup_op.size(); ++i) { + VLOG(10) << backward_descs[dup_op[i]]->Type() << " has " << out_name + << " duplicated"; std::string new_name = out_name + "@RENAME@" + std::to_string(i); - backward_descs[dup_op[i]]->Rename(out_name, new_name); + backward_descs[dup_op[i]]->RenameOutput(out_name, new_name); + backward_descs[dup_op[i]]->RenameInput(out_name, next_g_name); sum_op_inputs.emplace_back(new_name); + next_g_name = sum_op_inputs.back(); } std::unique_ptr sum_op(new OpDescBind( "sum", {{"X", sum_op_inputs}}, {{"Out", {out_name}}}, {})); pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)}); } } + pending_sum_ops.sort( [](const std::pair>& a, const std::pair>& b) { @@ -452,6 +480,8 @@ std::vector> MakeBlockBackward( std::move(p.second)); } + VLOG(5) << "MakeBlockBackward Finished"; + return backward_descs; } @@ -483,19 +513,14 @@ ParamGradInfoMap AppendBackward( const int root_block_idx = 0; auto root_block = program_desc.MutableBlock(root_block_idx); - // insert fill one op for target - // TODO(qiao) add some check to the target. 
std::string fill_one_op_out = GradVarName(target.Name()); - std::vector target_shape_desc = target.Shape(); - std::vector target_shape; - std::transform(target_shape_desc.begin(), target_shape_desc.end(), - std::back_inserter(target_shape), - [](int64_t dim) { return static_cast(dim); }); + bool is_scalar = target.Shape() == std::vector{1}; + PADDLE_ENFORCE(is_scalar, "target should be scalar"); VLOG(3) << "backward from loss=" << target.Name() << " data_type=" << target.GetDataType(); std::unique_ptr fill_one_op( new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}}, - {{"shape", target_shape}, + {{"shape", std::vector{1}}, {"value", static_cast(1.0)}, {"data_type", target.GetDataType()}})); // infer var type of fill_one_op diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index d485cdf6109274377ad0057223bdd8401e964aa7..2b858f5ea0874d7bf1a9cf38529f5d0d70cca7f2 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -508,6 +508,7 @@ TEST(Backward, simple_single_op) { op->SetOutput("Out", {"out"}); auto target = f::VarDescBind("out"); + target.SetShape({1}); auto var_to_grad = AppendBackward(program, target, {}); ASSERT_EQ(block->AllOps().size(), 3UL); @@ -544,6 +545,7 @@ TEST(Backward, default_attribute) { op->CheckAttrs(); auto target = f::VarDescBind("out"); + target.SetShape({1}); AppendBackward(program, target, {}); ASSERT_EQ(block->AllOps().size(), 3UL); @@ -581,6 +583,7 @@ TEST(Backward, simple_mult_op) { op3->SetOutput("Out", {"out3"}); auto target = f::VarDescBind("out3"); + target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {}); @@ -670,6 +673,7 @@ TEST(Backward, intermedia_var_no_grad) { op4->SetOutput("Out", {"out4"}); auto target = f::VarDescBind("out4"); + target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {"out3"}); @@ -730,6 +734,7 @@ 
TEST(Backward, var_no_grad) { op2->SetOutput("Z", {"z2"}); auto target = f::VarDescBind("z2"); + target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {"z1"}); @@ -810,6 +815,7 @@ TEST(Backward, shared_var) { op3->SetOutput("Out", {"out3"}); auto target = f::VarDescBind("out3"); + target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {}); @@ -888,6 +894,7 @@ TEST(Backward, half_backward) { op1->SetOutput("Out", {"out"}); auto target = f::VarDescBind("out"); + target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {"b"}); f::OpDescBind *fill_op = block->AllOps()[forward_len]; diff --git a/paddle/framework/data_type.h b/paddle/framework/data_type.h index 3ec88d7a72c3339bf5e7d0ca3957a3f608f039b7..c54d2d4ddf09c445fb25c1fbe8a7498f233d8212 100644 --- a/paddle/framework/data_type.h +++ b/paddle/framework/data_type.h @@ -29,6 +29,8 @@ inline DataType ToDataType(std::type_index type) { return DataType::INT32; } else if (typeid(int64_t).hash_code() == type.hash_code()) { return DataType::INT64; + } else if (typeid(bool).hash_code() == type.hash_code()) { + return DataType::BOOL; } else { PADDLE_THROW("Not supported"); } @@ -44,6 +46,8 @@ inline std::type_index ToTypeIndex(DataType type) { return typeid(int); case DataType::INT64: return typeid(int64_t); + case DataType::BOOL: + return typeid(bool); default: PADDLE_THROW("Not support type %d", type); } @@ -64,6 +68,9 @@ inline void VisitDataType(DataType type, Visitor visitor) { case DataType::INT64: visitor.template operator()(); break; + case DataType::BOOL: + visitor.template operator()(); + break; default: PADDLE_THROW("Not supported"); } diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 53b899a23997b71e723a298ec360a4e018d89878..8b6f42b82df14bfcd25f33ef16b5903fb965a8ba 100644 --- a/paddle/framework/ddim.cc +++ 
b/paddle/framework/ddim.cc @@ -60,8 +60,7 @@ void make_ddim(DDim& ddim, const int64_t* dims, int n) { ddim = make_dim<9>(dims); break; default: - throw std::invalid_argument( - "Dynamic dimensions must have between [1, 9] dimensions."); + PADDLE_THROW("Dynamic dimensions must have between [1, 9] dimensions."); } } diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 2fcf41d69f0011b0d9a3d89c97fcebacb0703e97..adedd8cb0e8504fd6fc924e62a2ede3c1c7ce698 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -120,6 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id, for (auto& op_desc : block.AllOps()) { auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); + VLOG(10) << op->DebugString(); op->Run(*local_scope, *device); } if (create_local_scope) { diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index 39c8def82e1ebb10a0e357a648af760099020c32..48cd131550dea5ad3f368b25c31d753efbe0dff9 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -235,6 +235,23 @@ void OpDescBind::Rename(const std::string &old_name, need_update_ = true; } +void OpDescBind::RenameOutput(const std::string &old_name, + const std::string &new_name) { + for (auto &output : outputs_) { + std::replace(output.second.begin(), output.second.end(), old_name, + new_name); + } + need_update_ = true; +} + +void OpDescBind::RenameInput(const std::string &old_name, + const std::string &new_name) { + for (auto &input : inputs_) { + std::replace(input.second.begin(), input.second.end(), old_name, new_name); + } + need_update_ = true; +} + struct SetAttrDescVisitor : public boost::static_visitor { explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {} mutable OpDesc::Attr *attr_; @@ -448,7 +465,12 @@ const std::vector &CompileTimeInferShapeContext::Outputs( DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const { auto var = 
block_.FindVarRecursive(name); PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name); - return framework::make_ddim(var->Shape()); + try { + return framework::make_ddim(var->Shape()); + } catch (...) { + VLOG(5) << "GetDim of variable " << name << " error"; + std::rethrow_exception(std::current_exception()); + } } void CompileTimeInferShapeContext::SetDim(const std::string &name, diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h index e3e96441bbf51729f2ba69c9257e6961b1de0d5c..da032319afa775571d3942bf6ae415db7d233735 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -73,6 +73,10 @@ class OpDescBind { void Rename(const std::string &old_name, const std::string &new_name); + void RenameOutput(const std::string &old_name, const std::string &new_name); + + void RenameInput(const std::string &old_name, const std::string &new_name); + // Only be used in C++ const AttributeMap &GetAttrMap() const; diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 3276f8af396fe58450a8dc6713fe61e49d5ca708..93467ab8ac796277b47a861a427de2837fb2d3d4 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -403,19 +403,6 @@ class RuntimeInferShapeContext : public InferShapeContext { void OperatorWithKernel::Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const { - if (VLOG_IS_ON(1)) { - auto inputs = this->InputVars(); - auto outputs = this->OutputVars(true); - std::ostringstream sout; - sout << "Run operator " << this->Type() << " From ["; - std::ostream_iterator out_it(sout, ","); - std::copy(inputs.begin(), inputs.end(), out_it); - sout << "] to ["; - std::copy(outputs.begin(), outputs.end(), out_it); - sout << "]"; - VLOG(1) << sout.str(); - } - RuntimeInferShapeContext infer_shape_ctx(*this, scope); this->InferShape(&infer_shape_ctx); diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 
9428b8a07ea0af005f6e960ddaa02da624ad9d97..9ad6272c99dd6a85520ae44c1331ac232bc6a9a2 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -38,11 +38,12 @@ Scope& Scope::NewScope() const { Variable* Scope::Var(const std::string& name) { auto iter = vars_.find(name); if (iter != vars_.end()) { + VLOG(3) << "Get existing variable " << name; return iter->second; } Variable* v = new Variable(); vars_[name] = v; - VLOG(3) << "Create variable " << name << " on scope"; + VLOG(3) << "Create variable " << name; v->name_ = &(vars_.find(name)->first); return v; } diff --git a/paddle/framework/shape_inference.h b/paddle/framework/shape_inference.h index 7d36ead2ca85328c7843b3b5d423cf8e921d1c93..05dc47f06ac81f0acb6d0317cbecb3009c7dd7f0 100644 --- a/paddle/framework/shape_inference.h +++ b/paddle/framework/shape_inference.h @@ -53,6 +53,10 @@ class InferShapeContext { virtual bool IsRuntime() const = 0; + // Note: In while op, we need this to be public + void SetDims(const std::vector &names, + const std::vector &dims); + protected: virtual framework::DDim GetDim(const std::string &name) const = 0; virtual void SetDim(const std::string &name, const framework::DDim &dim) = 0; @@ -60,9 +64,6 @@ class InferShapeContext { std::vector GetDims( const std::vector &names) const; - void SetDims(const std::vector &names, - const std::vector &dims); - std::vector GetVarTypes( const std::vector &names) const; diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 91d732641a4a5eed050841b59fd10da397eb732f..41ead3c5ecef248830cfb0f8be360f21dcd58e7b 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -73,7 +73,6 @@ if(MOBILE_INFERENCE) list(REMOVE_ITEM GSERVER_SOURCES dataproviders/DataProvider.cpp dataproviders/MultiDataProvider.cpp - dataproviders/ProtoDataProvider.cpp dataproviders/PyDataProvider2.cpp dataproviders/PyDataProvider.cpp) diff --git a/paddle/gserver/activations/ActivationFunction.cpp 
b/paddle/gserver/activations/ActivationFunction.cpp index 8b7b2e9b65898950e036ebc023cd28990cef303f..f5a41b66bf09a4abc5ae7b64f227ca52461408f5 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -212,6 +212,37 @@ Error __must_check backward(Argument& act) { } END_DEFINE_ACTIVATION(sequence_softmax) +/* + * @brief SoftSign Activation. + * \f[ + * f(z) = \frac{z}{1 + |z|} + * \f] + */ +BEGIN_DEFINE_ACTIVATION(softsign) +private: +MatrixPtr denominator_; + +Error __must_check forward(Argument& act) { + size_t height = act.value->getHeight(); + size_t width = act.value->getWidth(); + Matrix::resizeOrCreate( + denominator_, height, width, false, useGpu(act.deviceId)); + denominator_->assign(*act.value); + denominator_->abs2(); + denominator_->add(1.); + + act.value->dotDiv(*act.value, *denominator_); + return Error(); +} + +Error __must_check backward(Argument& act) { + denominator_->square2(); + denominator_->scalarDiv(*denominator_, 1.); + act.grad->dotMul(*act.grad, *denominator_); + return Error(); +} +END_DEFINE_ACTIVATION(softsign) + /** * @brief Relu Activation. * forward. y = max(0, z) diff --git a/paddle/gserver/dataproviders/DataProvider.cpp b/paddle/gserver/dataproviders/DataProvider.cpp index 0478256f9cd81f4a99eb0cbcbd1a5a21de5cf14b..106cf5b6228e636026ded558d0f591022f1ae586 100644 --- a/paddle/gserver/dataproviders/DataProvider.cpp +++ b/paddle/gserver/dataproviders/DataProvider.cpp @@ -16,8 +16,8 @@ limitations under the License. 
*/ #include #include -#include "ProtoDataProvider.h" #include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" #include "paddle/utils/StringUtil.h" #include "paddle/utils/Util.h" @@ -164,8 +164,6 @@ DataProvider* DataProvider::create(const DataConfig& config, REGISTER_DATA_PROVIDER(simple, SimpleDataProvider); REGISTER_DATA_PROVIDER(dummy, DummyDataProvider); -REGISTER_DATA_PROVIDER(proto, ProtoDataProvider); -REGISTER_DATA_PROVIDER(proto_sequence, ProtoSequenceDataProvider); int64_t DataProvider::getNextBatch(int64_t size, DataBatch* batch) { int64_t batchSize = doubleBuffer_ ? getNextBatchFromBuffer(size, batch) diff --git a/paddle/gserver/dataproviders/ProtoDataProvider.cpp b/paddle/gserver/dataproviders/ProtoDataProvider.cpp deleted file mode 100644 index c6f5cab1915b7f41d505c37a7fef762a392bad7f..0000000000000000000000000000000000000000 --- a/paddle/gserver/dataproviders/ProtoDataProvider.cpp +++ /dev/null @@ -1,932 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "ProtoDataProvider.h" -#include -#include -#include -#include "paddle/utils/StringUtil.h" -#include "paddle/utils/Util.h" - -#include "DataProviderGroup.h" -#include "paddle/utils/Logging.h" - -DEFINE_double(memory_threshold_on_load_data, - 1.0, - "stop loading data when memory is not sufficient"); - -namespace paddle { - -REGISTER_DATA_PROVIDER(proto_group, DataProviderGroup); -REGISTER_DATA_PROVIDER(proto_sequence_group, - DataProviderGroup); - -ProtoDataProvider::ProtoDataProvider(const DataConfig& config, - bool useGpu, - bool loadDataAll) - : DataProvider(config, useGpu), sampleNums_(0), currentSequenceIndex_(0) { - if (loadDataAll) { - loadData(config_.files()); - } -} - -void ProtoDataProvider::loadData(const std::vector& fileList) { - for (auto& file : fileList) { - if (FLAGS_memory_threshold_on_load_data < 1.0) { - double memUsage = getMemoryUsage(); - if (memUsage > FLAGS_memory_threshold_on_load_data) { - LOG(INFO) << "memUsage is " << memUsage << ", > " - << FLAGS_memory_threshold_on_load_data - << " therefore SKIP ALL REMAINING file."; - break; - } - } - LOG(INFO) << "load data file " << file; - loadDataFile(file); - } - - if (sequenceStartPositions_.size() == sampleNums_) { - // This means that each sample is one sequence - shuffledSequenceIds_.swap(sequenceStartPositions_); - } else { - sequenceStartPositions_.push_back(sampleNums_); - shuffledSequenceIds_.reserve(sequenceStartPositions_.size() - 1); - for (size_t i = 0; i < sequenceStartPositions_.size() - 1; ++i) { - shuffledSequenceIds_.push_back(i); - } - } - - LOG(INFO) << "read done, num of instance=" << sampleNums_; - showDataStats(); -} - -void ProtoDataProvider::loadData(const std::string& fileName) { - std::vector fileList; - loadFileList(fileName, fileList); - loadData(fileList); -} - -void ProtoDataProvider::checkDataHeader(const DataHeader& header) { - if (header_.slot_defs_size()) { - // header_ is already set. Need to check consistency. 
- CHECK_EQ(header_.slot_defs_size(), header.slot_defs_size()) - << "Different header"; - for (int i = 0; i < header.slot_defs_size(); ++i) { - CHECK_EQ(header_.slot_defs(i).type(), header.slot_defs(i).type()); - CHECK_EQ(header_.slot_defs(i).dim(), header.slot_defs(i).dim()); - } - return; - } - - // header_ is not set before - CHECK(header.slot_defs_size()) << "Invalid header: no slot is defined"; - int i; - for (i = 0; i < header.slot_defs_size(); ++i) { - if (header.slot_defs(i).type() == SlotDef::INDEX || - header.slot_defs(i).type() == SlotDef::VAR_MDIM_INDEX) { - break; - } - constexpr int kBufLen = 100; - char buf[kBufLen]; - snprintf(buf, kBufLen, "slot%d_nnz", i); - nnzStats_.push_back(getStat(buf)); - } - numVecSlots_ = i; - - // Check that INDEX slots are after VECTOR slots - for (int i = numVecSlots_; i < header.slot_defs_size(); ++i) { - CHECK(header.slot_defs(i).type() == SlotDef::INDEX || - header.slot_defs(i).type() == SlotDef::VAR_MDIM_INDEX); - } - - slots_.clear(); - slots_.reserve(header.slot_defs_size()); - for (int i = 0; i < header.slot_defs_size(); ++i) { - slots_.emplace_back(); - slots_.back().type = header.slot_defs(i).type(); - slots_.back().dim = header.slot_defs(i).dim(); - if (SlotDef::VECTOR_SPARSE_NON_VALUE == header.slot_defs(i).type() || - SlotDef::VECTOR_SPARSE_VALUE == header.slot_defs(i).type()) { - slots_.back().indices.push_back(0); - } - } - - header_ = header; -} - -void ProtoDataProvider::checkSample(const DataSample& sample) { - CHECK_EQ(numVecSlots_, sample.vector_slots_size()); - CHECK(header_.slot_defs_size() == numVecSlots_ + sample.id_slots_size() || - header_.slot_defs_size() == numVecSlots_ + sample.var_id_slots_size()); - for (int i = 0; i < numVecSlots_; ++i) { - uint32_t dim = header_.slot_defs(i).dim(); - switch (header_.slot_defs(i).type()) { - case SlotDef::VECTOR_DENSE: { - CHECK_EQ(static_cast(dim), sample.vector_slots(i).values_size()); - CHECK_EQ(0, sample.vector_slots(i).ids_size()); - break; - } - case 
SlotDef::VECTOR_SPARSE_NON_VALUE: { - if (0 == sample.vector_slots(i).ids_size()) { - break; - } - CHECK_LT(0, sample.vector_slots(i).ids_size()); - CHECK_EQ(0, sample.vector_slots(i).values_size()); - auto maxId = *std::max_element(sample.vector_slots(i).ids().begin(), - sample.vector_slots(i).ids().end()); - CHECK_GT(dim, maxId); - break; - } - case SlotDef::VECTOR_SPARSE_VALUE: { - if (0 == sample.vector_slots(i).ids_size()) { - CHECK_EQ(0, sample.vector_slots(i).values_size()); - break; - } - CHECK_LT(0, sample.vector_slots(i).values_size()); - CHECK_GE(static_cast(dim), sample.vector_slots(i).values_size()); - CHECK_EQ(sample.vector_slots(i).values_size(), - sample.vector_slots(i).ids_size()); - auto maxId = *std::max_element(sample.vector_slots(i).ids().begin(), - sample.vector_slots(i).ids().end()); - CHECK_GT(dim, maxId); - break; - } - case SlotDef::VAR_MDIM_DENSE: { - if (static_cast(dim) != 0) { - CHECK_EQ(static_cast(dim), sample.vector_slots(i).values_size()); - if (sample.vector_slots(i).dims_size() != 0) { - int totalDim = sample.vector_slots(i).dims(0); - for (int j = 1; j < sample.vector_slots(i).dims_size(); ++j) { - totalDim *= sample.vector_slots(i).dims(j); - } - CHECK_EQ(static_cast(dim), totalDim); - } - } else { - CHECK_NE(sample.vector_slots(i).dims_size(), 0); - int totalDim = sample.vector_slots(i).dims(0); - for (int j = 1; j < sample.vector_slots(i).dims_size(); ++j) { - totalDim *= sample.vector_slots(i).dims(j); - } - CHECK_EQ(totalDim, sample.vector_slots(i).values_size()); - } - break; - } - case SlotDef::STRING: { - CHECK_EQ(static_cast(1), sample.vector_slots(i).strs_size()); - CHECK_EQ(0, sample.vector_slots(i).ids_size()); - CHECK_EQ(0, sample.vector_slots(i).values_size()); - break; - } - default: - LOG(FATAL) << "BUG: Should not reach here"; - } - } - for (int i = numVecSlots_; i < header_.slot_defs_size(); ++i) { - if (header_.slot_defs(i).type() != SlotDef::VAR_MDIM_INDEX) { - uint32_t id = sample.id_slots(i - numVecSlots_); 
- if (id == -1U) continue; - CHECK_LT(id, header_.slot_defs(i).dim()); - } else { - for (int j = 0; j < sample.var_id_slots(i - numVecSlots_).ids_size(); - ++j) { - uint32_t id = sample.var_id_slots(i - numVecSlots_).ids(j); - CHECK_LT(id, header_.slot_defs(i).dim()); - } - } - } -} - -void ProtoDataProvider::loadDataFile(const std::string& fileName) { - std::ifstream is(fileName); - CHECK(is) << "Fail to open " << fileName; - bool dataCompression = str::endsWith(fileName, ".gz"); - std::unique_ptr reader(new ProtoReader(&is, dataCompression)); - CHECK(reader) << "Fail to create proto data input stream"; - - DataHeader header; - CHECK(reader->read(&header)); - checkDataHeader(header); - - DataSample sample; - do { - if (!reader->read(&sample)) { - break; - } - checkSample(sample); - if (sample.is_beginning()) { - sequenceStartPositions_.push_back(sampleNums_); - } - fillSlots(sample); - ++sampleNums_; - } while (true); - - CHECK(is.eof()) << "Fail to read file"; - reader.reset(nullptr); - is.close(); -} - -// checkSample has done before, no check here -void ProtoDataProvider::fillSlots(const DataSample& sample) { - for (size_t i = 0; i < slots_.size(); ++i) { - auto& slot = slots_[i]; - int dim = slot.dim; - switch (slot.type) { - case SlotDef::VECTOR_DENSE: { - size_t oldSize = slot.denseData.size(); - slot.denseData.resize(oldSize + dim); - const float* values = sample.vector_slots(i).values().data(); -#ifdef PADDLE_TYPE_DOUBLE - std::copy(values, values + dim, slot.denseData.begin() + oldSize); -#else - memcpy(slot.denseData.data() + oldSize, values, sizeof(real) * dim); -#endif - break; - } - case SlotDef::VECTOR_SPARSE_NON_VALUE: { - int slotSize = sample.vector_slots(i).ids_size(); - int subSlotSize = 0; - int id = 0; // the slot id - // find whether this vector_slots has subseq. If not has subseq, - // subSlotSize = 0. 
- for (id = 0; id < sample.subseq_slots_size(); id++) { - if (sample.subseq_slots(id).slot_id() == i) { - subSlotSize = sample.subseq_slots(id).lens_size(); - break; - } - } - if (subSlotSize && slot.subIndices.size() == 0UL) { - // If has subSeq, the first element of subIndices = 0. - slot.subIndices.push_back(0); - } - if (slotSize == 0UL) { - // if has no id, new indices = old indices. - slot.indices.push_back(slot.indices.back()); - // if has subSeq, new subIndices = old subIndices. - if (slot.subIndices.size()) { - slot.subIndices.push_back(slot.subIndices.back()); - } - break; - } - slot.sparseNonValueData.resize(slot.indices.back() + slotSize); - const unsigned int* ids = sample.vector_slots(i).ids().data(); - memcpy(slot.sparseNonValueData.data() + slot.indices.back(), - ids, - sizeof(*ids) * slotSize); - slot.indices.push_back(slot.indices.back() + slotSize); - if (subSlotSize) { - for (int ii = 0; ii < subSlotSize; ++ii) { - slot.subIndices.push_back(slot.subIndices.back() + - sample.subseq_slots(id).lens(ii)); - } - } - break; - } - case SlotDef::VECTOR_SPARSE_VALUE: { - if (0 == sample.vector_slots(i).ids_size()) { - slot.indices.push_back(slot.indices.back()); - break; - } - int slotSize = sample.vector_slots(i).ids_size(); - slot.sparseFloatValueData.resize(slot.indices.back() + slotSize); - const unsigned int* ids = sample.vector_slots(i).ids().data(); - const float* values = sample.vector_slots(i).values().data(); - for (int ii = 0; ii < slotSize; ++ii) { - slot.sparseFloatValueData[slot.indices.back() + ii].col = ids[ii]; - slot.sparseFloatValueData[slot.indices.back() + ii].value = - values[ii]; - } - slot.indices.push_back(slot.indices.back() + slotSize); - break; - } - case SlotDef::INDEX: { - slot.indexData.push_back(sample.id_slots(i - numVecSlots_)); - break; - } - case SlotDef::VAR_MDIM_DENSE: { - size_t oldSize = slot.varDenseData.size(); - slot.varDenseData.resize(oldSize + 1); - size_t varDim = sample.vector_slots(i).values_size(); - 
slot.varDenseData[oldSize].data.resize(varDim); - const float* values = sample.vector_slots(i).values().data(); -#ifdef PADDLE_TYPE_DOUBLE - std::copy( - values, values + varDim, slot.varDenseData[oldSize].data.data()); -#else - memcpy(slot.varDenseData[oldSize].data.data(), - values, - sizeof(real) * varDim); -#endif - slot.varDenseData[oldSize].dims.resize( - sample.vector_slots(i).dims_size()); - memcpy(slot.varDenseData[oldSize].dims.data(), - sample.vector_slots(i).dims().data(), - sizeof(uint32_t) * sample.vector_slots(i).dims_size()); - break; - } - case SlotDef::VAR_MDIM_INDEX: { - size_t oldSize = slot.varIndices.size(); - slot.varIndices.resize(oldSize + 1); - size_t varDim = sample.var_id_slots(i - numVecSlots_).ids_size(); - slot.varIndices[oldSize].resize(varDim); - memcpy(slot.varIndices[oldSize].data(), - sample.var_id_slots(i - numVecSlots_).ids().data(), - sizeof(uint32_t) * varDim); - break; - } - case SlotDef::STRING: { - slot.strData.push_back(sample.vector_slots(i).strs(0)); - break; - } - } - } -} - -void ProtoDataProvider::showDataStats() { - std::ostringstream oss; - for (size_t i = 0; i < slots_.size(); ++i) { - auto& slot = slots_[i]; - if (slot.type == SlotDef::VECTOR_SPARSE_NON_VALUE) { - size_t nnz = slot.sparseNonValueData.size(); - oss << "slot" << i << ":avgNNZ=" << ((double)nnz / sampleNums_) << "; "; - } else if (slot.type == SlotDef::VECTOR_SPARSE_VALUE) { - size_t nnz = slot.sparseFloatValueData.size(); - oss << "slot" << i << ":avgNNZ=" << ((double)nnz / sampleNums_) << "; "; - } - } - LOG(INFO) << oss.str(); -} - -void ProtoDataProvider::reset() { - currentSequenceIndex_ = 0; - if (!skipShuffle_) { - shuffle(); - } - - DataProvider::reset(); -} - -void ProtoDataProvider::shuffle() { - std::shuffle(shuffledSequenceIds_.begin(), - shuffledSequenceIds_.end(), - ThreadLocalRandomEngine::get()); -} - -/* - Loop through sequences starting from currentSequenceIndex_ - for at most size samples. 
For each sequence ranging from [begin, end), - op(begin, end) will be called. - - return the number of sequences scanned -*/ -template -int64_t ProtoDataProvider::sequenceLoop(Op op, int64_t size) { - int64_t sz = 0; - size_t i; - size_t sequenceCount = shuffledSequenceIds_.size(); - if (usageRatio_ < 1.0f) { - sequenceCount = static_cast(sequenceCount * usageRatio_); - } - for (i = currentSequenceIndex_; i < sequenceCount; ++i) { - size_t id = shuffledSequenceIds_[i]; - int64_t begin = sequenceStartPositions_[id]; - int64_t end = sequenceStartPositions_[id + 1]; - int64_t len = end - begin; - if (sz + len > size && sz > 0) break; - sz += len; - op(begin, end); - } - return i - currentSequenceIndex_; -} - -/* - Loop through sequences starting from currentSequenceIndex_ - for at most size samples. For each sample of each sequence at position - pos, op(pos) will be called. - - return the number of sequences scanned -*/ -template -int64_t ProtoDataProvider::sampleLoop(Op op, int64_t size) { - if (iidData()) { - size = std::min(sampleNums_ - currentSequenceIndex_, size); - for (int64_t i = currentSequenceIndex_; i < currentSequenceIndex_ + size; - ++i) { - size_t pos = shuffledSequenceIds_[i]; - op(pos); - } - return size; - } else { - auto f = [op](int64_t begin, int64_t end) { - for (int64_t pos = begin; pos < end; ++pos) { - op(pos); - } - }; - return sequenceLoop(f, size); - } -} - -/* - Loop through sub-sequences starting from currentSequenceIndex_ - for at most size samples. For each sample of each sub-sequence at position - pos, op(pos) will be called. 
- - return the number of sub-sequences scanned -*/ -template -int64_t ProtoDataProvider::subSampleLoop(Op op, int64_t size, int slot) { - CHECK(iidData()) << "subSampleLoop only accepts iid data"; - size = std::min(sampleNums_ - currentSequenceIndex_, size); - int subSize = 0; - for (int64_t i = currentSequenceIndex_; i < currentSequenceIndex_ + size; - ++i) { - size_t pos = shuffledSequenceIds_[i]; - int64_t* indexs = slots_[slot].indices.data(); - int64_t* subIndexs = slots_[slot].subIndices.data(); - int64_t subSeqStart = 0; - int64_t subSeqEnd = 0; - for (int j = 0; j < (int)slots_[slot].subIndices.size(); j++) { - if (subIndexs[j] == indexs[pos]) { - subSeqStart = j; - if (subIndexs[pos] == subIndexs[pos + 1]) { - subSeqEnd = j + 1; - break; - } - } else if (subIndexs[j] == indexs[pos + 1]) { - subSeqEnd = j; - break; - } - } - for (int j = subSeqStart; j < subSeqEnd; j++) { - op(j); - } - subSize += subSeqEnd - subSeqStart; - } - return subSize; -} - -int64_t ProtoDataProvider::getNextBatchInternal(int64_t size, - DataBatch* batch) { - int64_t numSequences = 0; // actual number of sequences in the batch - - // the number of sequences scanned, including those skipped because too long - int64_t numScannedSeqs = 0; - std::lock_guard guard(lock_); - if (iidData()) { - size = std::min(getSize() - currentSequenceIndex_, size); - numScannedSeqs = numSequences = size; - } else { - int64_t sz = 0; - auto op = [&sz, &numSequences](int64_t begin, int64_t end) { - ++numSequences; - sz += end - begin; - }; - numScannedSeqs = sequenceLoop(op, size); - VLOG_IF(1, numScannedSeqs > numSequences) - << numScannedSeqs - numSequences - << " sequences are skipped because longer than " << size; - size = sz; - } - if (size <= 0) return 0; - - DataBatch& cpuBatch = *cpuBatch_; - std::vector& cpuArguments = cpuBatch.getStreams(); - cpuBatch.setSize(size); - cpuArguments.resize(header_.slot_defs_size()); - - if (!iidData()) { - 
ICpuGpuVector::resizeOrCreate(cpuArguments[0].sequenceStartPositions, - numSequences + 1, - /* useGpu= */ false); - int* buf = cpuArguments[0].sequenceStartPositions->getMutableData(false); - int pos = 0; - int i = 0; - auto op = [buf, &pos, &i](int64_t begin, int64_t end) { - buf[i] = pos; - pos += end - begin; - ++i; - }; - sequenceLoop(op, size); - buf[i] = size; - for (size_t slot = 1; slot < cpuArguments.size(); ++slot) { - cpuArguments[slot].sequenceStartPositions = - cpuArguments[0].sequenceStartPositions; - } - } - - for (int slot = 0; slot < header_.slot_defs_size(); ++slot) { - size_t dim = header_.slot_defs(slot).dim(); - SlotDef::SlotType slotType = header_.slot_defs(slot).type(); - - std::vector dataPos; - dataPos.reserve(size); - auto op = [this, &dataPos](int64_t pos) { dataPos.push_back(pos); }; - sampleLoop(op, size); - - switch (slotType) { - case SlotDef::VECTOR_DENSE: { - Matrix::resizeOrCreate(cpuArguments[slot].value, - size, - dim, - false, // trans = false - false); // useGpu = false - real* buf = cpuArguments[slot].value->getData(); - for (int i = 0; i < size; ++i) { - memcpy(buf + i * dim, - slots_[slot].denseData.data() + dataPos[i] * dim, - sizeof(real) * dim); - } - break; - } - case SlotDef::VECTOR_SPARSE_NON_VALUE: { - if (!(cpuArguments[slot].value)) { - cpuArguments[slot].value = - Matrix::createSparseMatrix(size, - dim, - size /*DEFAULT_AVG_WIDTH = 1*/, - NO_VALUE, - SPARSE_CSR, - false, - useGpu_); - } - auto mat = cpuArguments[slot].value; - mat->resize(size, dim); - if (std::dynamic_pointer_cast(mat)) { - std::dynamic_pointer_cast(mat)->copyFrom( - dataPos.data(), - slots_[slot].indices.data(), - slots_[slot].sparseNonValueData.data(), - HPPL_STREAM_1); - } else if (std::dynamic_pointer_cast(mat)) { - std::dynamic_pointer_cast(mat)->copyFrom( - dataPos.data(), - slots_[slot].indices.data(), - slots_[slot].sparseNonValueData.data()); - } else { - LOG(FATAL) << "Not Supported"; - } - size_t numElements = 0; - for (auto pos : 
dataPos) { - numElements += - slots_[slot].indices[pos + 1] - slots_[slot].indices[pos]; - } - nnzStats_[slot]->addSample(numElements); - - break; - } - case SlotDef::VECTOR_SPARSE_VALUE: { - if (!(cpuArguments[slot].value)) { - cpuArguments[slot].value = - Matrix::createSparseMatrix(size, - dim, - size /*DEFAULT_AVG_WIDTH = 1*/, - FLOAT_VALUE, - SPARSE_CSR, - false, - useGpu_); - } - auto mat = cpuArguments[slot].value; - mat->resize(size, dim); - if (std::dynamic_pointer_cast(mat)) { - std::dynamic_pointer_cast(mat)->copyFrom( - dataPos.data(), - slots_[slot].indices.data(), - slots_[slot].sparseFloatValueData.data(), - HPPL_STREAM_1); - } else if (std::dynamic_pointer_cast(mat)) { - std::dynamic_pointer_cast(mat)->copyFrom( - dataPos.data(), - slots_[slot].indices.data(), - slots_[slot].sparseFloatValueData.data()); - } else { - LOG(FATAL) << "Not Supported"; - } - break; - } - case SlotDef::INDEX: { - IVector::resizeOrCreate(cpuArguments[slot].ids, - size, - /* useGpu= */ false); - int* buf = cpuArguments[slot].ids->getData(); - for (int i = 0; i < size; ++i) { - buf[i] = slots_[slot].indexData[dataPos[i]]; - } - break; - } - case SlotDef::VAR_MDIM_DENSE: { - CHECK_EQ(size, 1); - auto mat = cpuArguments[slot].value; - size_t totalDim = slots_[slot].varDenseData[dataPos[0]].data.size(); - - CHECK_EQ(slots_[slot].varDenseData[dataPos[0]].dims.size(), size_t(3)); - size_t height, width, depth, oldWidth; - /* dims[2] is depth, will be changed to dims[0] in future */ - depth = slots_[slot].varDenseData[dataPos[0]].dims[2]; - height = slots_[slot].varDenseData[dataPos[0]].dims[1]; - width = slots_[slot].varDenseData[dataPos[0]].dims[0]; - oldWidth = width; - /* process the undesirable sample */ - if (oldWidth < height) { - width = height; - } - cpuArguments[slot].setFrameHeight(height); - cpuArguments[slot].setFrameWidth(width); - - if (oldWidth < height) { - totalDim = width * height * depth; - } - Matrix::resizeOrCreate(cpuArguments[slot].value, - size, - totalDim, 
- false, // trans = false - false); // useGpu = false - real* buf = cpuArguments[slot].value->getData(); - cpuArguments[slot].value->zeroMem(); - if (oldWidth < height) { - real* srcBuf = slots_[slot].varDenseData[dataPos[0]].data.data(); - for (size_t i = 0; i < depth; i++) { - for (size_t j = 0; j < height; j++) { - for (size_t k = 0; k < oldWidth; k++) { - buf[i * height * width + j * width + k] = - srcBuf[i * height * oldWidth + j * oldWidth + k]; - } - } - } - } else { - memcpy(buf, - slots_[slot].varDenseData[dataPos[0]].data.data(), - sizeof(real) * totalDim); - } - ICpuGpuVector::resizeOrCreate(cpuArguments[slot].sequenceStartPositions, - size + 1, /* size == 1 currently */ - /* useGpu= */ false); - int* bufStarts = - cpuArguments[slot].sequenceStartPositions->getMutableData(false); - bufStarts[0] = 0; - bufStarts[1] = 1; - break; - } - case SlotDef::VAR_MDIM_INDEX: { - CHECK_EQ(size, 1); - size_t totalDim = slots_[slot].varIndices[dataPos[0]].size(); - IVector::resizeOrCreate(cpuArguments[slot].ids, - totalDim, - /* useGpu= */ false); - int* buf = cpuArguments[slot].ids->getData(); - memcpy(buf, - slots_[slot].varIndices[dataPos[0]].data(), - sizeof(int) * totalDim); - - ICpuGpuVector::resizeOrCreate(cpuArguments[slot].sequenceStartPositions, - size + 1, /* size == 1 currently */ - /* useGpu= */ false); - int* bufStarts = - cpuArguments[slot].sequenceStartPositions->getMutableData(false); - bufStarts[0] = 0; - /* we expand the convolutinal feature map to a sequence data, - * so there should be a corresponding sequence labels */ - bufStarts[1] = totalDim; - break; - } - case SlotDef::STRING: { - if (cpuArguments[slot].strs) { - cpuArguments[slot].strs->resize(size); - } else { - cpuArguments[slot].strs = - std::make_shared>(size); - } - for (int i = 0; i < size; ++i) { - (*cpuArguments[slot].strs)[i] = slots_[slot].strData[dataPos[i]]; - } - break; - } - } - } - - if (useGpu_) { - std::vector& cpuArguments = cpuBatch.getStreams(); - DataBatch& gpuBatch = 
*gpuBatch_; - std::vector& gpuArguments = gpuBatch.getStreams(); - gpuArguments.resize(cpuArguments.size()); - gpuBatch.setSize(size); - for (int i = 0; i < header_.slot_defs_size(); ++i) { - SlotDef::SlotType slotType = header_.slot_defs(i).type(); - if (SlotDef::VECTOR_SPARSE_VALUE == slotType || - SlotDef::VECTOR_SPARSE_NON_VALUE == slotType) { - gpuArguments[i] = cpuArguments[i]; - gpuArguments[i].sequenceStartPositions = - cpuArguments[i].sequenceStartPositions; - } else { - gpuArguments[i].resizeAndCopyFrom( - cpuArguments[i], useGpu_, HPPL_STREAM_1); - } - } - hl_stream_synchronize(HPPL_STREAM_1); - *batch = gpuBatch; - } else { - *batch = cpuBatch; - } - - currentSequenceIndex_ += numScannedSeqs; - - return batch->getSize(); -} - -ProtoSequenceDataProvider::ProtoSequenceDataProvider(const DataConfig& config, - bool useGpu, - bool loadDataAll) - : ProtoDataProvider(config, useGpu, loadDataAll) {} - -int64_t ProtoSequenceDataProvider::getNextBatchInternal(int64_t size, - DataBatch* batch) { - CHECK(iidData()) << "ProtoSequenceDataProvider only accepts iid data"; - int64_t numSequences = 0; // actual number of sequences in the batch - - // the number of sequences scanned, including those skipped because too long - int64_t numScannedSeqs = 0; - std::lock_guard guard(lock_); - size = std::min(getSize() - currentSequenceIndex_, size); - numScannedSeqs = numSequences = size; - if (size <= 0) return 0; - - DataBatch& cpuBatch = *cpuBatch_; - std::vector& cpuArguments = cpuBatch.getStreams(); - cpuBatch.setSize(size); - cpuArguments.resize(header_.slot_defs_size()); - - for (int slot = 0; slot < header_.slot_defs_size(); ++slot) { - SlotDef::SlotType slotType = header_.slot_defs(slot).type(); - - std::vector dataPos; - dataPos.reserve(size); - auto op = [this, &dataPos](int64_t pos) { dataPos.push_back(pos); }; - sampleLoop(op, size); - - // current slot: sequenceStartPositions - ICpuGpuVector::resizeOrCreate(cpuArguments[slot].sequenceStartPositions, - size + 1, - 
/* useGpu= */ false); - - switch (slotType) { - case SlotDef::VECTOR_SPARSE_VALUE: - case SlotDef::VAR_MDIM_DENSE: - case SlotDef::VAR_MDIM_INDEX: { - LOG(FATAL) << "ProtoSequenceDataProvider only support" - << " VECTOR_DENSE, VECTOR_SPARSE_NON_VALUE and INDEX slots"; - break; - } - case SlotDef::VECTOR_SPARSE_NON_VALUE: { - // copy to IDS, not value - // pointers used in current slot - sparse_non_value_t* data = slots_[slot].sparseNonValueData.data(); - int64_t* indexs = slots_[slot].indices.data(); - int64_t* seqs = dataPos.data(); - - // current slot: i need size instances. what is the total length? - int totalFeatureInCurrentSlot = 0; - for (int ins = 0; ins < size; ins++) { - int64_t currInsId = seqs[ins]; - totalFeatureInCurrentSlot += - indexs[currInsId + 1] - indexs[currInsId]; - // special: if current instance has NO feature in current slot - if (indexs[currInsId + 1] == indexs[currInsId]) { - totalFeatureInCurrentSlot++; - } - } - // done - - // current slot: ids - IVector::resizeOrCreate(cpuArguments[slot].ids, - totalFeatureInCurrentSlot, - /* useGpu= */ false); - - // where to write - int* currPosOfArgumentId = cpuArguments[slot].ids->getData(); - int* currPosOfArgumentSeqStart = - cpuArguments[slot].sequenceStartPositions->getMutableData(false); - int allSequenceLength = 0; - currPosOfArgumentSeqStart[0] = 0; - // for each instance, copy data and fill sequence positions - for (int instance = 0; instance < size; instance++) { - int64_t currInstanceId = seqs[instance]; - int64_t currInstanceLength = - indexs[currInstanceId + 1] - indexs[currInstanceId]; - sparse_non_value_t* currInstanceData = data + indexs[currInstanceId]; - // write sequenceStartPositions - allSequenceLength += currInstanceLength; - currPosOfArgumentSeqStart[instance + 1] = allSequenceLength; - // copy features - for (int featCopier = 0; featCopier < currInstanceLength; - featCopier++) { - currPosOfArgumentId[featCopier] = currInstanceData[featCopier].col; - } - currPosOfArgumentId += 
currInstanceLength; - // special: if current instance has NO feature in current slot - if (currInstanceLength == 0) { - allSequenceLength++; - currPosOfArgumentSeqStart[instance + 1] = allSequenceLength; - currPosOfArgumentId[0] = -1; - currPosOfArgumentId++; - } - // done - } - if (slots_[slot].subIndices.size()) { - std::vector dataSubPos; - auto op = [this, &dataSubPos](int64_t pos) { - dataSubPos.push_back(pos); - }; - int subSize = subSampleLoop(op, size, slot); - ICpuGpuVector::resizeOrCreate( - cpuArguments[slot].subSequenceStartPositions, subSize + 1, false); - int* currPosOfArgumentSubSeqStart = - cpuArguments[slot].subSequenceStartPositions->getMutableData( - false); - int64_t* subSeqs = dataSubPos.data(); - int64_t* subIndexs = slots_[slot].subIndices.data(); - int allSubSequenceLength = 0; - currPosOfArgumentSubSeqStart[0] = 0; - // for each instance, compute sub-sequence number - for (int instance = 0; instance < subSize; instance++) { - int64_t currSubInstanceId = subSeqs[instance]; - int64_t currSubInstanceLength = - subIndexs[currSubInstanceId + 1] - subIndexs[currSubInstanceId]; - // write subSequenceStartPositions - allSubSequenceLength += currSubInstanceLength; - currPosOfArgumentSubSeqStart[instance + 1] = allSubSequenceLength; - // special: if current instance has NO feature in current slot - if (currSubInstanceLength == 0) { - allSubSequenceLength++; - currPosOfArgumentSubSeqStart[instance + 1] = allSubSequenceLength; - } - } - cpuArguments[slot].checkSubset(); - } - break; - } - case SlotDef::INDEX: { - // label slot - IVector::resizeOrCreate(cpuArguments[slot].ids, - size, - /* useGpu= */ false); - // fill labels - int* buf = cpuArguments[slot].ids->getData(); - for (int i = 0; i < size; ++i) { - buf[i] = slots_[slot].indexData[dataPos[i]]; - } - // label HAS sequence structure - cpuArguments[slot].sequenceStartPositions->fillSequence(false); - break; - } - case SlotDef::VECTOR_DENSE: { - // copy values - size_t dim = 
header_.slot_defs(slot).dim(); - Matrix::resizeOrCreate(cpuArguments[slot].value, - size, - dim, - false, // trans = false - false); // useGpu = false - real* buf = cpuArguments[slot].value->getData(); - for (int i = 0; i < size; ++i) { - memcpy(buf + i * dim, - slots_[slot].denseData.data() + dataPos[i] * dim, - sizeof(real) * dim); - } - // sequence structure - cpuArguments[slot].sequenceStartPositions->fillSequence(false); - break; - } - default: { LOG(FATAL) << "should not reach here"; } - } - } - - if (useGpu_) { - std::vector& cpuArguments = cpuBatch.getStreams(); - DataBatch& gpuBatch = *gpuBatch_; - std::vector& gpuArguments = gpuBatch.getStreams(); - gpuArguments.resize(cpuArguments.size()); - gpuBatch.setSize(size); - for (size_t i = 0; i < cpuArguments.size(); ++i) { - gpuArguments[i].resizeAndCopyFrom( - cpuArguments[i], useGpu_, HPPL_STREAM_1); - } - hl_stream_synchronize(HPPL_STREAM_1); - *batch = gpuBatch; - } else { - *batch = cpuBatch; - } - - currentSequenceIndex_ += numScannedSeqs; - return batch->getSize(); -} - -} // namespace paddle diff --git a/paddle/gserver/dataproviders/ProtoDataProvider.h b/paddle/gserver/dataproviders/ProtoDataProvider.h deleted file mode 100644 index 7dd45e062248f20d24c633dd4e1c8b7eebcbfa1b..0000000000000000000000000000000000000000 --- a/paddle/gserver/dataproviders/ProtoDataProvider.h +++ /dev/null @@ -1,179 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include - -#include "DataFormat.pb.h" -#include "paddle/utils/Stat.h" - -#include "DataProvider.h" -#include "ProtoReader.h" - -namespace paddle { - -/** - * @brief Provider data from protobuf data file with each sample - * specified by proto message - * - * DataSample defined in DataFormat.proto. - * - * The file format is - * - * header - * - * sample1 - * - * sample2 - * - * ... - * - * sampleN - * - * @note: In the data file, each message is prefixed with its length. - * The read/write of the protbuf are implemented in ProtoReader.h - */ -class ProtoDataProvider : public DataProvider { -public: - ProtoDataProvider(const DataConfig& config, - bool useGpu, - bool loadDataAll = true); - virtual void reset(); - - /** - * @note this size includes the sequences which are skipped because they - * are longer than the batch size. - */ - virtual int64_t getSize() { - int64_t size = sampleNums_; - if (usageRatio_ < 1.0f) { - size = static_cast(size * usageRatio_); - } - return size; - } - virtual void shuffle(); - - void loadData(const std::vector& fileList); - - virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch); - -protected: - /** - * @brief load protobuf data from a list of file - * @param[in] fileName file name of a file which contains - * a list of file names - */ - void loadData(const std::string& fileName); - - /** - * @brief load protobuf data from file - * @param[in] fileName data file name - */ - void loadDataFile(const std::string& fileName); - /** @brief check data header of each data sample - * @param[in] header data header read from protobuf data - */ - void checkDataHeader(const DataHeader& header); - /** - * @brief fill protobuf data into slot_, - * slot_ is a vector of ProtoSlot in memory. - * @param[in] sample data sample read from protobuf data - */ - void fillSlots(const DataSample& sample); - - /** - * @brief return true if each sample is one sequence, i.e., independent - * of other samples. 
- */ - inline bool iidData() const { return sequenceStartPositions_.empty(); } - - /** - * @brief check that sample is consistent with header_ - */ - void checkSample(const DataSample& sample); - - template - int64_t sequenceLoop(Op op, int64_t size); - - template - int64_t sampleLoop(Op op, int64_t size); - - template - int64_t subSampleLoop(Op op, int64_t size, int slot); - - void showDataStats(); - -protected: - struct ProtoVarSlot { - std::vector data; - std::vector dims; - }; - - struct ProtoSlot { - SlotDef::SlotType type; - int dim; - std::vector indexData; - std::vector denseData; - std::vector sparseNonValueData; - std::vector sparseFloatValueData; - std::vector indices; - std::vector subIndices; - - std::vector varDenseData; - std::vector> varIndices; - std::vector strData; - }; - DataHeader header_; - int numVecSlots_; - - std::vector slots_; - size_t sampleNums_; - - /** - * The starting position of each sequence in samples. - * The last element should be num of samples. - * If empty, each sample is one sequence. - */ - std::vector sequenceStartPositions_; - - int64_t currentSequenceIndex_; - - // The size should be the number of sequences. - std::vector shuffledSequenceIds_; - - ThreadLocalD cpuBatch_; - ThreadLocalD gpuBatch_; - - RWLock lock_; - std::vector nnzStats_; // stats for number of none-zeros entries -}; - -/** - * @brief Special use for Proto data: instances should contain sparse-non-value - * slots - * and label. 
- * - * @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE - */ -class ProtoSequenceDataProvider : public ProtoDataProvider { -public: - ProtoSequenceDataProvider(const DataConfig& config, - bool useGpu, - bool loadDataAll = true); - ~ProtoSequenceDataProvider() {} - virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch); -}; - -} // namespace paddle diff --git a/paddle/gserver/layers/BatchNormBaseLayer.cpp b/paddle/gserver/layers/BatchNormBaseLayer.cpp index bc7d1c83a48aefeb4bc6d3baa32b78aba712e58d..925af31289d0c8ca534a30a16b14bfd2df90b013 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.cpp +++ b/paddle/gserver/layers/BatchNormBaseLayer.cpp @@ -41,6 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap, useGlobalStats_ = config_.use_global_stats(); } movingAvgFraction_ = config_.moving_average_fraction(); + epsilon_ = config_.epsilon(); weight_.reset(new Weight(1, channels_, parameters_[0])); movingMean_.reset(new Weight(1, channels_, parameters_[1])); diff --git a/paddle/gserver/layers/BatchNormBaseLayer.h b/paddle/gserver/layers/BatchNormBaseLayer.h index e721d2d267a31cae46407673b8b1281e87055608..2ac3cd9d670d0fcf9c40ad2f117d5a72479663a3 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.h +++ b/paddle/gserver/layers/BatchNormBaseLayer.h @@ -94,6 +94,8 @@ protected: bool useGlobalStats_; // use to compute moving mean and variance. real movingAvgFraction_; + // Epsilon is a small random noise used in batch normalization for stability. 
+ real epsilon_; }; } // namespace paddle diff --git a/paddle/gserver/layers/BatchNormalizationLayer.cpp b/paddle/gserver/layers/BatchNormalizationLayer.cpp index dacff25e5927daf9c991577a71be86b160228317..25ab5cd927792d18f78bc1fa33eee4029b427cc7 100644 --- a/paddle/gserver/layers/BatchNormalizationLayer.cpp +++ b/paddle/gserver/layers/BatchNormalizationLayer.cpp @@ -22,8 +22,6 @@ namespace paddle { REGISTER_LAYER(batch_norm, BatchNormalizationLayer); -const real BatchNormalizationLayer::EPS = 1E-5; - bool BatchNormalizationLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ @@ -53,7 +51,7 @@ void BatchNormalizationLayer::calMeanAndStd(const MatrixPtr& mat) { calMovingMeanAndVar(); - savedInvVar_->subScalar(-EPS); + savedInvVar_->subScalar(-epsilon_); savedInvVar_->sqrt2(*savedInvVar_); } @@ -74,7 +72,7 @@ void BatchNormalizationLayer::setMeanAndStd() { savedInvVar_->copyFrom(*(movingVar_->getW())); savedInvVar_->downClip(real(0.0)); - savedInvVar_->subScalar(-EPS); + savedInvVar_->subScalar(-epsilon_); savedInvVar_->sqrt2(*savedInvVar_); } diff --git a/paddle/gserver/layers/BatchNormalizationLayer.h b/paddle/gserver/layers/BatchNormalizationLayer.h index f6115801fc6b341c0718f8851617de43bdeeec09..1fdb5e2070259a14ab6f70957c9cf03f0699f734 100644 --- a/paddle/gserver/layers/BatchNormalizationLayer.h +++ b/paddle/gserver/layers/BatchNormalizationLayer.h @@ -39,9 +39,6 @@ public: void backward(const UpdateCallback& callback = nullptr) override; protected: - /// Epsilon value used in the batch normalization formula. - static const real EPS; - /// Load pre-calculated mean and std. 
void setMeanAndStd(); diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index 49a9540c0b6e36b59ed786287ff5c4569b69a6a5..8390b55026c895b661cb514714ba92c05a7bf02e 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -21,8 +21,6 @@ namespace paddle { REGISTER_LAYER(cudnn_batch_norm, CudnnBatchNormLayer); -const double CudnnBatchNormLayer::EPS = 1E-5; - bool CudnnBatchNormLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ @@ -61,6 +59,9 @@ void CudnnBatchNormLayer::forward(PassType passType) { real* movingMean = movingMean_->getW()->getData(); real* movingVar = movingVar_->getW()->getData(); + // cuDNN does not allow an epsilon value less than CUDNN_BN_MIN_EPSILON. + eps_ = std::max(CUDNN_BN_MIN_EPSILON, static_cast(epsilon_)); + if (!useGlobalStats_) { REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str()); real* savedMean = savedMean_->getData(); @@ -75,7 +76,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { 1.0 - movingAvgFraction_, movingMean, movingVar, - EPS, + eps_, savedMean, savedInvVar); } else { @@ -90,7 +91,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { beta, movingMean, movingVar, - EPS); + eps_); } else { // There is a limitation in cudnn library. // When the batch size is larger than 1024 in cuDNN v5.1, @@ -101,7 +102,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { beta, movingMean, movingVar, - EPS, + eps_, batchSize, channels_, imageH_ * imageD_, @@ -128,6 +129,9 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { real* savedMean = savedMean_->getData(); real* savedInvVar = savedInvVar_->getData(); + // cuDNN does not allow an epsilon value less than CUDNN_BN_MIN_EPSILON. 
+ eps_ = std::max(CUDNN_BN_MIN_EPSILON, static_cast(epsilon_)); + auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) { Matrix::resizeOrCreate(m, h, w, false, true); m->zeroMem(); @@ -157,7 +161,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { gamma, gammaGrad, betaGrad, - EPS, + eps_, savedMean, savedInvVar); diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.h b/paddle/gserver/layers/CudnnBatchNormLayer.h index 413efd4d3ecd734b343efbcf8328ac0592daddda..1a3f0c0cbf8a1540e77cef70c753c91298728484 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.h +++ b/paddle/gserver/layers/CudnnBatchNormLayer.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include "BatchNormBaseLayer.h" #include "Layer.h" #include "paddle/utils/Stat.h" @@ -46,12 +47,9 @@ public: void backward(const UpdateCallback& callback = nullptr) override; protected: - /** - * Epsilon value used in the batch normalization formula. - * Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h. - * Same epsilon value should be used in forward and backward functions. - */ - static const double EPS; + /// Epsilon value used in the batch normalization formula. + /// Same epsilon value should be used in forward and backward functions. + double eps_; /// Input/output tensor descriptor desc hl_tensor_descriptor ioDesc_; diff --git a/paddle/gserver/layers/DotProdLayer.cpp b/paddle/gserver/layers/DotProdLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9e2dbe3c3c416f606d2938701f26288642b55267 --- /dev/null +++ b/paddle/gserver/layers/DotProdLayer.cpp @@ -0,0 +1,97 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Layer.h" +#include "paddle/math/Matrix.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +/** + * @brief A layer for computing the dot product of two vectors. + * Input1: vector (batchSize * dim) + * Input2: vector (batchSize * dim) + * Output: a matrix: (batchSize * 1) + */ + +class DotProdLayer : public Layer { +public: + explicit DotProdLayer(const LayerConfig& config) : Layer(config) {} + + ~DotProdLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; + +REGISTER_LAYER(dot_prod, DotProdLayer); + +bool DotProdLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + CHECK_EQ(inputLayers_.size(), 2U); + CHECK_EQ(1UL, getSize()) + << "The output dimensionality of this layer should be fixed to 1."; + + return true; +} + +void DotProdLayer::forward(PassType passType) { + Layer::forward(passType); + + MatrixPtr inV0 = getInputValue(0); + MatrixPtr inV1 = getInputValue(1); + + size_t batchSize = inV0->getHeight(); + CHECK_EQ(inV1->getHeight(), batchSize); + CHECK_EQ(inV0->getWidth(), inV1->getWidth()); + + { + REGISTER_TIMER_INFO("FwResetTimer", getName().c_str()); + reserveOutput(batchSize, 1); + } + + MatrixPtr outV = getOutputValue(); + { + REGISTER_TIMER_INFO("FwDotProdTimer", getName().c_str()); + outV->sumOfProducts(*inV0, *inV1, 1, 0); + } +} + +void DotProdLayer::backward(const 
UpdateCallback& callback) { + MatrixPtr inV0 = getInputValue(0); + MatrixPtr inV1 = getInputValue(1); + MatrixPtr outG = getOutputGrad(); + MatrixPtr inG0 = getInputGrad(0); + MatrixPtr inG1 = getInputGrad(1); + + { + REGISTER_TIMER_INFO("BwDotProdTimer", getName().c_str()); + + if (inG0) { + inG0->addRowScale(0, *inV1, *outG); + } + + if (inG1) { + inG1->addRowScale(0, *inV0, *outG); + } + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/L2DistanceLayer.cpp b/paddle/gserver/layers/L2DistanceLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c71df1b92cef9b19001a0984953a260fbdd1d762 --- /dev/null +++ b/paddle/gserver/layers/L2DistanceLayer.cpp @@ -0,0 +1,91 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "L2DistanceLayer.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +REGISTER_LAYER(l2_distance, L2DistanceLayer); + +bool L2DistanceLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + + CHECK_EQ(inputLayers_.size(), 2UL) << "The L2DistanceLayer accepts two and " + << "only two inputs."; + CHECK_EQ(getSize(), 1UL) << "The output dimensionality of L2DistanceLayer " + << "is fixed to be 1."; + + return true; +} + +void L2DistanceLayer::forward(PassType passType) { + Layer::forward(passType); + + const auto inV1 = getInputValue(0); + const auto inV2 = getInputValue(1); + + CHECK(inV1 && inV2); + CHECK_EQ(inV1->getHeight(), inV2->getHeight()) + << "The height of two inputs of this layer must be the same."; + CHECK_EQ(inV1->getWidth(), inV2->getWidth()) + << "The width of two inputs of this layer must be the same."; + + int batchSize = inV1->getHeight(); + int output_dim = getSize(); + { + REGISTER_TIMER_INFO("L2DistanceBpAtvTimer", getName().c_str()); + reserveOutput(batchSize, output_dim); + auto outV = getOutputValue(); + CHECK(outV) << "The output matrix should not be null."; + + Matrix::resizeOrCreate( + inputSub_, inV1->getHeight(), inV1->getWidth(), false, useGpu_); + + inputSub_->assign(*inV1); + inputSub_->sub(*inV2); + outV->sumOfProducts(*inputSub_, *inputSub_, 1, 0); + outV->sqrt2(*outV); + } +} + +void L2DistanceLayer::backward(const UpdateCallback& callback) { + const auto outG = getOutputGrad(); + const auto outV = getOutputValue(); + CHECK(outG && outV); + + auto inGrad1 = getInputGrad(0); + auto inGrad2 = getInputGrad(1); + + { + REGISTER_TIMER_INFO("L2DistanceBpAtvTimer", getName().c_str()); + + if (inGrad1 || inGrad2) { + outV->scalarDiv(*outV, 1.); + outV->dotMul(*outG, *outV); + } + + if (inGrad1) inGrad1->addRowScale(0, *inputSub_, *outV); + + if (inGrad2) { + inputSub_->mulScalar(-1.); 
+ inGrad2->addRowScale(0, *inputSub_, *outV); + } + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/L2DistanceLayer.h b/paddle/gserver/layers/L2DistanceLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..9b12847a10e64a713635c0df079507b23a73c257 --- /dev/null +++ b/paddle/gserver/layers/L2DistanceLayer.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * @brief The layer calculates the l2 distance between two input vectors. + * \f[ + * f(\bf{x}, \bf{y}) = \sqrt{\sum_{i=1}^D(x_i - y_i)^2} + * \f] + * + * - Input1: A vector (batchSize * dataDim) + * - Input2: A vector (batchSize * dataDim) + * - Output: A vector (batchSize * 1) + * + * The configuration api is: l2_distance_layer. + */ + +class L2DistanceLayer : public Layer { +public: + explicit L2DistanceLayer(const LayerConfig& config) : Layer(config) {} + ~L2DistanceLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +private: + // Store the result of subtracting Input2 from Input1 in forward computation, + // which will be reused in backward computation. 
+ MatrixPtr inputSub_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp index 0f2b67fd758ec1513f42c4cb1a36f2f3915f4740..39bffc26f7ddcd159130c492115b41080e32ce7f 100644 --- a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp +++ b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp @@ -38,12 +38,13 @@ bool MKLDNNAddtoLayer::init(const LayerMap& layerMap, } void MKLDNNAddtoLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { CHECK_EQ(layerSize_, getSize()) << "this layer size can not be changed"; reshapeInput(bs, ih, iw); ic = inputLayers_[0]->getSize() / ih / iw; CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize()); - CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw); + CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(), + (size_t)bs * ic * ih * iw); for (size_t i = 0; i < inputLayers_.size(); i++) { CHECK_EQ(int64_t(bs), inputLayers_[i]->getOutput().getBatchSize()); CHECK_EQ(layerSize_, inputLayers_[i]->getSize()); @@ -57,47 +58,43 @@ void MKLDNNAddtoLayer::reshape( } void MKLDNNAddtoLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(inVals_, bias, out); - in = inVals_[0]; + resetFwdBuffers(inputs, biasVal_, out); std::shared_ptr fwdPD; std::shared_ptr biasPD; - resetFwdPD(fwdPD, biasPD, inVals_, bias, out); + resetFwdPD(fwdPD, biasPD, inputs, biasVal_, out); - resetFwdPipeline(pipeline, fwdPD, biasPD, inVals_, bias, out); + resetFwdPipeline(pipeline, fwdPD, biasPD, inputs, biasVal_, out); } void MKLDNNAddtoLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetBwdBuffers(inGrads_, bias, out); - in = inGrads_[0]; + resetBwdBuffers(inputs, biasGrad_, out); // 
backward only need share output grad to input grad - for (size_t i = 0; i < inGrads_.size(); i++) { - if (inGrads_[i] != nullptr) { - inGrads_[i] = out; - inputLayers_[i]->getOutputGrad()->setData(inGrads_[i]->getData()); + for (size_t i = 0; i < inputs.size(); i++) { + if (inputs[i] != nullptr) { + inputs[i] = out; + inputLayers_[i]->getOutputGrad()->setData(inputs[i]->getData()); } } // backward bias bwdBias_ = nullptr; - if (bias) { + if (biasGrad_) { std::vector scales(bs_, 1.0); - std::vector srcPDs(bs_, bias->getPrimitiveDesc()); - auto biasPD = sum::primitive_desc(bias->getMemoryDesc(), scales, srcPDs); + std::vector srcPDs(bs_, + biasGrad_->getPrimitiveDesc()); + auto biasPD = + sum::primitive_desc(biasGrad_->getMemoryDesc(), scales, srcPDs); std::vector srcs; for (size_t i = 0; i < grads_.size(); ++i) { srcs.push_back(*(grads_[i])); } - bwdBias_.reset(new sum(biasPD, srcs, *bias)); + bwdBias_.reset(new sum(biasPD, srcs, *biasGrad_)); pipeline.push_back(*bwdBias_); } } @@ -208,7 +205,7 @@ void MKLDNNAddtoLayer::resetBwdBuffers(std::vector& inputs, inputs.resize(inputLayers_.size()); for (size_t i = 0; i < inputs.size(); i++) { - resetInGrad(inputs[i], inVal_->getPrimitiveDesc(), i); + resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i); CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc()); } diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.h b/paddle/gserver/layers/MKLDNNAddtoLayer.h index 24504b7b4f50726e2b2757ca3029461cdc27b411..0ea3e208e5fab8cbed8b53390a9381e6f2bb5733 100644 --- a/paddle/gserver/layers/MKLDNNAddtoLayer.h +++ b/paddle/gserver/layers/MKLDNNAddtoLayer.h @@ -26,9 +26,6 @@ namespace paddle { */ class MKLDNNAddtoLayer : public MKLDNNLayer { protected: - std::vector inVals_; - std::vector inGrads_; - // layer size == ic * ih * iw == oc * oh *ow, and can not be changed size_t layerSize_; @@ -50,52 +47,19 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, 
int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; - void printValueFormat() override { - for (size_t i = 0; i < inVals_.size(); ++i) { - VLOG(MKLDNN_FMTS) << i << " input: " << inVals_[i]->getFormat() << " >>>"; - } - if (outVal_) { - VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; - } - if (extOutVal_) { - VLOG(MKLDNN_FMTS) << extOutVal_->getFormat(); - } - } - - void printGradFormat() override { - if (extOutGrad_) { - VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat(); - } - if (outGrad_) { - VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; - } - for (size_t i = 0; i < inGrads_.size(); ++i) { - VLOG(MKLDNN_FMTS) << i << " input: " << inGrads_[i]->getFormat() << "<<<"; - } - } - protected: - /** - * Forward functions: reset buffers(inputs, output, bias), - * reset primitive descriptor, - * reset pipeline. 
- */ void resetFwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); @@ -110,17 +74,10 @@ protected: std::vector& inputs, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(inputs, output, bias) - */ void resetBwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - /** - * prepare for bias - */ void prepareBias(MKLDNNMatrixPtr& bias, const MatrixPtr& biasMat, const MKLDNNMatrixPtr& out, diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp index 071bdf54d5dc9538d5ced580a73b9c0fbcea41fb..7faca0f8b7f54fa0a09e8fdab11064c8c26df375 100644 --- a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp @@ -21,8 +21,6 @@ namespace paddle { REGISTER_LAYER(mkldnn_batch_norm, MKLDNNBatchNormLayer); -const real MKLDNNBatchNormLayer::EPS = 1E-5; - bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { if (!MKLDNNLayer::init(layerMap, parameterMap)) { @@ -50,6 +48,8 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap, useGlobalStats_ = config_.use_global_stats(); } movingAvgFraction_ = config_.moving_average_fraction(); + epsilon_ = config_.epsilon(); + VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? 
"use" : "do not use") << " --- global stats"; VLOG(MKLDNN_BASE) << "Moving average fraction: " << movingAvgFraction_; @@ -116,21 +116,20 @@ void MKLDNNBatchNormLayer::calMovingMeanAndVar() { } void MKLDNNBatchNormLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); oh = ih; ow = iw; // ic_ and oc can not be changed - CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic) + CHECK_EQ((size_t)ic, + inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw) << "Input channel can not be changed"; reshapeOutput(oh, ow); resizeOutput(bs, oc * oh * ow); } void MKLDNNBatchNormLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { // In training phase, it will always calculate mean and var, // so useGlobalStats must be false. @@ -140,25 +139,23 @@ void MKLDNNBatchNormLayer::resetFwd(std::vector& pipeline, useGlobalStats_ = false; } - resetFwdBuffers(in, wgt, out); + resetFwdBuffers(inputs[0], wgtVal_, out); - resetFwdPD(fwdPD_, in, wgt, out); + resetFwdPD(fwdPD_, inputs[0], wgtVal_, out); - resetFwdPipeline(pipeline, fwdPD_, in, wgt, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, out); } void MKLDNNBatchNormLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr pd; - resetBwdBuffers(in, wgt, out); + resetBwdBuffers(inputs[0], wgtGrad_, out); - resetBwdPD(pd, in, wgt, out); + resetBwdPD(pd, inputs[0], wgtGrad_, out); - resetBwdPipeline(pipeline, pd, in, wgt, out); + resetBwdPipeline(pipeline, pd, inputs[0], wgtGrad_, out); } void MKLDNNBatchNormLayer::forward(PassType passType) { @@ -213,7 +210,7 @@ void MKLDNNBatchNormLayer::resetFwdPD( if (wgt) { flags_ = (flags_ | batch_normalization_flag::use_scale_shift); } - 
auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), EPS, flags_); + auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), epsilon_, flags_); pd.reset(new bn_fwd::primitive_desc(fwdDesc, engine_)); CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc()); if (wgt) { @@ -260,9 +257,9 @@ void MKLDNNBatchNormLayer::resetFwdPipeline( void MKLDNNBatchNormLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out) { - CHECK(inVal_ && outVal_); + CHECK(inVals_[0] && outVal_); resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVal_->getPrimitiveDesc()); + resetInGrad(in, inVals_[0]->getPrimitiveDesc()); if (gradScaleShift_) { CHECK(wgtVal_); resetWithMatrix(wgt, gradScaleShift_, wgtVal_->getPrimitiveDesc()); @@ -280,7 +277,7 @@ void MKLDNNBatchNormLayer::resetBwdPD( } CHECK_PRIMITIVE_DESC_EQ(out, in->getPrimitiveDesc()); auto md = in->getMemoryDesc(); - auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, EPS, flags_); + auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, epsilon_, flags_); pd.reset(new bn_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_)); CHECK(pd->weights_primitive_desc() == fwdPD_->weights_primitive_desc()); CHECK_PRIMITIVE_DESC_EQ(wgt, pd->diff_weights_primitive_desc()); @@ -297,11 +294,12 @@ void MKLDNNBatchNormLayer::resetBwdPipeline( if (pd == nullptr) { return; } - CHECK(inVal_); + CHECK(inVals_[0]); bwdData_.reset( wgt && wgtVal_ - ? new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *wgtVal_, *in, *wgt) - : new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *in)); + ? 
new bn_bwd( + *pd, *inVals_[0], *mean_, *var_, *out, *wgtVal_, *in, *wgt) + : new bn_bwd(*pd, *inVals_[0], *mean_, *var_, *out, *in)); pipeline.push_back(*bwdData_); } diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/gserver/layers/MKLDNNBatchNormLayer.h index 456c0424ecb8dde17f98a900c5d77268cc672e34..1cf33cb34fa9cd7c9b8487a0a4a0011fb129e311 100644 --- a/paddle/gserver/layers/MKLDNNBatchNormLayer.h +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.h @@ -32,7 +32,8 @@ protected: std::shared_ptr fwdPD_; // Epsilon value used in the batch normalization formula. - static const real EPS; + real epsilon_; + // weight and bias in paddle std::unique_ptr weight_; std::unique_ptr biases_; @@ -73,18 +74,14 @@ public: void forward(PassType passType) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; @@ -98,11 +95,7 @@ protected: * moving = moving * AvgFraction + local * (1 - AvgFraction) */ void calMovingMeanAndVar(); - /** - * Forward functions: reset buffers(input, weight, output), - * reset primitive descriptor, - * reset pipeline. - */ + void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out); @@ -115,12 +108,6 @@ protected: MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(input, weight, output), - * reset primitive descriptor, - * reset pipeline. 
- */ void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out); diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/gserver/layers/MKLDNNConcatLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..44bb0883b89c712d70e2d4fdfe16bdfde86f81b7 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConcatLayer.cpp @@ -0,0 +1,185 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNConcatLayer.h" + +using namespace mkldnn; // NOLINT +typedef memory::format format; + +namespace paddle { + +REGISTER_LAYER(mkldnn_concat, MKLDNNConcatLayer); + +bool MKLDNNConcatLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { + return false; + } + CHECK_GT(inputLayers_.size(), 1UL); + CHECK(!biasParameter_); + return true; +} + +void MKLDNNConcatLayer::reshape( + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { + reshapeInput(bs, ih, iw); + ic = inputLayers_[0]->getSize() / ih / iw; + CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize()); + CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(), + (size_t)bs * ic * ih * iw); + CHECK_GT(inputLayers_.size(), 1UL); + channels_.resize(inputLayers_.size()); + channels_[0] = ic; + oc = ic; + for (size_t i = 1; i < inputLayers_.size(); i++) { + int batchsize, height, witdh; + reshapeInput(batchsize, height, witdh, i); + 
CHECK_EQ(bs, batchsize); + CHECK_EQ(ih, height); + CHECK_EQ(iw, witdh); + + channels_[i] = inputLayers_[i]->getSize() / height / witdh; + CHECK_EQ((size_t)channels_[i] * height * witdh, inputLayers_[i]->getSize()); + oc += channels_[i]; + } + oh = ih; + ow = iw; + reshapeOutput(oh, ow); + resizeOutput(bs, oc * oh * ow); +} + +void MKLDNNConcatLayer::resetFwd(std::vector& pipeline, + std::vector& inputs, + MKLDNNMatrixPtr& out) { + resetFwdBuffers(inputs, out); + + std::shared_ptr fwdPD; + resetFwdPD(fwdPD, inputs, out); + + resetFwdPipeline(pipeline, fwdPD, inputs, out); +} + +void MKLDNNConcatLayer::resetBwd(std::vector& pipeline, + std::vector& inputs, + MKLDNNMatrixPtr& out) { + resetBwdBuffers(inputs, out); + + resetBwdPipeline(pipeline, bwds_, inputs, out); +} + +void MKLDNNConcatLayer::resetFwdBuffers(std::vector& inputs, + MKLDNNMatrixPtr& out) { + inputs.resize(inputLayers_.size()); + bool has8c = false, has16c = false, hasnc = false; + for (size_t i = 0; i < inputs.size(); i++) { + resetInValue(inputs[i], nullptr, i, channels_[i]); + CHECK(inputs[i]); + auto dm = inputs[i]->getDims(); + // inputs format can be different, but ndims must equal + CHECK(i == 0 || dm.size() == inputs[0]->getDims().size()); + CHECK_EQ(bs_, dm[0]); + CHECK_EQ(channels_[i], dm[1]); + if (dm.size() > 2) { + CHECK_EQ(ih_, dm[2]); + CHECK_EQ(iw_, dm[3]); + } + if (inputs[i]->getFormat() == format::nc) { + hasnc = true; + } + if (inputs[i]->getFormat() == format::nChw8c) { + has8c = true; + } + if (inputs[i]->getFormat() == format::nChw16c) { + has16c = true; + } + } + + format outFmt; + if (has16c && oc_ % 16 == 0) { + outFmt = format::nChw16c; + } else if (has8c && oc_ % 8 == 0) { + outFmt = format::nChw8c; + } else if (hasnc) { + CHECK(oh_ == 1 && ow_ == 1); + outFmt = format::nc; + } else { + outFmt = format::nchw; + } + memory::dims outDims = + hasnc ? 
memory::dims{bs_, oc_} : memory::dims{bs_, oc_, oh_, ow_}; + auto outPD = MKLDNNMatrix::createPrimitiveDesc(outDims, outFmt, engine_); + resetOutValue(out, outPD); +} + +void MKLDNNConcatLayer::resetFwdPD(std::shared_ptr& pd, + std::vector& inputs, + MKLDNNMatrixPtr out) { + std::vector srcPDs; + for (size_t i = 0; i < inputs.size(); i++) { + srcPDs.push_back(inputs[i]->getPrimitiveDesc()); + } + CHECK(out); + pd.reset(new concat::primitive_desc(out->getMemoryDesc(), axis_, srcPDs)); + CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc()); +} + +void MKLDNNConcatLayer::resetFwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + std::vector& inputs, + MKLDNNMatrixPtr& out) { + std::vector srcs; + for (size_t i = 0; i < inputs.size(); i++) { + srcs.push_back(*(inputs[i])); + } + fwd_.reset(new concat(*pd, srcs, *out)); + pipeline.push_back(*fwd_); +} + +void MKLDNNConcatLayer::resetBwdBuffers(std::vector& inputs, + MKLDNNMatrixPtr& out) { + CHECK(outVal_); + resetOutGrad(out, outVal_->getPrimitiveDesc()); + CHECK(out); + + inputs.resize(inputLayers_.size()); + for (size_t i = 0; i < inputs.size(); i++) { + CHECK(inVals_[i]); + resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i); + CHECK_PRIMITIVE_DESC_EQ(inputs[i], inVals_[i]->getPrimitiveDesc()); + } +} + +void MKLDNNConcatLayer::resetBwdPipeline( + std::vector& pipeline, + std::vector>& prims, + std::vector& inputs, + MKLDNNMatrixPtr& out) { + // reset the backward primitives + memory::dims offsets = {0, 0, 0, 0}; + prims.resize(inputs.size()); + CHECK_EQ(inputs.size(), channels_.size()); + for (size_t i = 0; i < inputs.size(); i++) { + auto viewPD = view::primitive_desc( + out->getPrimitiveDesc(), inputs[i]->getDims(), offsets); + auto bwdPD = reorder::primitive_desc(viewPD.dst_primitive_desc(), + inputs[i]->getPrimitiveDesc()); + prims[i].reset(new reorder(bwdPD, *out, *(inputs[i]))); + offsets[axis_] += channels_[i]; + // push to pipeline + pipeline.push_back(*prims[i]); + } +} + +} // namespace 
paddle diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.h b/paddle/gserver/layers/MKLDNNConcatLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..37f3a26c5ed5db10cdba507368874c9557fb75ef --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConcatLayer.h @@ -0,0 +1,96 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLDNNLayer.h" +#include "mkldnn.hpp" + +namespace paddle { + +/** + * @brief The MKLDNN concatenation layer, a subclass of MKLDNNLayer. 
+ * + * The config file api is mkldnn_concat + */ +class MKLDNNConcatLayer : public MKLDNNLayer { +protected: + std::vector> bwds_; + // input channel numbers + std::vector channels_; + + // concat_dimension in MKLDNN + // if axis_ == 0, concat batchsize + // if axis_ == 1, concat channel (default) + int axis_; + +public: + explicit MKLDNNConcatLayer(const LayerConfig& config) + : MKLDNNLayer(config), axis_(1) {} + + ~MKLDNNConcatLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void reshape( + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; + + void resetFwd(std::vector& pipeline, + std::vector& inputs, + MKLDNNMatrixPtr& out) override; + + void resetBwd(std::vector& pipeline, + std::vector& inputs, + MKLDNNMatrixPtr& out) override; + + void printSizeInfo() override { + CHECK_EQ(channels_.size(), inputLayers_.size()); + for (size_t i = 0; i < channels_.size(); ++i) { + VLOG(MKLDNN_SIZES) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << bs_ << ", " << channels_[i] << ", " << ih_ + << ", " << iw_; + } + VLOG(MKLDNN_SIZES) << "Output: " << bs_ << ", " << oc_ << ", " << oh_ + << ", " << ow_; + } + + size_t keepCondition() { + // reset when the total element size of all inputs changed + size_t totalSize = inputLayers_[0]->getOutputValue()->getElementCnt(); + for (size_t i = 1; i < inputLayers_.size(); ++i) { + totalSize += inputLayers_[i]->getOutputValue()->getElementCnt(); + } + return totalSize; + } + +protected: + void resetFwdBuffers(std::vector& inputs, + MKLDNNMatrixPtr& out); + void resetFwdPD(std::shared_ptr& pd, + std::vector& inputs, + MKLDNNMatrixPtr out); + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + std::vector& inputs, + MKLDNNMatrixPtr& out); + void resetBwdBuffers(std::vector& inputs, + MKLDNNMatrixPtr& out); + void resetBwdPipeline(std::vector& pipeline, + std::vector>& prims, + std::vector& inputs, + MKLDNNMatrixPtr& out); +}; + +} 
// namespace paddle diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index 8aa54e0a9efa7adb766cbb6009f6a29410c6ae7d..ab1d0f7b049a349c00c6e23deb37d789382de64f 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -90,7 +90,7 @@ void MKLDNNConvLayer::convertWeightsToPaddle() { } void MKLDNNConvLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); // cal output sizes @@ -105,21 +105,17 @@ void MKLDNNConvLayer::reshape( } void MKLDNNConvLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { resetFwdPD(fwdPD_); - resetFwdBuffers(fwdPD_, in, wgt, bias, out); + resetFwdBuffers(fwdPD_, inputs[0], wgtVal_, biasVal_, out); - resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out); } void MKLDNNConvLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr bwdWgtPD; std::shared_ptr bwdDataPD; @@ -128,9 +124,10 @@ void MKLDNNConvLayer::resetBwd(std::vector& pipeline, resetBwdDataPD(bwdDataPD); - resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out); + resetBwdBuffers(bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); - resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + resetBwdPipeline( + pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); } void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) { @@ -236,14 +233,14 @@ void MKLDNNConvLayer::resetBwdWgtPD( loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); // create backward weight using input, output and weight value memory desc - CHECK(inVal_) 
<< "Should have internal input value"; + CHECK(inVals_[0]) << "Should have internal input value"; CHECK(outVal_) << "Should have internal output value"; CHECK(wgtVal_) << "Should have weight value"; algorithm algo = algorithm::convolution_direct; padding_kind padKind = padding_kind::zero; auto bwdWgtDesc = biasVal_ != nullptr ? conv_bwdWgt::desc(algo, - inVal_->getMemoryDesc(), + inVals_[0]->getMemoryDesc(), wgtVal_->getMemoryDesc(), biasVal_->getMemoryDesc(), outVal_->getMemoryDesc(), @@ -252,7 +249,7 @@ void MKLDNNConvLayer::resetBwdWgtPD( padR, padKind) : conv_bwdWgt::desc(algo, - inVal_->getMemoryDesc(), + inVals_[0]->getMemoryDesc(), wgtVal_->getMemoryDesc(), outVal_->getMemoryDesc(), strides, @@ -260,7 +257,7 @@ void MKLDNNConvLayer::resetBwdWgtPD( padR, padKind); pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); - CHECK_PRIMITIVE_DESC_EQ(inVal_, pd->src_primitive_desc()); + CHECK_PRIMITIVE_DESC_EQ(inVals_[0], pd->src_primitive_desc()); CHECK_PRIMITIVE_DESC_EQ( outVal_, pd->diff_dst_primitive_desc(), @@ -280,12 +277,12 @@ void MKLDNNConvLayer::resetBwdDataPD( memory::dims wgtDims, biasDims, strides, dilations, padL, padR; loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); - CHECK(inVal_) << "Should have internal input value"; + CHECK(inVals_[0]) << "Should have internal input value"; CHECK(outVal_) << "Should have internal output value"; // create backward data using input and output value memory desc // but using weight memory desc with any format auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct, - inVal_->getMemoryDesc(), + inVals_[0]->getMemoryDesc(), MKLDNNMatrix::createMemoryDesc(wgtDims), outVal_->getMemoryDesc(), strides, @@ -294,7 +291,7 @@ void MKLDNNConvLayer::resetBwdDataPD( padding_kind::zero); pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); CHECK_PRIMITIVE_DESC_EQ( - inVal_, + inVals_[0], pd->diff_src_primitive_desc(), "primitive desc of in value and grad 
should be equal"); CHECK_PRIMITIVE_DESC_EQ( @@ -346,12 +343,12 @@ void MKLDNNConvLayer::resetBwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_); + CHECK(inVals_[0]); // add bwdWgt handle if (bias) { - bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias)); + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt, *bias)); } else { - bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt)); + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt)); } pipeline.push_back(*bwdWgt_); diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h index 9c69136684e5f9005860b476ec6ed1bbc9ceff6c..3e754a0e65771879e836c13d63d5a5c8be3a699a 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.h +++ b/paddle/gserver/layers/MKLDNNConvLayer.h @@ -69,18 +69,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; @@ -107,48 +103,26 @@ protected: mkldnn::memory::dims& padL, mkldnn::memory::dims& padR); - /** - * reset the forward primitive descriptor. - */ void resetFwdPD(std::shared_ptr& pd); - /** - * reset the MKLDNNMatrix buffers used in forward. - */ void resetFwdBuffers(std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - /** - * reset the forward pipeline. 
- */ void resetFwdPipeline(std::vector& pipeline, std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - - /** - * reset the backward weight primitive descriptor. - */ void resetBwdWgtPD(std::shared_ptr& pd); - /** - * reset the backward data primitive descriptor. - */ void resetBwdDataPD(std::shared_ptr& pd); - /** - * reset the MKLDNNMatrix buffers used in backward. - */ void resetBwdBuffers(std::shared_ptr& wgtPD, std::shared_ptr& dataPD, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - /** - * reset the backward pipeline. - */ void resetBwdPipeline(std::vector& pipeline, std::shared_ptr& wgtPD, std::shared_ptr& dataPD, diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 350ec65fffbc73c3a6e4245f763f4c6aa868f574..c8778bdd077c4b6d170140be92bdcdd7e8e81bb2 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -74,7 +74,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { } void MKLDNNFcLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); @@ -87,32 +87,29 @@ void MKLDNNFcLayer::reshape( } void MKLDNNFcLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(in, wgt, bias, out); + resetFwdBuffers(inputs[0], wgtVal_, biasVal_, out); - resetFwdPD(fwdPD_, in, wgt, bias, out); + resetFwdPD(fwdPD_, inputs[0], wgtVal_, biasVal_, out); - resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out); } void MKLDNNFcLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + 
std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr bwdWgtPD; std::shared_ptr bwdDataPD; - resetBwdBuffers(in, wgt, bias, out); + resetBwdBuffers(inputs[0], wgtGrad_, biasGrad_, out); - resetBwdWgtPD(bwdWgtPD, wgt, bias, out); + resetBwdWgtPD(bwdWgtPD, wgtGrad_, biasGrad_, out); - resetBwdDataPD(bwdDataPD, in, out); + resetBwdDataPD(bwdDataPD, inputs[0], out); - resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + resetBwdPipeline( + pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); } void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { @@ -193,9 +190,9 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_ && outVal_); + CHECK(inVals_[0] && outVal_); resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVal_->getPrimitiveDesc()); + resetInGrad(in, inVals_[0]->getPrimitiveDesc()); CHECK(wgtVal_); resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc()); @@ -212,14 +209,15 @@ void MKLDNNFcLayer::resetBwdWgtPD( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_); - fc_bwdWgt::desc bwdWgtDesc = bias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); + CHECK(inVals_[0]); + fc_bwdWgt::desc bwdWgtDesc = + bias ? 
fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) + : fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); } @@ -245,11 +243,11 @@ void MKLDNNFcLayer::resetBwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_); + CHECK(inVals_[0]); if (bias) { - bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias)); + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt, *bias)); } else { - bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt)); + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt)); } pipeline.push_back(*bwdWgt_); diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index ee861763ff3dc10ddb4c119358b80dbe1614aecb..283dc9b540531f6009ae6e2485b7c12d4e5cf2e3 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -52,18 +52,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; @@ -73,11 +69,6 @@ public: void convertWeightsToPaddle() override; protected: - /** - * Forward functions: reset buffers(input, output, weight and bias), - * reset primitive descriptor, - * reset pipeline. 
- */ void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, @@ -93,13 +84,6 @@ protected: MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(input, output, weight and bias), - * reset primitive descriptor for backward weight, - * reset primitive descriptor for backward data, - * reset pipeline. - */ void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp index e75ac5ba4647a8267b7bc189893bd7adb5c3053f..6fbf3c7fdec2f537769adb660c67c5a597beb609 100644 --- a/paddle/gserver/layers/MKLDNNLayer.cpp +++ b/paddle/gserver/layers/MKLDNNLayer.cpp @@ -21,8 +21,8 @@ namespace paddle { bool MKLDNNLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." - << "Please set WITH_MKLDNN=ON " + CHECK(FLAGS_use_mkldnn) << "MKLDNNLayers only support use_mkldnn." 
+ << "Please set WITH_MKL=ON " << "and set use_mkldnn=True"; CHECK(!useGpu_) << "Do not support GPU yet"; @@ -48,31 +48,20 @@ void MKLDNNLayer::forward(PassType passType) { REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); CHECK(!inputLayers_.empty()); copySeqInfoToOutputs(); - size_t elemenCnt = inputLayers_[0]->getOutputValue()->getElementCnt(); - if (inputElemenCnt_ != elemenCnt) { + if (condition_ != keepCondition()) { VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; - // reset when input total sizes changed, not only the batchsize - inputElemenCnt_ = elemenCnt; - pipelineFwd_.clear(); + condition_ = keepCondition(); reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); - // all cpu device output grad or value share output's + printSizeInfo(); + // the output_.value and output_.grad are shared with CPU device shareCPUDevice(); - resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); - // MKLDNNLayer output value should be MKLDNNMatrix - // so external output value is necessary. - // Then external input value is not necessary, - // since input may be mkldnn internal buffer. 
- CHECK(extOutVal_) << "external output value is necessary"; - output_.value = std::dynamic_pointer_cast(extOutVal_); - CHECK(inVal_ && outVal_) << "internal memories are necessary"; - if (cvtInVal_) { - pipelineFwd_.insert(pipelineFwd_.begin(), *cvtInVal_); - } - if (cvtOutVal_) { - pipelineFwd_.push_back(*cvtOutVal_); - } + pipelineFwd_.clear(); + inVals_.resize(inputLayers_.size(), nullptr); + extInVals_.resize(inputLayers_.size(), nullptr); + cvtInVals_.resize(inputLayers_.size(), nullptr); + resetFwd(pipelineFwd_, inVals_, outVal_); + prepareValueConversions(pipelineFwd_); convertWeightsFromPaddle(); - printSizeInfo(); printValueFormat(); needResetBwd_ = true; } @@ -80,8 +69,8 @@ void MKLDNNLayer::forward(PassType passType) { if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) { // Update input value data when input layer is "data" type, // since the input value data address might be changed. - CHECK(extInVal_); - extInVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); + CHECK(extInVals_[0]); + extInVals_[0]->setData(getInputValue(0, CPU_DEVICE)->getData()); } if (!outputOnlyMKLDNN_) { @@ -99,22 +88,13 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) { if (needResetBwd_) { VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; pipelineBwd_.clear(); + inGrads_.resize(inputLayers_.size(), nullptr); + extInGrads_.resize(inputLayers_.size(), nullptr); + cvtInGrads_.resize(inputLayers_.size(), nullptr); pipelineMergeGrad_.clear(); mergeGrad_ = nullptr; - resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_); - // external output grad is not necessary - // since output may be mkldnn internal buffer or merge them directly. 
- CHECK(outGrad_) << "internal output grad is necessary"; - if (extOutGrad_) { - CHECK_EQ(extOutGrad_->getData(), output_.grad->getData()) - << "the external buffer should share the same data with output_.grad"; - } - if (cvtOutGrad_) { - pipelineBwd_.insert(pipelineBwd_.begin(), *cvtOutGrad_); - } - if (cvtInGrad_) { - pipelineBwd_.push_back(*cvtInGrad_); - } + resetBwd(pipelineBwd_, inGrads_, outGrad_); + prepareGradConversions(pipelineBwd_); printGradFormat(); needResetBwd_ = false; } @@ -138,8 +118,11 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) { } } -void MKLDNNLayer::reshapeInput(int& batchsize, int& height, int& width) { - const Argument& input = inputLayers_[0]->getOutput(); +void MKLDNNLayer::reshapeInput(int& batchsize, + int& height, + int& width, + size_t idx) { + const Argument& input = inputLayers_[idx]->getOutput(); batchsize = input.getBatchSize(); int h = input.getFrameHeight(); int w = input.getFrameWidth(); @@ -173,27 +156,30 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn, void MKLDNNLayer::resetInValue( MKLDNNMatrixPtr& in, const std::shared_ptr& intPD, - size_t inputIdx) { - cvtInVal_ = nullptr; - extInVal_ = nullptr; + size_t idx, + int inputChannel) { + cvtInVals_[idx] = nullptr; + extInVals_[idx] = nullptr; in = nullptr; - CHECK_GT(bs_ * ic_ * ih_ * iw_, 0); + inputChannel = inputChannel == 0 ? 
ic_ : inputChannel; + CHECK_GT(bs_ * inputChannel * ih_ * iw_, 0); auto extPD = MKLDNNMatrix::createPrimitiveDesc( - {bs_, ic_, ih_, iw_}, format::nchw, engine_); - const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue(); - extInVal_ = std::dynamic_pointer_cast(inMat); - CHECK_EQ(inputIsOnlyMKLDNN(), extInVal_ != nullptr); - if (extInVal_ == nullptr || extInVal_->getFormat() == format::nc) { - extInVal_ = MKLDNNMatrix::create(extPD, inMat); + {bs_, inputChannel, ih_, iw_}, format::nchw, engine_); + const MatrixPtr& inMat = inputLayers_[idx]->getOutputValue(); + extInVals_[idx] = std::dynamic_pointer_cast(inMat); + CHECK_EQ(inputIsOnlyMKLDNN(), extInVals_[idx] != nullptr); + if (extInVals_[idx] == nullptr || + extInVals_[idx]->getFormat() == format::nc) { + extInVals_[idx] = MKLDNNMatrix::create(extPD, inMat); } - in = extInVal_; + in = extInVals_[idx]; if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) { return; } // need create reorder in = MKLDNNMatrix::create(*intPD); - cvtInVal_ = MKLDNNMatrix::createReorder(extInVal_, in); - CHECK(cvtInVal_) << "should not be emptry"; + cvtInVals_[idx] = MKLDNNMatrix::createReorder(extInVals_[idx], in); + CHECK(cvtInVals_[idx]) << "should not be emptry"; } void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out, @@ -215,11 +201,11 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out, void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in, memory::primitive_desc intPD, - size_t inputIdx) { - cvtInGrad_ = nullptr; - extInGrad_ = nullptr; + size_t idx) { + cvtInGrads_[idx] = nullptr; + extInGrads_[idx] = nullptr; in = nullptr; - LayerPtr& input = inputLayers_[inputIdx]; + LayerPtr& input = inputLayers_[idx]; if (input->getOutputGrad() == nullptr) { // no need input grad return; @@ -234,23 +220,25 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in, in = MKLDNNMatrix::create(intPD, inMat); Argument& arg = input->getOutput(this->getName()); arg.grad = std::dynamic_pointer_cast(in); - CHECK_PRIMITIVE_DESC_EQ(inVal_, 
intPD); + CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD); if (inputIsOnlyMKLDNN()) { return; } - extInGrad_ = in; - if (isPaddleFormat(extInGrad_->getFormat())) { + extInGrads_[idx] = in; + if (isPaddleFormat(extInGrads_[idx]->getFormat())) { return; } // need create reorder - CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat())) + CHECK(extInVals_[idx] != nullptr && + isPaddleFormat(extInVals_[idx]->getFormat())) << "should have external input value and the format must be nchw(nc)"; - extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat); - CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD); + extInGrads_[idx] = + MKLDNNMatrix::create(extInVals_[idx]->getPrimitiveDesc(), inMat); + CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD); in = MKLDNNMatrix::create(intPD); - cvtInGrad_ = MKLDNNMatrix::createReorder(in, extInGrad_); - CHECK(cvtInGrad_); + cvtInGrads_[idx] = MKLDNNMatrix::createReorder(in, extInGrads_[idx]); + CHECK(cvtInGrads_[idx]); } void MKLDNNLayer::resetOutGrad(MKLDNNMatrixPtr& out, @@ -306,22 +294,8 @@ void MKLDNNLayer::resetMergeGrad(MKLDNNMatrixPtr& out) { srcs.push_back(*src); } - // TODO(TJ): remove me when mkldnn sum support different formats - for (size_t i = 1; i < srcPDs.size(); ++i) { - CHECK(srcPDs[0] == srcPDs[i]); - } - tmpOutGrad_ = out; - tmpCvt_ = nullptr; - if (out->getPrimitiveDesc() != srcPDs[0]) { - tmpOutGrad_ = MKLDNNMatrix::create(srcPDs[0]); - tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out); - CHECK(tmpCvt_); - pipelineMergeGrad_.push_back(*tmpCvt_); - } - - auto sumPD = - sum::primitive_desc(tmpOutGrad_->getMemoryDesc(), scales, srcPDs); - mergeGrad_.reset(new sum(sumPD, srcs, *tmpOutGrad_)); + auto sumPD = sum::primitive_desc(out->getMemoryDesc(), scales, srcPDs); + mergeGrad_.reset(new sum(sumPD, srcs, *out)); pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_); } diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 
7479c34c92b5231b2521493bc631474d4efd4224..e48b9b5a91f7f17cb3f31e9140f1428ba8954a20 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -34,15 +34,16 @@ typedef std::shared_ptr MKLDNNLayerPtr; */ class MKLDNNLayer : public Layer { protected: - // input value element count - size_t inputElemenCnt_; // batch size int bs_; + // their sizes are always from the first input layer // input image channel, height and width int ic_, ih_, iw_; // output image channel, height and width int oc_, oh_, ow_; + // the condition that forward need be reset + size_t condition_; // backward also need reset after reset forward handle bool needResetBwd_; @@ -67,18 +68,18 @@ protected: * When all layers are mkldnn layers, they could save internal data. */ // below MKLDNNMatrix buffers are all internal buffers - MKLDNNMatrixPtr inVal_; - MKLDNNMatrixPtr inGrad_; + std::vector inVals_; + std::vector inGrads_; MKLDNNMatrixPtr outVal_; MKLDNNMatrixPtr outGrad_; // below are external value and grad - MKLDNNMatrixPtr extInVal_; - MKLDNNMatrixPtr extInGrad_; + std::vector extInVals_; + std::vector extInGrads_; MKLDNNMatrixPtr extOutVal_; MKLDNNMatrixPtr extOutGrad_; // convert handle between external and internal buffers - std::shared_ptr cvtInVal_; - std::shared_ptr cvtInGrad_; + std::vector> cvtInVals_; + std::vector> cvtInGrads_; std::shared_ptr cvtOutVal_; std::shared_ptr cvtOutGrad_; @@ -93,23 +94,11 @@ protected: std::vector pipelineMergeGrad_; // tmp input argument to save input grad, only used to merge grad Argument tmpInArg_; - // since mkldnn sum do not support different formats: - // can refer to https://github.com/01org/mkl-dnn/issues/134 - // so need create reorder manually and save tmp MKLDNNMatrix - MKLDNNMatrixPtr tmpOutGrad_; - std::shared_ptr tmpCvt_; public: explicit MKLDNNLayer(const LayerConfig& config) : Layer(config), - inputElemenCnt_(0), - bs_(0), - ic_(0), - ih_(0), - iw_(0), - oc_(0), - oh_(0), - ow_(0), + condition_(0), 
needResetBwd_(true), outputOnlyMKLDNN_(false), engine_(mkldnn::engine::cpu, 0), @@ -125,31 +114,28 @@ public: virtual void backward(const UpdateCallback& callback); /** - * reshape the input image sizes - * and reset output image and buffer size - * output channel can not be changed + * reshape the input and output channels and image sizes + * and reset output buffer size */ virtual void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) = 0; /** * reset the mkldnn forward primitve and memories * only would be called when input size changes + * weight and bias buffers should be coverd by child class itself */ virtual void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) = 0; /** * reset the mkldnn backward primitve and memories * only would be called when needed + * weight and bias buffers should be coverd by child class itself */ virtual void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) = 0; /** @@ -175,10 +161,19 @@ public: void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); } protected: + /** + * Some layers may have different condition to reset the forward. + * The function returns the condition that do not need reset forward. 
+ */ + inline virtual size_t keepCondition() { + // reset when the first input element size changed, not only the batchsize + return inputLayers_[0]->getOutputValue()->getElementCnt(); + } + /** * reshape the input image sizes and input batchsize */ - void reshapeInput(int& batchsize, int& height, int& width); + void reshapeInput(int& batchsize, int& height, int& width, size_t idx = 0); /** * reshape output image sizes @@ -196,11 +191,13 @@ protected: /** * reset input value from input MKLDNNMatrix and internal primitive desc. * reset both internal and external buffer and create reorder if necessary. + * input channel may be different in concat. */ void resetInValue( MKLDNNMatrixPtr& in, const std::shared_ptr& intPD = nullptr, - size_t inputIdx = 0); + size_t idx = 0, + int inputChannel = 0); /** * reset output value from internal primitive desc. @@ -215,7 +212,7 @@ protected: */ void resetInGrad(MKLDNNMatrixPtr& in, mkldnn::memory::primitive_desc intPD, - size_t inputIdx = 0); + size_t idx = 0); /** * reset output grad from internal primitive desc. @@ -293,17 +290,19 @@ protected: * print the mkldnn memory format of value */ virtual void printValueFormat() { - if (extInVal_) { - VLOG(MKLDNN_FMTS) << extInVal_->getFormat() << " >>> "; - } - if (inVal_) { - VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>>"; + for (size_t i = 0; i < inVals_.size(); ++i) { + if (!inVals_[i]) { + continue; + } + VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << (extInVals_[i] ? extInVals_[i]->getFormat() + : inVals_[i]->getFormat()) + << " >>> " << inVals_[i]->getFormat() << " >>>"; } if (outVal_) { - VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; - } - if (extOutVal_) { - VLOG(MKLDNN_FMTS) << extOutVal_->getFormat(); + VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> " + << (extOutVal_ ? 
extOutVal_->getFormat() + : outVal_->getFormat()); } if (wgtVal_) { VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat(); @@ -317,17 +316,19 @@ protected: * print the mkldnn memory format of grad */ virtual void printGradFormat() { - if (extOutGrad_) { - VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat(); - } if (outGrad_) { - VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; + VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< " + << (extOutGrad_ ? extOutGrad_->getFormat() + : outGrad_->getFormat()); } - if (inGrad_) { - VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<<"; - } - if (extInGrad_) { - VLOG(MKLDNN_FMTS) << extInGrad_->getFormat() << " <<< "; + for (size_t i = 0; i < inGrads_.size(); ++i) { + if (!inGrads_[i]) { + continue; + } + VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << (extInGrads_[i] ? extInGrads_[i]->getFormat() + : inGrads_[i]->getFormat()) + << " <<< " << inGrads_[i]->getFormat() << " <<<"; } if (wgtGrad_) { VLOG(MKLDNN_FMTS) << "Weight grad format: " << wgtGrad_->getFormat(); @@ -434,6 +435,41 @@ private: outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; } } + + void prepareValueConversions(std::vector& pipeline) { + // MKLDNNLayer output value should be MKLDNNMatrix + // so external output value is necessary. + // Then external input value is not necessary, + // since input may be mkldnn internal buffer. + CHECK(extOutVal_) << "external output value is necessary"; + output_.value = std::dynamic_pointer_cast(extOutVal_); + CHECK(inVals_[0] && outVal_) << "internal memories are necessary"; + for (size_t i = 0; i < cvtInVals_.size(); ++i) { + if (cvtInVals_[i]) { + pipeline.insert(pipeline.begin(), *cvtInVals_[i]); + } + } + if (cvtOutVal_) { + pipeline.push_back(*cvtOutVal_); + } + } + void prepareGradConversions(std::vector& pipeline) { + // external output grad is not necessary + // since output may be mkldnn internal buffer or merge them directly. 
+ CHECK(outGrad_) << "internal output grad is necessary"; + if (extOutGrad_) { + CHECK_EQ(extOutGrad_->getData(), output_.grad->getData()) + << "the external buffer should share the same data with output_.grad"; + } + if (cvtOutGrad_) { + pipeline.insert(pipeline.begin(), *cvtOutGrad_); + } + for (size_t i = 0; i < cvtInGrads_.size(); ++i) { + if (cvtInGrads_[i]) { + pipeline.push_back(*cvtInGrads_[i]); + } + } + } }; } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp index a18c455beab96ef25b5545281bae4d48cec98d9e..a8252593c8fbb8013ab909e74a057850ba54bcaa 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp +++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp @@ -58,10 +58,11 @@ bool MKLDNNPoolLayer::init(const LayerMap& layerMap, } void MKLDNNPoolLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); // ic_ and oc can not be changed - CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic) + CHECK_EQ((size_t)ic, + inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw) << "Input channel can not be changed"; // cal output sizes @@ -74,29 +75,25 @@ void MKLDNNPoolLayer::reshape( } void MKLDNNPoolLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(in, out); + resetFwdBuffers(inputs[0], out); - resetFwdPD(fwdPD_, in, out); + resetFwdPD(fwdPD_, inputs[0], out); - resetFwdPipeline(pipeline, fwdPD_, in, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], out); } void MKLDNNPoolLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr pd; - resetBwdBuffers(in, out); + resetBwdBuffers(inputs[0], out); - resetBwdPD(pd, in, out); + resetBwdPD(pd, 
inputs[0], out); - resetBwdPipeline(pipeline, pd, in, out); + resetBwdPipeline(pipeline, pd, inputs[0], out); } void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, @@ -151,9 +148,9 @@ void MKLDNNPoolLayer::resetFwdPipeline( void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out) { - CHECK(inVal_ && outVal_); + CHECK(inVals_[0] && outVal_); resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVal_->getPrimitiveDesc()); + resetInGrad(in, inVals_[0]->getPrimitiveDesc()); } void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr& pd, diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.h b/paddle/gserver/layers/MKLDNNPoolLayer.h index c5ec87828bfb28b4502b4ec6b47287089c514204..dad60156f0ef7caa059ff6c70d1040e7e34c938f 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.h +++ b/paddle/gserver/layers/MKLDNNPoolLayer.h @@ -53,18 +53,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void printSizeInfo() override { @@ -75,11 +71,6 @@ public: } protected: - /** - * Forward functions: reset buffers(input, output), - * reset primitive descriptor, - * reset pipeline. - */ void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); void resetFwdPD(std::shared_ptr& pd, MKLDNNMatrixPtr in, @@ -88,12 +79,6 @@ protected: std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(input, output), - * reset primitive descriptor, - * reset pipeline. 
- */ void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); void resetBwdPD(std::shared_ptr& pd, MKLDNNMatrixPtr& in, diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 35d4b12d3d357800fe72899069b5377c252fac5f..02402894d3354a6af221948a3360ef830881bf39 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -100,8 +100,9 @@ void ROIPoolLayer::forward(PassType passType) { size_t roiEndH = round(bottomROIs[4] * spatialScale_); CHECK_GE(roiBatchIdx, 0UL); CHECK_LT(roiBatchIdx, batchSize); - size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL); - size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL); + size_t roiHeight = + std::max(roiEndH - roiStartH + 1, static_cast(1)); + size_t roiWidth = std::max(roiEndW - roiStartW + 1, static_cast(1)); real binSizeH = static_cast(roiHeight) / static_cast(pooledHeight_); real binSizeW = @@ -114,10 +115,14 @@ void ROIPoolLayer::forward(PassType passType) { size_t wstart = static_cast(std::floor(pw * binSizeW)); size_t hend = static_cast(std::ceil((ph + 1) * binSizeH)); size_t wend = static_cast(std::ceil((pw + 1) * binSizeW)); - hstart = std::min(std::max(hstart + roiStartH, 0UL), height_); - wstart = std::min(std::max(wstart + roiStartW, 0UL), width_); - hend = std::min(std::max(hend + roiStartH, 0UL), height_); - wend = std::min(std::max(wend + roiStartW, 0UL), width_); + hstart = std::min( + std::max(hstart + roiStartH, static_cast(0)), height_); + wstart = std::min( + std::max(wstart + roiStartW, static_cast(0)), width_); + hend = std::min(std::max(hend + roiStartH, static_cast(0)), + height_); + wend = std::min(std::max(wend + roiStartW, static_cast(0)), + width_); bool isEmpty = (hend <= hstart) || (wend <= wstart); size_t poolIndex = ph * pooledWidth_ + pw; diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 
4bea348f637f39444e8aad89278e6366ecd73b1d..c295ea19c9ccb3d05c509a41925d2c36efdba8ef 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -29,7 +29,7 @@ gserver_test(test_KmaxSeqScore) gserver_test(test_Expand) gserver_test(test_MaxPoolingWithMaskOutput) -########## test_Mkldnn layers and activations ########## +########## test_MKLDNN layers and activations ########## if(WITH_MKLDNN) add_unittest_without_exec(test_MKLDNN test_MKLDNN.cpp @@ -62,17 +62,6 @@ if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE) endif() if(NOT MOBILE_INFERENCE) -################### test_ProtoDataProvider ############ - add_unittest_without_exec(test_ProtoDataProvider - test_ProtoDataProvider.cpp) - - # test_ProtoDataProvider will mkdir as same name, - # so if WORKING_DIRECTORY is default directory, then - # mkdir will get error. - add_test(NAME test_ProtoDataProvider - COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) - ################## test_Evaluator ####################### add_unittest(test_Evaluator test_Evaluator.cpp) @@ -110,3 +99,24 @@ add_test(NAME test_PyDataProvider2 COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/paddle/gserver/tests:${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle ) + +################# test_CompareSparse ################## +add_unittest_without_exec(test_CompareSparse + test_CompareSparse.cpp) +if(NOT ON_TRAVIS) + add_test(NAME test_CompareSparse + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests + ./.set_port.sh -p port -n 6 + ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) +endif() + +################ test_CompareTwoNets ###################### +add_unittest_without_exec(test_CompareTwoNets + test_CompareTwoNets.cpp) +add_test(NAME test_CompareTwoNets + COMMAND 
${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests + ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index ca55a45bc77b4e171619ab788d7c7dfeefcd036a..9d61533c0b6f20c41130d7b7c15ad93392b2d24c 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { /** - * @brief test the functionality of Mkldnnlayers + * @brief test the functionality of MKLDNNlayers and MKLDNNActivations * refer to paddle original function */ class MKLDNNTester { diff --git a/paddle/gserver/tests/proto_files.txt b/paddle/gserver/tests/proto_files.txt deleted file mode 100644 index 691b38c7940bd21360eb00384e060554aa4b3e22..0000000000000000000000000000000000000000 --- a/paddle/gserver/tests/proto_files.txt +++ /dev/null @@ -1,2 +0,0 @@ -./test_ProtoDataProvider/data1.bin -./test_ProtoDataProvider/data2.bin diff --git a/paddle/gserver/tests/proto_files_compressed.txt b/paddle/gserver/tests/proto_files_compressed.txt deleted file mode 100644 index 7413c81e185d02e0d03aefa06480b9722357c5eb..0000000000000000000000000000000000000000 --- a/paddle/gserver/tests/proto_files_compressed.txt +++ /dev/null @@ -1,2 +0,0 @@ -./test_ProtoDataProvider/data1.bin.gz -./test_ProtoDataProvider/data2.bin.gz diff --git a/paddle/gserver/tests/sequence_lstm.conf b/paddle/gserver/tests/sequence_lstm.conf new file mode 100644 index 0000000000000000000000000000000000000000..f49a827f22edce056eaf9903e99b732cab7f3784 --- /dev/null +++ b/paddle/gserver/tests/sequence_lstm.conf @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers import * + +######################## data source ################################ +dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict' +dict_file = dict() +for line_count, line in enumerate(open(dict_path, "r")): + dict_file[line.strip()] = line_count + +define_py_data_sources2( + train_list='gserver/tests/Sequence/train.list', + test_list=None, + module='sequenceGen', + obj='process', + args={"dict_file": dict_file}) + +settings(batch_size=5) +######################## network configure ################################ +dict_dim = len(open(dict_path, 'r').readlines()) +word_dim = 128 +hidden_dim = 256 +label_dim = 3 +sparse_update = get_config_arg("sparse_update", bool, False) + +data = data_layer(name="word", size=dict_dim) + +emb = embedding_layer( + input=data, + size=word_dim, + param_attr=ParamAttr(sparse_update=sparse_update)) + +with mixed_layer(size=hidden_dim * 4) as lstm_input: + lstm_input += full_matrix_projection(input=emb) + +lstm = lstmemory( + input=lstm_input, + act=TanhActivation(), + gate_act=SigmoidActivation(), + state_act=TanhActivation()) + +lstm_last = last_seq(input=lstm) + +with mixed_layer( + size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: + output += full_matrix_projection(input=lstm_last) + +outputs( + classification_cost( + input=output, label=data_layer( + name="label", size=1))) diff --git a/paddle/gserver/tests/sequence_recurrent.py b/paddle/gserver/tests/sequence_recurrent.py new file mode 100644 index 
0000000000000000000000000000000000000000..4895df186bfecc5cb5263676a9cd5bac5039d565 --- /dev/null +++ b/paddle/gserver/tests/sequence_recurrent.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers import * + +######################## data source ################################ +dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict' +dict_file = dict() +for line_count, line in enumerate(open(dict_path, "r")): + dict_file[line.strip()] = line_count + +define_py_data_sources2( + train_list='gserver/tests/Sequence/train.list', + test_list=None, + module='sequenceGen', + obj='process', + args={"dict_file": dict_file}) + +settings(batch_size=5) +######################## network configure ################################ +dict_dim = len(open(dict_path, 'r').readlines()) +word_dim = 128 +hidden_dim = 128 +label_dim = 3 + +# This config is designed to be equivalent with sequence_recurrent_group.py + +data = data_layer(name="word", size=dict_dim) + +emb = embedding_layer( + input=data, size=word_dim, param_attr=ParamAttr(name="emb")) + +recurrent = recurrent_layer(input=emb, bias_attr=False, act=SoftmaxActivation()) + +recurrent_last = last_seq(input=recurrent) + +with mixed_layer( + size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: + output += full_matrix_projection(input=recurrent_last) + +outputs( + 
classification_cost( + input=output, label=data_layer( + name="label", size=1))) diff --git a/paddle/gserver/tests/sequence_recurrent_group.py b/paddle/gserver/tests/sequence_recurrent_group.py new file mode 100644 index 0000000000000000000000000000000000000000..a1d54542e3bc4e89f70d31d5e89c0f44953c9f90 --- /dev/null +++ b/paddle/gserver/tests/sequence_recurrent_group.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle.trainer_config_helpers import * + +######################## data source ################################ +dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict' +dict_file = dict() +for line_count, line in enumerate(open(dict_path, "r")): + dict_file[line.strip()] = line_count + +define_py_data_sources2( + train_list='gserver/tests/Sequence/train.list', + test_list=None, + module='sequenceGen', + obj='process', + args={"dict_file": dict_file}) + +settings(batch_size=5) +######################## network configure ################################ +dict_dim = len(open(dict_path, 'r').readlines()) +word_dim = 128 +hidden_dim = 128 +label_dim = 3 + +# This config is designed to be equivalent with sequence_recurrent.py + +data = data_layer(name="word", size=dict_dim) + +emb = embedding_layer( + input=data, size=word_dim, param_attr=ParamAttr(name="emb")) + + +def step(y): + mem = memory(name="rnn_state", size=hidden_dim) + with mixed_layer( + name="rnn_state", + size=hidden_dim, + bias_attr=False, + act=SoftmaxActivation()) as out: + out += identity_projection(input=y) + out += full_matrix_projection( + input=mem, param_attr=ParamAttr(name="___recurrent_layer_0__")) + return out + + +recurrent = recurrent_group(name="rnn", step=step, input=emb) + +recurrent_last = last_seq(input=recurrent) + +with mixed_layer( + size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: + output += full_matrix_projection(input=recurrent_last) + +outputs( + classification_cost( + input=output, label=data_layer( + name="label", size=1))) diff --git a/paddle/trainer/tests/test_CompareSparse.cpp b/paddle/gserver/tests/test_CompareSparse.cpp similarity index 98% rename from paddle/trainer/tests/test_CompareSparse.cpp rename to paddle/gserver/tests/test_CompareSparse.cpp index 5f1834bd730375fc10762fc19788d0c693f8e752..c6e07650fc4805a25baf38b9059f6c996d00cafc 100644 --- a/paddle/trainer/tests/test_CompareSparse.cpp +++ b/paddle/gserver/tests/test_CompareSparse.cpp @@ 
-22,8 +22,7 @@ limitations under the License. */ using namespace paddle; // NOLINT using namespace std; // NOLINT -static const string& configFile1 = - "trainer/tests/sample_trainer_config_compare_sparse.conf"; +static const string& configFile1 = "gserver/tests/sequence_lstm.conf"; DECLARE_bool(use_gpu); DECLARE_string(config); diff --git a/paddle/trainer/tests/test_CompareTwoNets.cpp b/paddle/gserver/tests/test_CompareTwoNets.cpp similarity index 95% rename from paddle/trainer/tests/test_CompareTwoNets.cpp rename to paddle/gserver/tests/test_CompareTwoNets.cpp index 94f65e545d116c802fb4877dc14f07aaaf83a4fb..801d9607565910b1f7f68a9c4532de5877e44f30 100644 --- a/paddle/trainer/tests/test_CompareTwoNets.cpp +++ b/paddle/gserver/tests/test_CompareTwoNets.cpp @@ -30,8 +30,6 @@ DECLARE_bool(use_gpu); DECLARE_string(config); DECLARE_string(nics); -DEFINE_string(config_file_a, "", "config of one network to compare"); -DEFINE_string(config_file_b, "", "config of another network to compare"); DEFINE_bool(need_high_accuracy, false, "whether need to run in double accuracy"); @@ -42,6 +40,10 @@ DEFINE_double( DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_int32(seed); +static const string& config_file_a = "gserver/tests/sequence_recurrent.py"; +static const string& config_file_b = + "gserver/tests/sequence_recurrent_group.py"; + struct ComData { vector outArgs; vector parameters; @@ -66,6 +68,7 @@ void calcGradient(ComData& data, const string configFile) { DataBatch dataBatch; int32_t batchSize = trainer.getConfig().opt_config().batch_size(); + trainer.getDataProvider()->reset(); trainer.getDataProvider()->setSkipShuffle(); trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch); @@ -167,11 +170,11 @@ void compareGradient(ComData& comDataA, ComData& comDataB) { TEST(Trainer, create) { ComData dataA; - calcGradient(dataA, FLAGS_config_file_a); + calcGradient(dataA, config_file_a); LOG(INFO) << "\n\nforwardBackward of Network A is finished\n\n"; ComData dataB; - 
calcGradient(dataB, FLAGS_config_file_b); + calcGradient(dataB, config_file_b); LOG(INFO) << "\n\nforwardBackward of the Network B is finished\n\n"; compareGradient(dataA, dataB); diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 3517d293e3c901caaa19952b04e56d1ef0d2b46e..cacf10692942f5eca2f6c498183f4acc00768460 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -583,6 +583,7 @@ TEST(Layer, maxoutLayer) { testLayerGrad(config, "maxout", 10, false, useGpu); } } + void testFcLayer(string format, size_t nnz) { TestConfig config; config.biasSize = 1024; @@ -1081,6 +1082,21 @@ TEST(Layer, InterpolationLayer) { } } +TEST(Layer, DotProdLayer) { + TestConfig config; + config.layerConfig.set_type("dot_prod"); + config.layerConfig.set_size(1); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "dot_prod", 10, false, useGpu); + } +} + TEST(Layer, OuterProdLayer) { TestConfig config; config.layerConfig.set_type("out_prod"); @@ -2429,6 +2445,25 @@ TEST(Layer, ScaleSubRegionLayer) { } } +TEST(Layer, L2DistanceLayer) { + TestConfig config; + config.layerConfig.set_type("l2_distance"); + config.layerConfig.set_size(1); + config.biasSize = 0; + + const size_t input_dim = 27; + const size_t batch_size = 11; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", input_dim, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", input_dim, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "l2_distance", batch_size, false, useGpu); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/gserver/tests/test_MKLDNN.cpp 
b/paddle/gserver/tests/test_MKLDNN.cpp index a859e34c8996d81f14bf1edcb6e23d5a4f687e6b..56b523f220c2a405851b89db5f63e9aa50bfaaf7 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -313,6 +313,47 @@ TEST(MKLDNNLayer, AddtoLayer) { testAddtoLayer({4, 12, 1, 1}, 3); } +static void getMKLDNNConcatConfig(TestConfig& cfg, + const std::vector& inputs) { + CHECK_GE(inputs.size(), 2UL) << "at least two inputs"; + int oc = inputs[0].ic; + for (size_t i = 1; i < inputs.size(); ++i) { + CHECK_EQ(inputs[i].bs, inputs[0].bs); + CHECK_EQ(inputs[i].ih, inputs[0].ih); + CHECK_EQ(inputs[i].iw, inputs[0].iw); + oc += inputs[i].ic; + } + cfg.biasSize = 0; + cfg.layerConfig.set_type("mkldnn_concat"); + cfg.layerConfig.set_size(oc * inputs[0].ih * inputs[0].iw); + cfg.layerConfig.set_active_type("relu"); + for (size_t i = 0; i < inputs.size(); ++i) { + std::stringstream ss; + ss << "layer_" << i; + cfg.inputDefs.push_back( + {INPUT_DATA, + ss.str(), + (size_t)(inputs[i].ic) * inputs[i].ih * inputs[i].iw, + 0}); + LayerInputConfig* input = cfg.layerConfig.add_inputs(); + ImageConfig* img_conf = input->mutable_image_conf(); + img_conf->set_channels(inputs[i].ic); + img_conf->set_img_size_y(inputs[i].ih); + img_conf->set_img_size(inputs[i].iw); + } +} + +void testConcatLayer(const std::vector& inputs) { + TestConfig dnnConfig; + getMKLDNNConcatConfig(dnnConfig, inputs); + RUN_MKLDNN_TEST_LAYER(dnnConfig, "concat", inputs[0]) +} + +TEST(MKLDNNLayer, ConcatLayer) { + testConcatLayer({{64, 128, 1, 1}, {64, 32, 1, 1}, {64, 64, 1, 1}}); + testConcatLayer({{32, 100, 8, 8}, {32, 10, 8, 8}}); +} + void testActivation(std::string actType, const testImageDesc& pm) { // TODO(TJ): remove me when paddle support elu activation if (actType == "mkldnn_elu") { diff --git a/paddle/gserver/tests/test_ProtoDataProvider.cpp b/paddle/gserver/tests/test_ProtoDataProvider.cpp deleted file mode 100644 index 
af6472619d1840e82787974d265d601b4a406c09..0000000000000000000000000000000000000000 --- a/paddle/gserver/tests/test_ProtoDataProvider.cpp +++ /dev/null @@ -1,732 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include - -#include "paddle/gserver/dataproviders/ProtoDataProvider.h" -#include "paddle/utils/Util.h" - -#include "paddle/testing/TestUtil.h" - -using namespace std; // NOLINT - -std::vector protoFiles{ - "./test_ProtoDataProvider/data1.bin", "./test_ProtoDataProvider/data2.bin", -}; -std::vector protoFilesCompressed{ - "./test_ProtoDataProvider/data1.bin.gz", - "./test_ProtoDataProvider/data2.bin.gz", -}; - -const char* kTestDir = "./test_ProtoDataProvider"; -const char kProtoFileList[] = "gserver/tests/proto_files.txt"; -const char kProtoFileListCompressed[] = - "gserver/tests/proto_files_compressed.txt"; -const int kSpraseMatrixDim = 1024; - -using namespace paddle; // NOLINT - -void prepareData(DataBatch* batch, - const int* numPerSlotType, - bool iid, - bool useGpu) { - batch->clear(); - int64_t size = uniformRandom(100) + 10; - batch->setSize(size); - - ICpuGpuVectorPtr sequenceStartPositions; - ICpuGpuVectorPtr subSequenceStartPositions; - if (!iid) { - int numSeqs = uniformRandom(10) + 1; - sequenceStartPositions = - ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false); - int* buf = sequenceStartPositions->getMutableData(false); - subSequenceStartPositions = - 
ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false); - int* subBuf = subSequenceStartPositions->getMutableData(false); - int64_t pos = 0; - int maxLen = 2 * size / numSeqs; - for (int i = 0; i < numSeqs; ++i) { - int len = - uniformRandom(min(maxLen, size - pos - numSeqs + i)) + 1; - buf[i] = pos; - subBuf[i] = pos; - pos += len; - VLOG(1) << " len=" << len; - } - buf[numSeqs] = size; - subBuf[numSeqs] = size; - } - - vector& arguments = batch->getStreams(); - for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_DENSE]; ++i) { - int64_t dim = rand() % 10 + 4; // NOLINT rand_r - MatrixPtr mat = Matrix::create(size, dim, /* trans= */ false, false); - mat->randomizeUniform(); - Argument arg; - arg.value = mat; - arg.sequenceStartPositions = sequenceStartPositions; - arguments.push_back(arg); - } - for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE]; ++i) { - MatrixPtr mat = - makeRandomSparseMatrix(size, kSpraseMatrixDim, false, useGpu); - Argument arg; - arg.value = mat; - arg.sequenceStartPositions = sequenceStartPositions; - arg.subSequenceStartPositions = subSequenceStartPositions; - arguments.push_back(arg); - } - for (int i = 0; i < numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE]; ++i) { - MatrixPtr mat = - makeRandomSparseMatrix(size, kSpraseMatrixDim, true, useGpu); - Argument arg; - arg.value = mat; - arg.sequenceStartPositions = sequenceStartPositions; - arguments.push_back(arg); - } - for (int i = 0; i < numPerSlotType[SlotDef::STRING]; ++i) { - int64_t dim = rand() % 10 + 4; // NOLINT rand_r - SVectorPtr vec = std::make_shared>(); - for (int j = 0; j < size; ++j) { - vec->push_back(randStr(dim)); - } - Argument arg; - arg.strs = vec; - arg.sequenceStartPositions = sequenceStartPositions; - arguments.push_back(arg); - } - for (int i = 0; i < numPerSlotType[SlotDef::INDEX]; ++i) { - int64_t dim = rand() % 10 + 4; // NOLINT rand_r - IVectorPtr vec = IVector::create(size, /* useGpu= */ false); - int* buf = vec->getData(); - for (int j = 0; j < 
size; ++j) { - buf[j] = uniformRandom(dim); - } - Argument arg; - arg.ids = vec; - arg.sequenceStartPositions = sequenceStartPositions; - arguments.push_back(arg); - } -} - -inline int getSlotDim(const Argument& arg) { - if (arg.value) { - return arg.value->getWidth(); - } else if (arg.ids) { - return arg.ids->getMax() + 1; - } else if (arg.strs) { - return 1; - } - LOG(FATAL) << "Invalid argument"; - return 0; -} - -inline SlotDef::SlotType getSlotType(const Argument& arg) { - if (arg.value) { - auto& m = *arg.value; - auto& type = typeid(m); - if (type == typeid(CpuMatrix) || type == typeid(GpuMatrix)) { - return SlotDef::VECTOR_DENSE; - } - if (type == typeid(CpuSparseMatrix)) { - auto valueType = - std::dynamic_pointer_cast(arg.value)->getValueType(); - if (NO_VALUE == valueType) { - return SlotDef::VECTOR_SPARSE_NON_VALUE; - } else { - return SlotDef::VECTOR_SPARSE_VALUE; - } - } - if (type == typeid(GpuSparseMatrix)) { - auto valueType = - std::dynamic_pointer_cast(arg.value)->getValueType(); - if (NO_VALUE == valueType) { - return SlotDef::VECTOR_SPARSE_NON_VALUE; - } else { - return SlotDef::VECTOR_SPARSE_VALUE; - } - } - - LOG(FATAL) << "Unknown matrix type"; - } - if (arg.ids) return SlotDef::INDEX; - if (arg.strs) return SlotDef::STRING; - LOG(FATAL) << "Invalid argument"; - return SlotDef::VECTOR_DENSE; -} - -void getColRow(const Argument& arg, - int64_t pos, - bool useGpu, - int* colNum, - const int** rowCols, - const real** rowValues) { - SlotDef::SlotType type = getSlotType(arg); - GpuSparseMatrixPtr matGpu; - CpuSparseMatrixPtr matCpu; - if (useGpu) { - matGpu = dynamic_pointer_cast(arg.value); - ASSERT_TRUE(matGpu != NULL); - } else { - matCpu = dynamic_pointer_cast(arg.value); - ASSERT_TRUE(matCpu != NULL); - } - *colNum = useGpu ? matGpu->getColNum(pos) : matCpu->getColNum(pos); - *rowCols = useGpu ? matGpu->getRowCols(pos) : matCpu->getRowCols(pos); - if (type == SlotDef::VECTOR_SPARSE_VALUE) { - *rowValues = useGpu ? 
matGpu->getRowValues(pos) : matCpu->getRowValues(pos); - } else { - *rowValues = NULL; - } -} - -void makeSample(const vector& arguments, - int64_t pos, - bool isBeginning, - DataSample* sample, - bool useGpu) { - sample->set_is_beginning(isBeginning); - int slotid = 0; - for (auto& arg : arguments) { - SlotDef::SlotType type = getSlotType(arg); - int64_t dim = getSlotDim(arg); - switch (type) { - case SlotDef::VECTOR_DENSE: { - VectorSlot* vecSlot = sample->add_vector_slots(); - auto values = vecSlot->mutable_values(); - values->Reserve(dim); - for (int i = 0; i < dim; ++i) { - values->AddAlreadyReserved( - static_cast(arg.value->getElement(pos, i))); - } - break; - } - case SlotDef::INDEX: { - sample->add_id_slots(arg.ids->get(pos)); - break; - } - case SlotDef::VECTOR_SPARSE_NON_VALUE: { - VectorSlot* vecSlot = sample->add_vector_slots(); - auto ids = vecSlot->mutable_ids(); - int colNum; - const int* rowCols; - const real* rowValues; // nullptr - getColRow(arg, pos, useGpu, &colNum, &rowCols, &rowValues); - ids->Reserve(colNum); - for (int i = 0; i < colNum; ++i) { - ids->AddAlreadyReserved(rowCols[i]); - } - SubseqSlot* subseqSlot = sample->add_subseq_slots(); // subseq - subseqSlot->set_slot_id(slotid); - auto lens = subseqSlot->mutable_lens(); - lens->Add(colNum); - break; - } - case SlotDef::VECTOR_SPARSE_VALUE: { - VectorSlot* vecSlot = sample->add_vector_slots(); - auto values = vecSlot->mutable_values(); - auto ids = vecSlot->mutable_ids(); - int colNum; - const int* rowCols; - const real* rowValues; - getColRow(arg, pos, useGpu, &colNum, &rowCols, &rowValues); - ids->Reserve(colNum); - values->Reserve(colNum); - for (int i = 0; i < colNum; ++i) { - ids->AddAlreadyReserved(rowCols[i]); - values->AddAlreadyReserved(rowValues[i]); - } - break; - } - case SlotDef::VAR_MDIM_DENSE: - case SlotDef::VAR_MDIM_INDEX: { - LOG(FATAL) << "Not implemented"; - break; - } - case SlotDef::STRING: { - VectorSlot* vecSlot = sample->add_vector_slots(); - 
vecSlot->add_strs((*arg.strs)[pos]); - break; - } - } - slotid++; - } -} - -void writeData(const DataBatch& batch, bool useGpu, bool dataCompression) { - DataHeader header; - const vector& arguments = batch.getStreams(); - for (auto& argument : arguments) { - SlotDef* slotDef = header.add_slot_defs(); - slotDef->set_type(getSlotType(argument)); - slotDef->set_dim(getSlotDim(argument)); - } - VLOG(1) << "header=" << header.DebugString(); - - int64_t totalSeqs = batch.getNumSequences(); - int64_t seq = 0; - ICpuGpuVectorPtr sequenceStartPositions = arguments[0].sequenceStartPositions; - int64_t numWritten = 0; - vector curProtoFiles = - dataCompression ? protoFilesCompressed : protoFiles; - for (size_t i = 0; i < curProtoFiles.size(); ++i) { - int64_t numSeqs = totalSeqs * (i + 1) / curProtoFiles.size() - - totalSeqs * i / curProtoFiles.size(); - ofstream os(curProtoFiles[i]); - CHECK(os) << "Fail to open " << curProtoFiles[i]; - unique_ptr writer(new ProtoWriter(&os, dataCompression)); - CHECK(writer->write(header)); - for (int j = 0; j < numSeqs; ++j, ++seq) { - int64_t begin = seq; - int64_t end = seq + 1; - if (sequenceStartPositions) { - begin = sequenceStartPositions->getElement(seq); - end = sequenceStartPositions->getElement(seq + 1); - } - for (int pos = begin; pos < end; ++pos) { - DataSample sample; - makeSample(arguments, pos, pos == begin, &sample, useGpu); - CHECK(writer->write(sample)); - ++numWritten; - } - } - - writer.reset(nullptr); - os.close(); - } - CHECK_EQ(arguments[0].getBatchSize(), numWritten); -} - -// check that the sample at pos1 in args1 is same as the sample at pos2 in args2 -void checkSample(const vector& args1, - int64_t pos1, - const vector& args2, - int64_t pos2, - bool useGpu) { - EXPECT_EQ(args1.size(), args2.size()); - VLOG(1) << " pos1=" << pos1 << " pos2=" << pos2; - - for (size_t i = 0; i < args1.size(); ++i) { - auto type = getSlotType(args1[i]); - int dim = getSlotDim(args1[i]); - EXPECT_EQ(type, getSlotType(args2[i])); - 
if (type == SlotDef::INDEX) { - EXPECT_GE(dim, getSlotDim(args2[i])); - } else { - EXPECT_EQ(dim, getSlotDim(args2[i])); - } - switch (type) { - case SlotDef::VECTOR_DENSE: { - for (int j = 0; j < dim; ++j) { - EXPECT_EQ(static_cast(args1[i].value->getElement(pos1, j)), - static_cast(args2[i].value->getElement(pos2, j))); - } - break; - } - case SlotDef::INDEX: { - EXPECT_EQ(args1[i].ids->get(pos1), args2[i].ids->get(pos2)); - break; - } - case SlotDef::VECTOR_SPARSE_NON_VALUE: - case SlotDef::VECTOR_SPARSE_VALUE: { - int colNum1, colNum2; - const int *rowCols1, *rowCols2; - const real *rowValues1, *rowValues2; - getColRow(args1[i], pos1, useGpu, &colNum1, &rowCols1, &rowValues1); - getColRow(args2[i], pos2, useGpu, &colNum2, &rowCols2, &rowValues2); - EXPECT_EQ(colNum1, colNum2); - for (int j = 0; j < colNum1; ++j) { - EXPECT_EQ(rowCols1[j], rowCols2[j]); - if (type == SlotDef::VECTOR_SPARSE_VALUE) { - EXPECT_EQ(rowValues1[j], rowValues2[j]); - } - } - break; - } - case SlotDef::VAR_MDIM_DENSE: - case SlotDef::VAR_MDIM_INDEX: { - LOG(FATAL) << "Not implemented"; - break; - } - case SlotDef::STRING: { - EXPECT_EQ((*args1[i].strs)[pos1], (*args2[i].strs)[pos2]); - break; - } - } - } -} - -void testProtoDataProvider(int* numPerSlotType, - bool iid, - bool async, - bool useGpu, - bool dataCompression, - int numConstantSlots = 0) { - mkDir(kTestDir); - DataBatch data; - - prepareData(&data, numPerSlotType, iid, useGpu); - writeData(data, useGpu, dataCompression); - - DataConfig config; - config.set_type("proto"); - config.set_files(dataCompression ? 
kProtoFileListCompressed : kProtoFileList); - config.set_async_load_data(async); - - for (int i = 0; i < numConstantSlots; ++i) { - config.add_constant_slots(i + 11); - MatrixPtr w = Matrix::create(data.getSize(), - 1, - /* trans= */ false, - /* useGpu= */ false); - w->assign(config.constant_slots(i)); - data.appendData(w); - } - - unique_ptr dataProvider(DataProvider::create(config, useGpu)); - dataProvider->setSkipShuffle(); - - EXPECT_EQ(data.getSize(), dataProvider->getSize()); - - int64_t batchSize = 10; - DataBatch batch; - - size_t seq1 = 0; - vector& args1 = data.getStreams(); - ICpuGpuVectorPtr sequenceStartPositions1 = args1[0].sequenceStartPositions; - - dataProvider->reset(); - - while (dataProvider->getNextBatch(batchSize, &batch) > 0) { - CHECK_EQ(data.getNumStreams(), batch.getNumStreams()); - vector& args2 = batch.getStreams(); - ICpuGpuVectorPtr sequenceStartPositions2 = args2[0].sequenceStartPositions; - for (auto& arg : args2) { - EXPECT_EQ(iid, !arg.sequenceStartPositions); - } - size_t numSeqs = batch.getNumSequences(); - VLOG(1) << "numSeqs=" << numSeqs; - for (size_t seq2 = 0; seq2 < numSeqs; ++seq1, ++seq2) { - int64_t begin1 = seq1; - int64_t end1 = seq1 + 1; - if (sequenceStartPositions1) { - begin1 = sequenceStartPositions1->getElement(seq1); - end1 = sequenceStartPositions1->getElement(seq1 + 1); - EXPECT_LT(seq1, sequenceStartPositions1->getSize() - 1); - } - - int64_t begin2 = seq2; - int64_t end2 = seq2 + 1; - if (sequenceStartPositions2) { - begin2 = sequenceStartPositions2->getElement(seq2); - end2 = sequenceStartPositions2->getElement(seq2 + 1); - } - VLOG(1) << " begin1=" << begin1 << " end1=" << end1 - << " begin2=" << begin2 << " end2=" << end2; - EXPECT_EQ(end1 - begin1, end2 - begin2); - for (int i = 0; i < end1 - begin1; ++i) { - checkSample(args1, begin1 + i, args2, begin2 + i, useGpu); - } - } - } - - EXPECT_EQ(seq1, (size_t)data.getNumSequences()); - rmDir(kTestDir); -} - -TEST(ProtoDataProvider, test) { - int 
numSlotsArray[] = {0, 3}; - int numTwoArray[] = {0, 1}; - int numSlotsArraySize = sizeof(numSlotsArray) / sizeof(numSlotsArray[0]); - const int numSlot = 5; - int combination[numSlot] = {0}; - int k = numSlot - 1; - while (k >= 0) { - int numDenseVecSlots = numSlotsArray[combination[0]]; - int numSparseNonValueVecSlots = numSlotsArray[combination[1]]; - int numSparseValueVectorSlots = numSlotsArray[combination[2]]; - int numStrSlots = numSlotsArray[combination[3]]; - int numIdSlots = numSlotsArray[combination[4]]; - // while loop : traverse all cases - k = numSlot - 1; - while (k >= 0) { - if (combination[k] < (numSlotsArraySize - 1)) { - ++combination[k]; - break; - } else { - combination[k] = 0; - --k; - } - } - if (numDenseVecSlots + numSparseNonValueVecSlots + - numSparseValueVectorSlots + numStrSlots + numIdSlots < - 1) - continue; - for (int iid : numTwoArray) { - for (int async : numTwoArray) { - for (int useGpu : numTwoArray) { - for (int dataCompression : numTwoArray) { - if (async && useGpu) { - // Currently in async mode, useGpu is not supported - continue; - } -#ifndef PADDLE_WITH_CUDA - if (useGpu) { - continue; - } -#endif - LOG(INFO) << " numDenseVecSlots=" << numDenseVecSlots - << " numSparseNonValueVecSlots=" - << numSparseNonValueVecSlots - << " numSparseValueVectorSlots=" - << numSparseValueVectorSlots - << " numStrSlots=" << numStrSlots - << " numIdSlots=" << numIdSlots << " iid=" << iid - << " async=" << async << " useGpu=" << useGpu - << " dataCompression=" << dataCompression; - int numPerSlotType[SlotDef::SlotType_ARRAYSIZE] = {0}; - numPerSlotType[SlotDef::VECTOR_DENSE] = numDenseVecSlots; - numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE] = - numSparseNonValueVecSlots; - numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE] = - numSparseValueVectorSlots; - numPerSlotType[SlotDef::INDEX] = numIdSlots; - numPerSlotType[SlotDef::STRING] = numStrSlots; - testProtoDataProvider( - numPerSlotType, iid, async, useGpu, dataCompression); - } // end for (int 
dataCompression : numTwoArray) - } // end for (int useGpu : numTwoArray) - } // end for (int async : numTwoArray) - } // end for (int iid : numTwoArray) - } // end for (while, traverse all slots) -} - -TEST(ProtoDataProvider, constant_slots) { - int numSlotsArray[] = {0, 3}; - int numTwoArray[] = {0, 1}; - for (int numDenseVecSlots : numSlotsArray) { - for (int numSparseNonValueVecSlots : numSlotsArray) { - if (numDenseVecSlots + numSparseNonValueVecSlots < 1) continue; - for (int numConstantSlots : {1, 2}) { - for (int useGpu : numTwoArray) { - for (int dataCompression : numTwoArray) { -#ifndef PADDLE_WITH_CUDA - if (useGpu) { - continue; - } -#endif - LOG(INFO) << " numDenseVecSlots=" << numDenseVecSlots - << " numSparseNonValueVecSlots=" - << numSparseNonValueVecSlots - << " numConstantSlogs=" << numConstantSlots - << " useGpu=" << useGpu - << " dataCompression=" << dataCompression; - int numPerSlotType[SlotDef::SlotType_ARRAYSIZE] = {0}; - numPerSlotType[SlotDef::VECTOR_DENSE] = numDenseVecSlots; - numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE] = - numSparseNonValueVecSlots; - numPerSlotType[SlotDef::VECTOR_SPARSE_VALUE] = 1; - numPerSlotType[SlotDef::INDEX] = 1; - testProtoDataProvider(numPerSlotType, - /* iid= */ true, - /* async= */ false, - useGpu, - dataCompression, - numConstantSlots); - } // end for (int dataCompression : numTwoArray) - } // end for (int useGpu : numTwoArray) - } // end for (int numConstantSlots : {1, 2}) - } // end for (int numSparseNonValueVecSlots : numSlotsArray) - } // end for (int numDenseVecSlots : numSlotsArray) -} - -void checkSampleSequence(const vector& args1, - const vector& args2, - int64_t offset, - int64_t numSeqs, - bool useGpu) { - // check slot num are equal - EXPECT_EQ(args1.size(), args2.size()); - for (size_t i = 0; i < args1.size(); i++) { - auto type = getSlotType(args1[i]); - // check for args2: sequenceStartPositions vs numSeqs - // (1) size - EXPECT_EQ(args2[i].sequenceStartPositions->getSize(), 
(size_t)numSeqs + 1); - // (2) content - auto checkArgContent = [&](const Argument& args, int numSeqs) { - for (int j = 0; j <= numSeqs; j++) { - int start_pos = args.sequenceStartPositions->getElement(j); - EXPECT_EQ(start_pos, j); - } - }; - switch (type) { - case SlotDef::INDEX: { - // args1: for label - checkArgContent(args2[i], numSeqs); - // check for args2: ids are equal to args1[offset] - // (1) size - EXPECT_EQ(args2[i].ids->getSize(), (size_t)numSeqs); - // (2) content - for (int j = 0; j < numSeqs; j++) { - EXPECT_EQ(args2[i].ids->get(j), args1[i].ids->get(offset + j)); - } - break; - } - case SlotDef::VECTOR_SPARSE_NON_VALUE: { - // args1: for sparse_non_value - // args2 should put sparse indexes in ids - int colNum1; - const int* rowCols1; - const real* rowValues1; // nullptr - int totalLength = 0; - for (int j = 0; j < numSeqs; j++) { - getColRow( - args1[i], offset + j, useGpu, &colNum1, &rowCols1, &rowValues1); - // (1) lengths - EXPECT_EQ(totalLength, - args2[i].sequenceStartPositions->getElement(j)); - EXPECT_EQ(totalLength, - args2[i].subSequenceStartPositions->getElement(j)); - // (2) content - for (int k = 0; k < colNum1; k++) { - EXPECT_EQ(rowCols1[k], args2[i].ids->get(totalLength + k)); - } - totalLength += colNum1; - if (colNum1 == 0) { - // special case here: we will put a "-1" into ids when column num is - // zero. see ProtoSequenceDataProvider::getNextBatchInternal. 
- EXPECT_EQ(-1, args2[i].ids->get(totalLength)); - totalLength++; - } - } - EXPECT_EQ(totalLength, - args2[i].sequenceStartPositions->getElement(numSeqs)); - EXPECT_EQ(totalLength, - args2[i].subSequenceStartPositions->getElement(numSeqs)); - break; - } - case SlotDef::VECTOR_DENSE: { - // args1: for dense vector - checkArgContent(args2[i], numSeqs); - // check for args2: values are equal to args1[offset] - // (1) size - EXPECT_EQ(args2[i].value->getHeight(), (size_t)numSeqs); - EXPECT_EQ(args2[i].value->getWidth(), (size_t)getSlotDim(args1[i])); - // (2) content - for (int j = 0; j < numSeqs; j++) { - for (size_t k = 0; k < args2[i].value->getWidth(); k++) { - EXPECT_EQ( - static_cast(args1[i].value->getElement(j + offset, k)), - static_cast(args2[i].value->getElement(j, k))); - } - } - break; - } - default: { EXPECT_EQ(true, false) << "should not reach here"; } - } - } -} - -void testProtoSequenceDataProvider(int* numPerSlotType, - bool async, - bool useGpu) { - mkDir(kTestDir); - DataBatch data; - - prepareData(&data, - numPerSlotType, - /* iid */ true, - useGpu); - writeData(data, useGpu, /* dataCompression */ false); - - DataConfig config; - config.set_type("proto_sequence"); - config.set_files(kProtoFileList); - config.set_async_load_data(async); - - unique_ptr dataProvider(DataProvider::create(config, useGpu)); - dataProvider->setSkipShuffle(); - - EXPECT_EQ(data.getSize(), dataProvider->getSize()); - - int64_t batchSize = 10; - DataBatch batch; - - vector& args1 = data.getStreams(); - ICpuGpuVectorPtr sequenceStartPositions1 = args1[0].sequenceStartPositions; - - dataProvider->reset(); - - size_t args1Offset = 0; - while (dataProvider->getNextBatch(batchSize, &batch) > 0) { - CHECK_EQ(data.getNumStreams(), batch.getNumStreams()); - vector& args2 = batch.getStreams(); - ICpuGpuVectorPtr sequenceStartPositions2 = args2[0].sequenceStartPositions; - for (auto& arg : args1) { - // args1 should not has sequence - EXPECT_EQ(true, !arg.sequenceStartPositions); - } 
- for (auto& arg : args2) { - // args2 should has sequence - EXPECT_NE(true, !arg.sequenceStartPositions); - } - size_t numSeqs = batch.getNumSequences(); - checkSampleSequence(args1, args2, args1Offset, numSeqs, useGpu); - args1Offset += numSeqs; - } - - EXPECT_EQ(args1Offset, (size_t)data.getNumSequences()); - rmDir(kTestDir); -} - -TEST(ProtoSequenceDataProvider, test) { - int numSlotsArray[] = {0, 3}; - int numTwoArray[] = {0, 1}; - for (int numSparseNonValueVecSlots : numSlotsArray) { - for (int numIdSlots : numSlotsArray) { - for (int numDenseVecSlots : numSlotsArray) { - if (numDenseVecSlots + numSparseNonValueVecSlots + numIdSlots < 1) - continue; - for (int async : numTwoArray) { - for (int useGpu : numTwoArray) { - if (async && useGpu) { - // Currently in async mode, useGpu is not supported - continue; - } -#ifndef PADDLE_WITH_CUDA - if (useGpu) { - continue; - } -#endif - LOG(INFO) << " numDenseVecSlots=" << numDenseVecSlots - << " numSparseNonValueVecSlots=" - << numSparseNonValueVecSlots - << " numIdSlots=" << numIdSlots << " async=" << async - << " useGpu=" << useGpu; - int numPerSlotType[SlotDef::SlotType_ARRAYSIZE] = {0}; - numPerSlotType[SlotDef::VECTOR_DENSE] = numDenseVecSlots; - numPerSlotType[SlotDef::VECTOR_SPARSE_NON_VALUE] = - numSparseNonValueVecSlots; - numPerSlotType[SlotDef::INDEX] = numIdSlots; - testProtoSequenceDataProvider(numPerSlotType, async, useGpu); - } // end for (int useGpu : numTwoArray) - } // end for (int async : numTwoArray) - } // end for (int numDenseVecSlots : numSlotsArray) - } // end for (int numIdSlots : numSlotsArray) - } // end for (int numSparseNonValueVecSlots : numSlotsArray) -} diff --git a/paddle/math/Storage.cpp b/paddle/math/Storage.cpp index 4adaaef9838f0d178468af3af142031325bfc11d..a2ef731ecbcd18ca4bd0b2381de04650a2686c2d 100644 --- a/paddle/math/Storage.cpp +++ b/paddle/math/Storage.cpp @@ -17,9 +17,13 @@ limitations under the License. 
*/ #include "paddle/utils/StringUtil.h" #include "paddle/utils/Util.h" +#ifndef PADDLE_MOBILE_INFERENCE DEFINE_int32(pool_limit_size, 536870912, "maximum memory size managed by a memory pool, default is 512M"); +#else +DEFINE_int32(pool_limit_size, 0, "default is 0"); +#endif namespace paddle { diff --git a/paddle/memory/README.md b/paddle/memory/README.md index 7f95e80f980b0c0b93ecb418e6b923045313eaa5..6cb003c50bc7d142d65b0591e7e5235431d2ea42 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -1,4 +1,141 @@ # Region-based Heterogeneous Memory Management +## Design -Please check out the [design documentation](http://gangliao.me) to find out more details about -buddy memory allocator for both CPU and GPU. +### Usage + +To allocate 4KB CPU memory: + +```cpp +p = memory::Alloc(platform::CPUPlace(), 4*1024); +``` + +To allocate 4KB memory on the 3rd GPU: + +```cpp +p = memory::Alloc(platform::GPUPlace(2), 4*1024); +``` + +To free memory and check the so-far used amount of memory on a place: + +```cpp +auto pl = platform::GPUPlace(0); +p = memory::Alloc(pl, 4*1024); +cout << memory::Used(pl); +memory::Free(pl, p); +``` + +### API + +In `paddle/memory/memory.h` we have: + +```cpp +namespace memory { +template void* Alloc(Place, size_t); +template void Free(Place, void*); +template size_t Used(Place); +} // namespace memory +``` + +These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`: + +```cpp +template<> +void* Alloc(CPUPlace p, size_t size) { + return GetCPUBuddyAllocator()->Alloc(size); +} +``` + +and + +```cpp +template<> +void* Alloc(GPUPlace p, size_t size) { + return GetGPUBuddyAllocator(p.id)->Alloc(size); +} +``` + +Similar specializations exist for `Free` and `Used`. + +### Implementation + +`GetCPUBuddyAllocator` and `GetGPUBuddyAllocator` are singletons. 
+ +```cpp +BuddyAllocator* GetCPUBuddyAllocator() { + static BuddyAllocator* a = NULL; + if (a == NULL) { + a = new BuddyAllocator(new CPUAllocator /*backup allocator*/, ...); + } + return a; +} + +BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + static BuddyAllocator* as = NULL; + if (as == NULL) { + as = new BuddyAllocator*[platform::NumGPUs()]; + for (int gpu = 0; gpu < platform::NumGPUs(); gpu++) { + as[gpu] = new BuddyAllocator(new GPUAllocator(gpu) /* backup allocator */, ...); + } + } + return as[gpu_id]; +``` + +#### `BuddyAllocator` + +`BuddyAllocator` implements the buddy allocation algorithm. Its constructor takes only parameters related to the algorithm: + +```cpp +BuddyAllocator::BuddyAllocator(initial_pool_size, max_pool_size) { + ... +} +``` + +Please be aware that **`BuddyAllocator` always allocates aligned memory**, aligned on 32 bytes, which can hold a `BuddyAllocator::Block` object: + +```cpp +class BuddyAllocator { + private: + struct Block { + size_t size; + Block *left, *right; + size_t index; // allocator id + }; + ... +}; +``` + +Because BuddyAllocator has the meta-data of each block, it can track the used memory -- recording the amount returned by `Alloc` and freed in `Free`. In contrast, `CPUAllocator` and `GPUAllocator` don't know the size of a freed memory block and cannot do this tracking. + +#### System Allocators + +The `GPUAllocator` and `CPUAllocator` are called *system allocators*. They work as the fallback allocators of `BuddyAllocator`. + +## Justification + +I got inspiration from Majel and Caffe2, though the above design looks different from both. + +### Caffe2 + +In Caffe2, `Tensor::mutable_data()` allocates the memory. In particular, [`Tensor::mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L523) calls [`Tensor::raw_mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L459), which in turn calls [`Context::New`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L479). 
+ +There are two implementations of `Context`: + +1. [`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory. + +1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, which also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory. + +### Majel + +In Majel, there are basically two allocator types: + +1. `cpu::SystemAllocator`, which has similar functionality to `caffe2::CPUContext::New/Delete`. +1. `gpu::SystemAllocator`, which has similar functionality to `caffe2::CUDAContext::New/Delete`. + +However, memory is not allocated directly via these two allocators. Instead, these two allocators are defined in hidden namespaces. + +In Majel there are hidden global variables like: + +1. `cpu::SystemAllocator g_cpu_allocator`, and +1. `vector<gpu::SystemAllocator> g_gpu_allocators(NUM_GPUS)`. + +Programs allocate memory via a BuddyAllocator, which can take the `g_cpu_allocator` or a `g_gpu_allocators[gpu_id]` as its *fallback allocator*, so that if BuddyAllocator cannot find a block in its memory pool, it extends its memory pool by calling the fallback allocator's `New(size_t)`. 
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 709f7de2e43093114d096cbfca5b5d49293a6d3e..059a6bba84cfb0c1f6cbbba3c88d589b52dc5592 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -9,6 +9,7 @@ function(op_library TARGET) set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} PARENT_SCOPE) set(cc_srcs) set(cu_srcs) + set(cu_cc_srcs) set(op_common_deps operator op_registry math_function) set(options "") set(oneValueArgs "") @@ -22,6 +23,9 @@ function(op_library TARGET) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) list(APPEND cc_srcs ${TARGET}.cc) endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) + list(APPEND cu_cc_srcs ${TARGET}.cu.cc) + endif() if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) list(APPEND cu_srcs ${TARGET}.cu) endif() @@ -29,6 +33,8 @@ function(op_library TARGET) foreach(src ${op_library_SRCS}) if (${src} MATCHES ".*\\.cu$") list(APPEND cu_srcs ${src}) + elseif(${src} MATCHES ".*\\.cu.cc$") + list(APPEND cu_cc_srcs ${src}) elseif(${src} MATCHES ".*\\.cc$") list(APPEND cc_srcs ${src}) else() @@ -43,7 +49,7 @@ function(op_library TARGET) endif() if (WITH_GPU) - nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} + nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) else() cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS} @@ -55,6 +61,18 @@ function(op_library TARGET) set(pybind_flag 1) endif() + if ("${TARGET}" STREQUAL "compare_op") + set(pybind_flag 1) + file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(equal);\n") + endif() + + # conv_op contains several operators + if ("${TARGET}" STREQUAL "conv_op") + set(pybind_flag 1) + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_OP(conv2d);\n") + endif() + # pool_op contains several operators if ("${TARGET}" STREQUAL "pool_op") set(pybind_flag 1) @@ -62,23 +80,23 @@ function(op_library TARGET) 
file(APPEND ${pybind_file} "USE_OP(pool2d);\n") endif() - if ("${TARGET}" STREQUAL "compare_op") + # pool_cudnn_op contains several operators + if ("${TARGET}" STREQUAL "pool_cudnn_op") set(pybind_flag 1) - file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(equal);\n") + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_OP(pool2d_cudnn);\n") endif() - # pool_with_index_op contains several operators - if ("${TARGET}" STREQUAL "pool_with_index_op") + if ("${TARGET}" STREQUAL "logical_op") set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n") + file(APPEND ${pybind_file} "USE_OP(logical_and);\n") endif() - # conv_op contains several operators - if ("${TARGET}" STREQUAL "conv_op") + # pool_with_index_op contains several operators + if ("${TARGET}" STREQUAL "pool_with_index_op") set(pybind_flag 1) # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(conv2d);\n") + file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n") endif() # conv_transpose_op contains several operators @@ -87,12 +105,12 @@ function(op_library TARGET) # It's enough to just adding one operator to pybind file(APPEND ${pybind_file} "USE_OP(conv2d_transpose);\n") endif() - - # pool_cudnn_op contains several operators - if ("${TARGET}" STREQUAL "pool_cudnn_op") + + # conv_transpose_cudnn_op contains two operators + if ("${TARGET}" STREQUAL "conv_transpose_cudnn_op") set(pybind_flag 1) # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(pool2d_cudnn);\n") + file(APPEND ${pybind_file} "USE_OP(conv2d_transpose_cudnn);\n") endif() # save_restore_op contains several operators @@ -140,7 +158,9 @@ function(op_library TARGET) # pybind USE_CPU_ONLY_OP list(LENGTH cu_srcs cu_srcs_len) - if (${pybind_flag} EQUAL 0 AND ${cu_srcs_len} EQUAL 0) + list(LENGTH cu_cc_srcs cu_cc_srcs_len) + + if (${pybind_flag} EQUAL 0 AND 
${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0) file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n") set(pybind_flag 1) endif() @@ -160,11 +180,13 @@ set(DEPS_OPS recurrent_op dynamic_recurrent_op softmax_with_cross_entropy_op + softmax_op + sequence_softmax_op sum_op pool_op + maxout_op pool_with_index_op conv_op - lstm_op conv_transpose_op nccl_op sequence_conv_op @@ -174,14 +196,22 @@ set(DEPS_OPS array_to_lod_tensor_op lstm_op tensor_array_read_write_op - gru_op) + gru_op + adagrad_op + sgd_op) + op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cross_entropy_op DEPS cross_entropy) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) +op_library(softmax_op DEPS softmax) +op_library(sequence_softmax_op DEPS softmax) +op_library(sum_op DEPS selected_rows_functor) +op_library(sgd_op DEPS selected_rows_functor) +op_library(adagrad_op DEPS selected_rows_functor) op_library(conv_op DEPS vol2col) -op_library(sum_op DEPS net_op selected_rows_functor) op_library(pool_op DEPS pooling) +op_library(maxout_op DEPS maxouting) op_library(pool_with_index_op DEPS pooling) op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table) op_library(lod_tensor_to_array_op SRCS lod_tensor_to_array_op.cc DEPS lod_rank_table_op) @@ -220,6 +250,6 @@ cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc rnn/recurrent_op_utils.cc DEPS dynamic_recurrent_op) if(WITH_GPU) - nv_test(nccl_op_test SRCS nccl_op_test.cu DEPS nccl_op gpu_info device_context) + cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) diff --git a/paddle/operators/accuracy_op.cc b/paddle/operators/accuracy_op.cc index 03c2fa945d94a522d25e65103c8842a93852ba3d..2785a8c6fb62527db4d203788be88ebead068a19 100644 --- a/paddle/operators/accuracy_op.cc +++ b/paddle/operators/accuracy_op.cc @@ -30,6 +30,10 @@ class 
AccuracyOp : public framework::OperatorWithKernel { "Input (Label) of accuracy op should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Accuracy"), "Output (Accuracy) of AccuracyOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Correct"), + "Output (Correct) of AccuracyOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Total"), + "Output (Total) of AccuracyOp should not be null."); auto inference_dim = ctx->GetInputDim("Out"); auto label_dim = ctx->GetInputDim("Label"); @@ -43,6 +47,8 @@ class AccuracyOp : public framework::OperatorWithKernel { " the same as label."); ctx->SetOutputDim("Accuracy", {1}); + ctx->SetOutputDim("Correct", {1}); + ctx->SetOutputDim("Total", {1}); ctx->ShareLoD("Out", /*->*/ "Accuracy"); } @@ -66,6 +72,8 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Label", "Label of the training data"); // TODO(typhoonzero): AddInput("Weight", ... AddOutput("Accuracy", "The accuracy of current batch"); + AddOutput("Correct", "The correct samples count of current batch"); + AddOutput("Total", "The samples count of current batch"); AddComment(R"DOC( Accuracy Operator. diff --git a/paddle/operators/accuracy_op.cu b/paddle/operators/accuracy_op.cu index 1776f33105367447759aa91c25263dfc53bd2f99..d2dcab4e548b99c6beecfaa570ac31804fd07d82 100644 --- a/paddle/operators/accuracy_op.cu +++ b/paddle/operators/accuracy_op.cu @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include "paddle/operators/accuracy_op.h" #include "paddle/platform/cuda_helper.h" +#include "paddle/platform/gpu_info.h" namespace paddle { namespace operators { @@ -24,7 +25,8 @@ using platform::PADDLE_CUDA_NUM_THREADS; template __global__ void AccuracyCudaKernel(const int N, const int D, const int64_t* Xdata, - const int64_t* labeldata, float* accuracy) { + const int64_t* labeldata, int* correct_data, + float* accuracy) { int count = 0; __shared__ int total[BlockSize]; @@ -43,6 +45,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D, // reduce the count with init value 0, and output accuracy. int result = thrust::reduce(thrust::device, total, total + BlockSize, 0); if (threadIdx.x == 0) { + *correct_data = result; *accuracy = static_cast(result) / static_cast(N); } } @@ -56,31 +59,50 @@ class AccuracyOpCUDAKernel : public framework::OpKernel { auto* inference = ctx.Input("Out"); auto* indices = ctx.Input("Indices"); auto* label = ctx.Input("Label"); + auto* accuracy = ctx.Output("Accuracy"); + auto* correct = ctx.Output("Correct"); + auto* total = ctx.Output("Total"); // FIXME(typhoonzero): only support indices currently // if add support for output values, how to detect the data type? 
const int64_t* indices_data = indices->data(); const int64_t* label_data = label->data(); + + int* correct_data = correct->mutable_data(ctx.GetPlace()); + int* total_data = total->mutable_data(ctx.GetPlace()); float* accuracy_data = accuracy->mutable_data(ctx.GetPlace()); - size_t num_samples = inference->dims()[0]; + int num_samples = static_cast(inference->dims()[0]); size_t infer_width = inference->dims()[1]; - PADDLE_ENFORCE(cudaMemset(accuracy_data, 0, sizeof(float))); + auto stream = ctx.cuda_device_context().stream(); + platform::GpuMemsetAsync(accuracy_data, 0, sizeof(float), stream); if (num_samples == 0) { return; } + platform::GpuMemcpyAsync(total_data, &num_samples, sizeof(int), + cudaMemcpyHostToDevice, stream); + + AccuracyCudaKernel< + PADDLE_CUDA_NUM_THREADS><<<1, PADDLE_CUDA_NUM_THREADS, 0, stream>>>( + num_samples, infer_width, indices_data, label_data, correct_data, + accuracy_data); - AccuracyCudaKernel<<< - 1, PADDLE_CUDA_NUM_THREADS, 0, ctx.cuda_device_context().stream()>>>( - num_samples, infer_width, indices_data, label_data, accuracy_data); + int d_num_samples, d_num_correct; + float d_accuracy; + platform::GpuMemcpyAsync(&d_num_correct, correct_data, sizeof(int), + cudaMemcpyDeviceToHost, stream); + platform::GpuMemcpyAsync(&d_num_samples, total_data, sizeof(int), + cudaMemcpyDeviceToHost, stream); + platform::GpuMemcpyAsync(&d_accuracy, accuracy_data, sizeof(float), + cudaMemcpyDeviceToHost, stream); } }; } // namespace operators } // namespace paddle -// FIXME(typhoonzero): types of T is for infernece data. -// label data is always int +// FIXME(typhoonzero): types of T is for inference data. 
+// label data is always int64 REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel, paddle::operators::AccuracyOpCUDAKernel); diff --git a/paddle/operators/accuracy_op.h b/paddle/operators/accuracy_op.h index 28dbc77f64842a62e88ae8df4ead7adc3b03764b..d060e6edddb31ecc1a4d27836f80b8ac5fa7d36d 100644 --- a/paddle/operators/accuracy_op.h +++ b/paddle/operators/accuracy_op.h @@ -29,7 +29,11 @@ class AccuracyKernel : public framework::OpKernel { auto* indices = ctx.Input("Indices"); auto* label = ctx.Input("Label"); auto* accuracy = ctx.Output("Accuracy"); + auto* correct = ctx.Output("Correct"); + auto* total = ctx.Output("Total"); + int* correct_data = correct->mutable_data(ctx.GetPlace()); + int* total_data = total->mutable_data(ctx.GetPlace()); float* accuracy_data = accuracy->mutable_data(ctx.GetPlace()); const int64_t* indices_data = indices->data(); @@ -55,7 +59,8 @@ class AccuracyKernel : public framework::OpKernel { } } - // FIXME(typhoonzero): we don't accumulate the accuracy for now. 
+ *correct_data = num_correct; + *total_data = num_samples; *accuracy_data = static_cast(num_correct) / static_cast(num_samples); } diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index 83d35a450d0e8ebf5311cdfd948b066642ccec8c..c66d575d24bb6b410602c34965ab1db6bc81b41d 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -98,7 +98,6 @@ $y = \max(x, 0)$ } }; -template class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { public: LeakyReluOpMaker(framework::OpProto *proto, @@ -106,8 +105,7 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of LeakyRelu operator"); AddOutput("Y", "Output of LeakyRelu operator"); - AddAttr("alpha", "The small negative slope") - .SetDefault(static_cast(0.02f)); + AddAttr("alpha", "The small negative slope").SetDefault(0.02f); AddComment(R"DOC( LeakyRelu Activation Operator. @@ -117,7 +115,6 @@ $y = \max(x, \alpha * x)$ } }; -template class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { public: SoftShrinkOpMaker(framework::OpProto *proto, @@ -125,8 +122,7 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softshrink operator"); AddOutput("Y", "Output of Softshrink operator"); - AddAttr("lambda", "non-negative offset") - .SetDefault(static_cast(0.5f)); + AddAttr("lambda", "non-negative offset").SetDefault(0.5f); AddComment(R"DOC( Softshrink Activation Operator. 
@@ -173,7 +169,6 @@ $$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ } }; -template class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { public: HardShrinkOpMaker(framework::OpProto *proto, @@ -181,8 +176,8 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardShrink operator"); AddOutput("Y", "Output of HardShrink operator"); - AddAttr("threshold", "The value of threshold for HardShrink") - .SetDefault(static_cast(0.5)); + AddAttr("threshold", "The value of threshold for HardShrink") + .SetDefault(0.5f); AddComment(R"DOC( HardShrink Activation Operator. @@ -308,17 +303,16 @@ $$y = \frac{x}{1 + |x|}$$ } }; -template class BReluOpMaker : public framework::OpProtoAndCheckerMaker { public: BReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of BRelu operator"); AddOutput("Y", "Output of BRelu operator"); - AddAttr("t_min", "The min marginal value of BRelu") - .SetDefault(static_cast(0)); - AddAttr("t_max", "The max marginal value of BRelu") - .SetDefault(static_cast(24)); + AddAttr("t_min", "The min marginal value of BRelu") + .SetDefault(static_cast(0)); + AddAttr("t_max", "The max marginal value of BRelu") + .SetDefault(static_cast(24)); AddComment(R"DOC( BRelu Activation Operator. 
@@ -328,7 +322,6 @@ $y = \max(\min(x, t_{min}), t_{max})$ } }; -template class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker { public: SoftReluOpMaker(framework::OpProto *proto, @@ -336,8 +329,8 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of SoftRelu operator"); AddOutput("Y", "Output of SoftRelu operator"); - AddAttr("threshold", "The threshold value of SoftRelu") - .SetDefault(static_cast(40)); + AddAttr("threshold", "The threshold value of SoftRelu") + .SetDefault(40.0f); AddComment(R"DOC( SoftRelu Activation Operator. @@ -347,15 +340,13 @@ $y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$ } }; -template class ELUOpMaker : public framework::OpProtoAndCheckerMaker { public: ELUOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of ELU operator"); AddOutput("Y", "Output of ELU operator"); - AddAttr("alpha", "The alpha value of ELU") - .SetDefault(static_cast(1.0f)); + AddAttr("alpha", "The alpha value of ELU").SetDefault(1.0f); AddComment(R"DOC( ELU Activation Operator. @@ -368,15 +359,14 @@ $y = \max(0, x) + \min(0, \alpha * (e^x - 1))$ } }; -template class Relu6OpMaker : public framework::OpProtoAndCheckerMaker { public: Relu6OpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Relu6 operator"); AddOutput("Y", "Output of Relu6 operator"); - AddAttr("threshold", "The threshold value of Relu6") - .SetDefault(static_cast(6)); + AddAttr("threshold", "The threshold value of Relu6") + .SetDefault(6.0f); AddComment(R"DOC( Relu6 Activation Operator. 
@@ -386,15 +376,13 @@ $y = \min(\max(0, x), 6)$ } }; -template class PowOpMaker : public framework::OpProtoAndCheckerMaker { public: PowOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Pow operator"); AddOutput("Y", "Output of Pow operator"); - AddAttr("factor", "The exponential factor of Pow") - .SetDefault(static_cast(1)); + AddAttr("factor", "The exponential factor of Pow").SetDefault(1.0f); AddComment(R"DOC( Pow Activation Operator. @@ -404,17 +392,16 @@ $y = x^{factor}$ } }; -template class STanhOpMaker : public framework::OpProtoAndCheckerMaker { public: STanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of STanh operator"); AddOutput("Y", "Output of STanh operator"); - AddAttr("scale_a", "The scale parameter of a for the input") - .SetDefault(static_cast(2 / 3)); - AddAttr("scale_b", "The scale parameter of b for the input") - .SetDefault(static_cast(1.7159)); + AddAttr("scale_a", "The scale parameter of a for the input") + .SetDefault(2.0f / 3.0f); + AddAttr("scale_b", "The scale parameter of b for the input") + .SetDefault(1.7159f); AddComment(R"DOC( STanh Activation Operator. 
@@ -424,7 +411,6 @@ $$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ } }; -template class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { public: ThresholdedReluOpMaker(framework::OpProto *proto, @@ -432,8 +418,8 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of ThresholdedRelu operator"); AddOutput("Y", "Output of ThresholdedRelu operator"); - AddAttr("threshold", "The threshold location of activation") - .SetDefault(static_cast(1.0)); + AddAttr("threshold", "The threshold location of activation") + .SetDefault(1.0f); AddComment(R"DOC( ThresholdedRelu Activation Operator. @@ -448,7 +434,6 @@ $$ } }; -template class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { public: HardSigmoidOpMaker(framework::OpProto *proto, @@ -456,10 +441,10 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardSigmoid operator"); AddOutput("Y", "Output of HardSigmoid operator"); - AddAttr("slope", "Slope for linear approximation of sigmoid") - .SetDefault(static_cast(0.2)); - AddAttr("offset", "Offset for linear approximation of sigmoid") - .SetDefault(static_cast(0.5)); + AddAttr("slope", "Slope for linear approximation of sigmoid") + .SetDefault(0.2f); + AddAttr("offset", "Offset for linear approximation of sigmoid") + .SetDefault(0.5f); AddComment(R"DOC( HardSigmoid Activation Operator. 
@@ -499,7 +484,7 @@ REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad, REGISTER_OP(tanh_shrink, ops::ActivationOp, ops::TanhShrinkOpMaker, tanh_shrink_grad, ops::ActivationOpGrad); -REGISTER_OP(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker, +REGISTER_OP(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker, softshrink_grad, ops::ActivationOpGrad); REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad, @@ -523,35 +508,34 @@ REGISTER_OP(softplus, ops::ActivationOp, ops::SoftplusOpMaker, softplus_grad, REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad, ops::ActivationOpGrad); -REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker, brelu_grad, +REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker, brelu_grad, ops::ActivationOpGrad); -REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker, +REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker, leaky_relu_grad, ops::ActivationOpGrad); -REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker, - soft_relu_grad, ops::ActivationOpGrad); +REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker, soft_relu_grad, + ops::ActivationOpGrad); -REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker, elu_grad, +REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker, elu_grad, ops::ActivationOpGrad); -REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker, relu6_grad, +REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker, relu6_grad, ops::ActivationOpGrad); -REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker, pow_grad, +REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker, pow_grad, ops::ActivationOpGrad); -REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker, stanh_grad, +REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker, stanh_grad, ops::ActivationOpGrad); -REGISTER_OP(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker, +REGISTER_OP(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker, hard_shrink_grad, 
ops::ActivationOpGrad); -REGISTER_OP(thresholded_relu, ops::ActivationOp, - ops::ThresholdedReluOpMaker, thresholded_relu_grad, - ops::ActivationOpGrad); +REGISTER_OP(thresholded_relu, ops::ActivationOp, ops::ThresholdedReluOpMaker, + thresholded_relu_grad, ops::ActivationOpGrad); -REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, +REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, hard_sigmoid_grad, ops::ActivationOpGrad); #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ diff --git a/paddle/operators/adadelta_op.cc b/paddle/operators/adadelta_op.cc index b717e1647e4b89285b841420650dc69e8a1e0c58..16a7794d5b7bf1d56cd9f5874454c41cab43b41f 100644 --- a/paddle/operators/adadelta_op.cc +++ b/paddle/operators/adadelta_op.cc @@ -109,4 +109,5 @@ paramOut = param + paramUpdate$$ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(adadelta, ops::AdadeltaOp, ops::AdadeltaOpMaker); REGISTER_OP_CPU_KERNEL( - adadelta, ops::AdadeltaOpKernel); + adadelta, ops::AdadeltaOpKernel, + ops::AdadeltaOpKernel); diff --git a/paddle/operators/adadelta_op.cu b/paddle/operators/adadelta_op.cu index 3af1c8c8e9861138a33b3156818f704c3b20363f..9fb61852071f11670b8bc51321bb0881de196777 100644 --- a/paddle/operators/adadelta_op.cu +++ b/paddle/operators/adadelta_op.cu @@ -17,4 +17,5 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( - adadelta, ops::AdadeltaOpKernel); + adadelta, ops::AdadeltaOpKernel, + ops::AdadeltaOpKernel); diff --git a/paddle/operators/adadelta_op.h b/paddle/operators/adadelta_op.h index d29e15c43583bd447fbacb548a326f303f7d1463..a8c5f0c8aa20ce506f5279fa696079ba64034bd5 100644 --- a/paddle/operators/adadelta_op.h +++ b/paddle/operators/adadelta_op.h @@ -33,8 +33,8 @@ class AdadeltaOpKernel : public framework::OpKernel { avg_squared_grad_out_tensor->mutable_data(ctx.GetPlace()); avg_squared_update_out_tensor->mutable_data(ctx.GetPlace()); - float rho = ctx.Attr("rho"); - float epsilon = 
ctx.Attr("epsilon"); + T rho = static_cast(ctx.Attr("rho")); + T epsilon = static_cast(ctx.Attr("epsilon")); auto param = framework::EigenVector::Flatten( *ctx.Input("Param")); diff --git a/paddle/operators/adagrad_op.cc b/paddle/operators/adagrad_op.cc index 8d1a2b7938d2c6607cbeb3cecb72d1d5b83dd8b9..d6686e3ef3165976cf4c077a7a0f213082aa7716 100644 --- a/paddle/operators/adagrad_op.cc +++ b/paddle/operators/adagrad_op.cc @@ -14,6 +14,11 @@ limitations under the License. */ #include "paddle/operators/adagrad_op.h" +#include + +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/selected_rows_functor.h" + namespace paddle { namespace operators { @@ -21,7 +26,7 @@ class AdagradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext *ctx) const override { + void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("Param"), "Input(Param) of AdagradOp should not be null."); PADDLE_ENFORCE(ctx->HasInput("Grad"), @@ -54,8 +59,8 @@ class AdagradOp : public framework::OperatorWithKernel { class AdagradOpMaker : public framework::OpProtoAndCheckerMaker { public: - AdagradOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) + AdagradOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("Param", "(Tensor) Input parameter"); AddInput("Grad", "(Tensor) Input gradient"); @@ -87,10 +92,85 @@ for numerical stability to avoid the division by zero error. 
)DOC"); } }; + +namespace { +size_t FindPos(const std::vector& rows, int64_t value) { + return std::find(rows.begin(), rows.end(), value) - rows.begin(); +} +} // namespace + +template +struct SparseAdagradFunctor { + void operator()(const platform::DeviceContext& context, + const framework::SelectedRows& grad, + const framework::Tensor& learning_rate, T epsilon, + framework::Tensor* moment, framework::Tensor* param) { + // 1. g_m.rows = set(g.rows) + auto grad_rows = grad.rows(); + std::set row_set(grad_rows.begin(), grad_rows.end()); + std::vector merge_rows(row_set.begin(), row_set.end()); + + auto grad_width = grad.value().dims()[1]; + std::unique_ptr grad_merge{ + new framework::SelectedRows()}; + grad_merge->set_rows(merge_rows); + grad_merge->set_height(grad.height()); + grad_merge->mutable_value()->mutable_data( + framework::make_ddim( + {static_cast(merge_rows.size()), grad_width}), + context.GetPlace()); + + math::SetConstant constant_functor; + constant_functor(context, grad_merge->mutable_value(), 0.0); + + auto* grad_merge_data = grad_merge->mutable_value()->data(); + auto* grad_data = grad.value().data(); + + for (size_t i = 0; i < grad_rows.size(); i++) { + size_t grad_merge_i = FindPos(merge_rows, grad_rows[i]); + for (int64_t j = 0; j < grad_width; j++) { + grad_merge_data[grad_merge_i * grad_width + j] += + grad_data[i * grad_width + j]; + } + } + + // 2. m += g_m * g_m + std::unique_ptr grad_square{ + new framework::SelectedRows()}; + grad_square->set_rows(grad_merge->rows()); + grad_square->set_height(grad_merge->height()); + grad_square->mutable_value()->mutable_data(grad_merge->value().dims(), + context.GetPlace()); + auto gs = + framework::EigenVector::Flatten(*(grad_square->mutable_value())); + auto gm = framework::EigenVector::Flatten(grad_merge->value()); + gs.device(*context.GetEigenDevice()) = gm * gm; + + math::SelectedRowsAddToTensor functor; + functor(context, *grad_square, moment); + + // 3. 
update parameter + auto* lr = learning_rate.data(); + auto* param_data = param->data(); + auto* moment_data = moment->data(); + + for (size_t i = 0; i < merge_rows.size(); i++) { + for (int64_t j = 0; j < grad_width; j++) { + param_data[merge_rows[i] * grad_width + j] -= + lr[0] * grad_merge_data[i * grad_width + j] / + (std::sqrt(moment_data[merge_rows[i] * grad_width + j]) + epsilon); + } + } + } +}; + +template struct SparseAdagradFunctor; +template struct SparseAdagradFunctor; } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(adagrad, ops::AdagradOp, ops::AdagradOpMaker); -REGISTER_OP_CPU_KERNEL(adagrad, - ops::AdagradOpKernel); +REGISTER_OP_CPU_KERNEL( + adagrad, ops::AdagradOpKernel, + ops::AdagradOpKernel); diff --git a/paddle/operators/adagrad_op.cu b/paddle/operators/adagrad_op.cu index a5b7951121360f78612f9008a522235104708112..1c870214b29dbfcabb7414317b1214d6bef369cb 100644 --- a/paddle/operators/adagrad_op.cu +++ b/paddle/operators/adagrad_op.cu @@ -14,7 +14,138 @@ #define EIGEN_USE_GPU #include "paddle/operators/adagrad_op.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/selected_rows_functor.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { + +namespace { + +template +__global__ void MergeGradKernel(const T* grad, const int64_t* grad_rows, + T* grad_merge, const int64_t* grad_merge_rows, + size_t grad_merge_rows_size, + int64_t row_numel) { + const int ty = blockIdx.y; + int tid = threadIdx.x; + __shared__ size_t grad_merge_idx; + + if (tid == 0) { + for (size_t i = 0; i < grad_merge_rows_size; i++) { + if (grad_rows[ty] == grad_merge_rows[i]) { + grad_merge_idx = i; + } + } + } + + __syncthreads(); + + grad += ty * row_numel; + grad_merge += grad_merge_idx * row_numel; + for (int index = tid; index < row_numel; index += block_size) { + paddle::platform::CudaAtomicAdd(grad_merge + index, grad[index]); + } +} + 
+template +__global__ void SparseAdagradFunctorKernel(const T* grad, const int64_t* rows, + const T* learning_rate, T* param, + T* moment, int64_t row_numel, + T epsilon) { + const int ty = blockIdx.y; + int tid = threadIdx.x; + + grad += ty * row_numel; + param += rows[ty] * row_numel; + moment += rows[ty] * row_numel; + + for (int index = tid; index < row_numel; index += block_size) { + // Since index in rows of SelectedRows can be duplicate, we have to use + // Atomic Operation to avoid concurrent write error. + paddle::platform::CudaAtomicAdd(param + index, + -1.0 * learning_rate[0] * grad[index] / + (sqrt(moment[index]) + epsilon)); + } +} +} // namespace + +template +struct SparseAdagradFunctor { + void operator()(const platform::DeviceContext& context, + const framework::SelectedRows& grad, + const framework::Tensor& learning_rate, T epsilon, + framework::Tensor* moment, framework::Tensor* param) { + // 1. g_m.rows = set(g.rows) + auto grad_rows = grad.rows(); + std::set row_set(grad_rows.begin(), grad_rows.end()); + std::vector merge_rows(row_set.begin(), row_set.end()); + + auto grad_width = grad.value().dims()[1]; + std::unique_ptr grad_merge{ + new framework::SelectedRows()}; + grad_merge->set_rows(merge_rows); + grad_merge->set_height(grad.height()); + grad_merge->mutable_value()->mutable_data( + framework::make_ddim( + {static_cast(merge_rows.size()), grad_width}), + context.GetPlace()); + + math::SetConstant constant_functor; + constant_functor(context, grad_merge->mutable_value(), 0.0); + + auto* grad_merge_data = grad_merge->mutable_value()->data(); + auto* grad_data = grad.value().data(); + + const int block_size = 256; + dim3 threads(block_size, 1); + dim3 grid1(1, grad_rows.size()); + + MergeGradKernel< + T, 256><<(context) + .stream()>>>(grad_data, grad.rows().data(), + grad_merge_data, grad_merge->rows().data(), + grad_merge->rows().size(), grad_width); + + // 2. 
m += g_m * g_m + std::unique_ptr grad_square{ + new framework::SelectedRows()}; + grad_square->set_rows(grad_merge->rows()); + grad_square->set_height(grad_merge->height()); + grad_square->mutable_value()->mutable_data(grad_merge->value().dims(), + context.GetPlace()); + auto gs = + framework::EigenVector::Flatten(*(grad_square->mutable_value())); + auto gm = framework::EigenVector::Flatten(grad_merge->value()); + gs.device(*context.GetEigenDevice()) = gm * gm; + + math::SelectedRowsAddToTensor functor; + functor(context, *grad_square, moment); + + // 3. update parameter + auto* lr = learning_rate.data(); + auto* param_data = param->data(); + auto* moment_data = moment->data(); + + dim3 grid2(1, merge_rows.size()); + SparseAdagradFunctorKernel< + T, 256><<(context) + .stream()>>>(grad_merge_data, grad_merge->rows().data(), + lr, param_data, moment_data, grad_width, + epsilon); + } +}; + +template struct SparseAdagradFunctor; +template struct SparseAdagradFunctor; + +} // namespace operators +} // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(adagrad, - ops::AdagradOpKernel); +REGISTER_OP_GPU_KERNEL( + adagrad, ops::AdagradOpKernel, + ops::AdagradOpKernel); diff --git a/paddle/operators/adagrad_op.h b/paddle/operators/adagrad_op.h index c5d8f751d3527f89b96d4274328ba0bb5f6efa44..4d4a6434c7c472d8ceb01edfc4050fbb009d6c9f 100644 --- a/paddle/operators/adagrad_op.h +++ b/paddle/operators/adagrad_op.h @@ -19,35 +19,59 @@ limitations under the License. 
*/ namespace paddle { namespace operators { +template +struct SparseAdagradFunctor { + void operator()(const platform::DeviceContext& context, + const framework::SelectedRows& grad, + const framework::Tensor& learning_rate, T epsilon, + framework::Tensor* moment, framework::Tensor* param); +}; + template class AdagradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto param_out_tensor = ctx.Output("ParamOut"); - auto moment_out_tensor = ctx.Output("MomentOut"); + auto* param_out_tensor = ctx.Output("ParamOut"); + auto* moment_out_tensor = ctx.Output("MomentOut"); param_out_tensor->mutable_data(ctx.GetPlace()); moment_out_tensor->mutable_data(ctx.GetPlace()); - float epsilon = ctx.Attr("epsilon"); - - auto param = framework::EigenVector::Flatten( - *ctx.Input("Param")); - auto grad = framework::EigenVector::Flatten( - *ctx.Input("Grad")); - auto moment = framework::EigenVector::Flatten( - *ctx.Input("Moment")); - auto lr = framework::EigenVector::Flatten( - *ctx.Input("LearningRate")); - - auto param_out = framework::EigenVector::Flatten(*param_out_tensor); - auto moment_out = framework::EigenVector::Flatten(*moment_out_tensor); - auto place = ctx.GetEigenDevice(); - - moment_out.device(place) = moment + grad * grad; - Eigen::DSizes m_dsize(moment_out_tensor->numel()); - param_out.device(place) = - param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon); + T epsilon = static_cast(ctx.Attr("epsilon")); + + auto* grad_var = ctx.InputVar("Grad"); + if (grad_var->IsType()) { + auto param = framework::EigenVector::Flatten( + *ctx.Input("Param")); + auto grad = framework::EigenVector::Flatten( + *ctx.Input("Grad")); + auto moment = framework::EigenVector::Flatten( + *ctx.Input("Moment")); + auto lr = framework::EigenVector::Flatten( + *ctx.Input("LearningRate")); + + auto param_out = framework::EigenVector::Flatten(*param_out_tensor); + auto moment_out = 
framework::EigenVector::Flatten(*moment_out_tensor); + auto place = ctx.GetEigenDevice(); + + moment_out.device(place) = moment + grad * grad; + Eigen::DSizes m_dsize(moment_out_tensor->numel()); + param_out.device(place) = + param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon); + } else if (grad_var->IsType()) { + auto* param_tensor = ctx.Input("Param"); + PADDLE_ENFORCE_EQ(param_tensor, param_out_tensor); + + auto* moment_tensor = ctx.Input("Moment"); + PADDLE_ENFORCE_EQ(moment_tensor, moment_out_tensor); + + SparseAdagradFunctor functor; + functor(ctx.device_context(), *ctx.Input("Grad"), + *ctx.Input("LearningRate"), epsilon, + moment_out_tensor, param_out_tensor); + } else { + PADDLE_THROW("Unsupported Variable Type of Grad"); + } } }; diff --git a/paddle/operators/adam_op.cc b/paddle/operators/adam_op.cc index 97a091ae766abfba5412bbd32c34a6f80701fbf7..03faa2a7c5a486cb0d2b6f2f10d140eeb4c6c04e 100644 --- a/paddle/operators/adam_op.cc +++ b/paddle/operators/adam_op.cc @@ -127,4 +127,5 @@ paramOut = param - learningRate * moment_1/ ($\sqrt{(moment_2)} + \epsilon)$$ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(adam, ops::AdamOp, ops::AdamOpMaker); REGISTER_OP_CPU_KERNEL(adam, - ops::AdamOpKernel); + ops::AdamOpKernel, + ops::AdamOpKernel); diff --git a/paddle/operators/adam_op.cu b/paddle/operators/adam_op.cu index a3def912e540454275350209435eb01ae2151331..6e34f7818ce20c75692fe21776721ce200b7a147 100644 --- a/paddle/operators/adam_op.cu +++ b/paddle/operators/adam_op.cu @@ -17,4 +17,5 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(adam, - ops::AdamOpKernel); + ops::AdamOpKernel, + ops::AdamOpKernel); diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h index 45938006db1231a7a134964d729df6ca114d4dbe..7f7fa1da1c0d8d81d1bcb18a1bf542838eddccf7 100644 --- a/paddle/operators/adam_op.h +++ b/paddle/operators/adam_op.h @@ -31,9 +31,9 @@ class AdamOpKernel : public framework::OpKernel { 
moment1_out_tensor->mutable_data(ctx.GetPlace()); moment2_out_tensor->mutable_data(ctx.GetPlace()); - float beta1 = ctx.Attr("beta1"); - float beta2 = ctx.Attr("beta2"); - float epsilon = ctx.Attr("epsilon"); + T beta1 = static_cast(ctx.Attr("beta1")); + T beta2 = static_cast(ctx.Attr("beta2")); + T epsilon = static_cast(ctx.Attr("epsilon")); auto param = framework::EigenVector::Flatten( *ctx.Input("Param")); diff --git a/paddle/operators/adamax_op.cc b/paddle/operators/adamax_op.cc index 14cf3841b33a8153549e4c99ed2b75286e9c64db..d5bbc672e18f392d6a91383b919fefc4b2d8ff0e 100644 --- a/paddle/operators/adamax_op.cc +++ b/paddle/operators/adamax_op.cc @@ -126,4 +126,5 @@ division by 0 error. namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(adamax, ops::AdamaxOp, ops::AdamaxOpMaker); REGISTER_OP_CPU_KERNEL(adamax, - ops::AdamaxOpKernel); + ops::AdamaxOpKernel, + ops::AdamaxOpKernel); diff --git a/paddle/operators/adamax_op.cu b/paddle/operators/adamax_op.cu index fee3b6fc6b656917d79b84f48da8e63be7683890..057ef39025aa23704457ef7bbe54934d06cdc87f 100644 --- a/paddle/operators/adamax_op.cu +++ b/paddle/operators/adamax_op.cu @@ -17,4 +17,5 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(adamax, - ops::AdamaxOpKernel); + ops::AdamaxOpKernel, + ops::AdamaxOpKernel); diff --git a/paddle/operators/adamax_op.h b/paddle/operators/adamax_op.h index 2c99832ec08e9c1d9b5458c467d5238f9b1b3c37..bf36ed78604dd88c537db51fbeb38f43d0c46173 100644 --- a/paddle/operators/adamax_op.h +++ b/paddle/operators/adamax_op.h @@ -31,9 +31,9 @@ class AdamaxOpKernel : public framework::OpKernel { moment_out_tensor->mutable_data(ctx.GetPlace()); inf_norm_out_tensor->mutable_data(ctx.GetPlace()); - float beta1 = ctx.Attr("beta1"); - float beta2 = ctx.Attr("beta2"); - float epsilon = ctx.Attr("epsilon"); + T beta1 = static_cast(ctx.Attr("beta1")); + T beta2 = static_cast(ctx.Attr("beta2")); + T epsilon = static_cast(ctx.Attr("epsilon")); auto param = 
framework::EigenVector::Flatten( *ctx.Input("Param")); diff --git a/paddle/operators/array_operator.h b/paddle/operators/array_operator.h index 666043e824f885e9c0e79e319d0a38ba108c209a..233a81198e336d3190565fb18556f96979cec0ce 100644 --- a/paddle/operators/array_operator.h +++ b/paddle/operators/array_operator.h @@ -42,6 +42,7 @@ class ArrayOp : public framework::OperatorBase { } else { offset = static_cast(*i_tensor.data()); } + VLOG(10) << " Offset = " << offset; return offset; } }; diff --git a/paddle/operators/batch_norm_op.cu b/paddle/operators/batch_norm_op.cu.cc similarity index 100% rename from paddle/operators/batch_norm_op.cu rename to paddle/operators/batch_norm_op.cu.cc diff --git a/paddle/operators/beam_search_op.cc b/paddle/operators/beam_search_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..8c3e2a303fb8f12a8886c11cf112b859a6db7bcf --- /dev/null +++ b/paddle/operators/beam_search_op.cc @@ -0,0 +1,185 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/operators/beam_search_op.h" + +#include +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +void BeamSearch::operator()(const framework::LoDTensor &pre_ids, + framework::LoDTensor *selected_ids, + framework::LoDTensor *selected_scores) { + auto items = SelectTopBeamSizeItems(); + auto selected_items = ToMap(items); + PruneEndidCandidates(pre_ids, &selected_items); + // calculate the output tensor's height + size_t num_instances = std::accumulate( + std::begin(items), std::end(items), 0, + [](size_t a, std::vector &b) { return a + b.size(); }); + // the output tensor shape should be [num_instances, 1] + auto dims = framework::make_ddim( + std::vector({static_cast(num_instances), 1})); + selected_ids->Resize(dims); + selected_scores->Resize(dims); + + std::map> hash; + framework::LoD new_lod; + auto *ids_data = selected_ids->mutable_data(platform::CPUPlace()); + auto *scores_data = + selected_scores->mutable_data(platform::CPUPlace()); + + // fill in data + std::vector low_level; + size_t low_offset = 0; + for (auto &items : selected_items) { + low_level.push_back(low_offset); + for (auto &item : items) { + ids_data[low_offset] = item.id; + scores_data[low_offset] = item.score; + low_offset++; + } + } + // fill lod + auto abs_lod = framework::ToAbsOffset(ids_->lod()); + auto &high_level = abs_lod[lod_level_]; + framework::LoD lod(2); + lod[0].assign(high_level.begin(), high_level.end()); + lod[1].assign(low_level.begin(), low_level.end()); + selected_ids->set_lod(lod); + selected_scores->set_lod(lod); +} + +void BeamSearch::PruneEndidCandidates(const framework::LoDTensor &pre_ids, + std::vector> *items) { + auto *pre_ids_data = pre_ids.data(); + + for (size_t offset = 0; offset < items->size(); offset++) { + auto prefix_id = pre_ids_data[offset]; + if (prefix_id == end_id_) { + items->at(offset).clear(); + } + } +} + +std::vector> BeamSearch::ToMap( + const 
std::vector> &items) { + std::vector> result; + for (auto &entries : items) { + for (const auto &item : entries) { + if (item.offset >= result.size()) { + result.resize(item.offset + 1); + } + result[item.offset].push_back(item); + } + } + return result; +} + +std::vector> +BeamSearch::SelectTopBeamSizeItems() { + std::vector> result; + std::vector items; + // for each source sentence, select the top beam_size items across all + // candidate sets. + while (NextItemSet(&items)) { + std::nth_element(std::begin(items), std::begin(items) + beam_size_, + std::end(items), [](const Item &a, const Item &b) { + // TODO(superjom) make score's comparation customizable. + // partial sort in descending order + return a.score > b.score; + }); + // prune the top beam_size items. + if (items.size() > beam_size_) { + items.resize(beam_size_); + } + result.emplace_back(items); + } + return result; +} + +// the candidates of a source +bool BeamSearch::NextItemSet(std::vector *items) { + if (sent_offset_ >= ids_->NumElements(lod_level_)) { + return false; + } + // find the current candidates + auto ids = *ids_; + auto scores = *scores_; + + auto source_abs_two_level_lod = framework::SliceInLevel( + ids.lod(), lod_level_, sent_offset_, sent_offset_ + 1); + source_abs_two_level_lod = framework::ToAbsOffset(source_abs_two_level_lod); + auto abs_lod = framework::ToAbsOffset(ids.lod()); + PADDLE_ENFORCE_GE(source_abs_two_level_lod.size(), 2UL); + + auto *ids_data = ids.data(); + auto *scores_data = scores.data(); + + size_t instance_dim = 1; + for (int i = 1; i < ids.dims().size(); i++) { + instance_dim *= ids.dims()[i]; + } + + items->clear(); + items->reserve(framework::product(ids.dims())); + for (size_t offset = abs_lod[lod_level_][sent_offset_]; + offset < abs_lod[lod_level_][sent_offset_ + 1]; offset++) { + for (size_t d = 0; d < instance_dim; d++) { + const size_t dim_offset = offset * instance_dim + d; + items->emplace_back(offset, ids_data[dim_offset], + scores_data[dim_offset]); 
+ } + } + + sent_offset_++; + return true; +} + +class BeamSearchProtoAndCheckerMaker + : public framework::OpProtoAndCheckerMaker { + public: + BeamSearchProtoAndCheckerMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + // inputs and outputs stored in proto + AddInput("pre_ids", "ids in previous step"); + AddInput("ids", "a LoDTensor of shape of [None,k]"); + AddInput("scores", + "a LoDTensor that has the same shape and LoD with `ids`"); + AddOutput("selected_ids", + "a LoDTensor that stores the IDs selected by beam search"); + AddOutput( + "selected_scores", + "a LoDTensor that has the same shape and LoD with `selected_ids`"); + + // Attributes stored in AttributeMap + AddAttr("level", "the level of LoDTensor"); + AddAttr("beam_size", "beam size for beam search"); + AddAttr("end_id", + "the token id which indicates the end of a sequence"); + + AddComment( + "This is a beam search operator that help to generate sequences."); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_WITHOUT_GRADIENT(beam_search, paddle::operators::BeamSearchOp, + paddle::operators::BeamSearchProtoAndCheckerMaker); diff --git a/paddle/operators/beam_search_op.h b/paddle/operators/beam_search_op.h new file mode 100644 index 0000000000000000000000000000000000000000..cc556bfe42ab12d73c0eb503d033efc272b5dd68 --- /dev/null +++ b/paddle/operators/beam_search_op.h @@ -0,0 +1,226 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifdef PADDLE_WITH_TESTING +#include "gtest/gtest.h" +#endif + +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +/* + * This is an implementation of beam search. + * + * To explain the details, lets take machine translation task for example, in + * this task, one source sentence is translated to multiple target sentences, + * during this period, one sentence will be translated to multiple translation + * prefixes(target sentence that have not ended), in each time step a prefix + * will have some candidates, input the candidate ids and their corresponding + * scores (probabilities), it will sort and select the top beam_size candidates + * for each source sentence, and store the selected candidates's score and their + * corresponding ids to LoDTensors. + * + * A detailed example: + * + * Input + * + * ids: + * LoD (should have 2 levels) + * first level: [0, 1, 4] + * second level: [0, 1, 2, 3, 4] + * + * tensor's data + * [ + * [4, 2, 5] + * [2, 1, 3] + * [3, 5, 2] + * [8, 2, 1] + * ] + * + * scores: + * LoD same as `ids` + * tensor's data + * [ + * [0.5, 0.3, 0.2] + * [0.6, 0.3, 0.1] + * [0.9, 0.5, 0.1] + * [0.7, 0.5, 0.1] + * ] + * + * the inputs means that there are 2 source sentences to translate, and the + * first source has 1 prefix, the second source has 2 prefix. + * + * lets assume beam size is 2, and the beam search's output should be + * LoD + * first level: + * [0, 1, 2] + * second level: + * [0, 2, 4] + * + * tensor's data + * [[ + * 0.5, + * 0.3, + * 0.9, + * 0.7 + * ]] + * + * TODO all the prune operations should be in the beam search, so it is better + * to split the beam search algorithm into a sequence of smaller operators, and + * the prune operators can be inserted in this sequence. 
+ */ +class BeamSearch { + public: + // TODO(superjom) make type customizable + using id_t = size_t; + using score_t = float; + /* + * Input the arguments that needed by this class. + */ + BeamSearch(const framework::LoDTensor& ids, + const framework::LoDTensor& scores, size_t level, size_t beam_size, + int end_id) + : beam_size_(beam_size), + ids_(&ids), + scores_(&scores), + lod_level_(level), + end_id_(end_id) {} + + /* + * The main function of beam search. + * + * @selected_ids: a [None, 1]-shaped tensor with LoD. + * In a machine translation model, it might be the candidate term id sets, + * each set stored as a varience-length sequence. + * The format might be described with a two-level LoD + * - [[0 1] + * - [0 1 2]] + * - [[] + * - [0 1]] + * the first level of LoD tells that there are two source sentences. The + * second level describes the details of the candidate id set's offsets in + * the + * source sentences. + * + * @selected_scores: a LoD tensor with the same shape and LoD with + * selected_ids. + * It stores the corresponding scores of candidate ids in selected_ids. + * + * Return false if all the input tensor is empty, in machine translation task + * that means no candidates is provided, and the task will stop running. + */ + void operator()(const framework::LoDTensor& pre_ids, + framework::LoDTensor* selected_ids, + framework::LoDTensor* selected_scores); + + protected: + /* + * The basic items help to sort. + */ + struct Item { + Item() {} + Item(size_t offset, size_t id, float score) + : offset(offset), id(id), score(score) {} + // offset in the lod_level_+1 + size_t offset; + // the candidate id + id_t id; + // the corresponding score + score_t score; + }; + + void PruneEndidCandidates(const framework::LoDTensor& pre_ids, + std::vector>* items); + + /* + * Transform the items into a map whose key is offset, value is the items. 
+ * NOTE low performance + */ + std::vector> ToMap( + const std::vector>& inputs); + + /* + * For each source, select top beam_size records. + */ + std::vector> SelectTopBeamSizeItems(); + + /* + * Get the items of next source sequence, return false if no remaining items. + */ + bool NextItemSet(std::vector* items); + + private: + size_t beam_size_; + const framework::LoDTensor* ids_; + const framework::LoDTensor* scores_; + size_t lod_level_{0}; + size_t sent_offset_{0}; + int end_id_{0}; +}; + +class BeamSearchOp : public framework::OperatorBase { + public: + BeamSearchOp(const std::string& type, + const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + BeamSearchOp(const BeamSearchOp& o) + : framework::OperatorBase( + static_cast(o)) { + PADDLE_THROW("Not Implemented"); + } + + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override { + LOG(INFO) << "run beam search op"; + auto ids_var = scope.FindVar(Input("ids")); + auto scores_var = scope.FindVar(Input("scores")); + auto pre_ids_var = scope.FindVar(Input("pre_ids")); + PADDLE_ENFORCE_NOT_NULL(ids_var); + PADDLE_ENFORCE_NOT_NULL(scores_var); + PADDLE_ENFORCE_NOT_NULL(pre_ids_var); + + auto& ids = ids_var->Get(); + auto& scores = scores_var->Get(); + auto& pre_ids = pre_ids_var->Get(); + size_t level = Attr("level"); + size_t beam_size = Attr("beam_size"); + int end_id = Attr("end_id"); + LOG(INFO) << "init beam search"; + BeamSearch alg(ids, scores, level, beam_size, end_id); + + LOG(INFO) << "after beam search"; + auto selected_ids_var = scope.FindVar(Output("selected_ids")); + auto selected_scores_var = scope.FindVar(Output("selected_scores")); + PADDLE_ENFORCE_NOT_NULL(selected_ids_var); + PADDLE_ENFORCE_NOT_NULL(selected_scores_var); + auto& selected_ids_tensor = + *selected_ids_var->GetMutable(); + auto& selected_scores_tensor = + 
*selected_scores_var->GetMutable(); + LOG(INFO) << "run beam search"; + alg(pre_ids, &selected_ids_tensor, &selected_scores_tensor); + LOG(INFO) << "finish beam search"; + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h index ffa4f43a327418498c1f110504127e7d2878409d..1113a4c6f357edb4f6b14b73c6eec9c6cca24ce5 100644 --- a/paddle/operators/bilinear_tensor_product_op.h +++ b/paddle/operators/bilinear_tensor_product_op.h @@ -174,7 +174,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { // Caculate the gradient of Input(Bias). if (d_bias) { d_bias->mutable_data(ctx.GetPlace()); - auto d_bias_mat = EigenMatrix::From(*d_bias); + auto d_bias_mat = framework::EigenVector::Flatten(*d_bias); d_bias_mat.device(place) = d_out_mat.sum(Eigen::DSizes(0)); } } diff --git a/paddle/operators/concat_op.cu b/paddle/operators/concat_op.cu.cc similarity index 100% rename from paddle/operators/concat_op.cu rename to paddle/operators/concat_op.cu.cc diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc index 97f31bf22d7072d89bd043045045dcb5bb5518b8..c03dc3e4fb07ac6ecde42be93a1138d91778edf4 100644 --- a/paddle/operators/conv_cudnn_op.cc +++ b/paddle/operators/conv_cudnn_op.cc @@ -22,8 +22,6 @@ class CudnnConvOpMaker : public Conv2DOpMaker { CudnnConvOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : Conv2DOpMaker(proto, op_checker) { - AddAttr>("dilations", "dilations of convolution operator.") - .SetDefault(std::vector{1, 1}); AddAttr("workspace_size_MB", "workspace size for cudnn, in MB, " "workspace is a section of GPU memory which will be " @@ -42,7 +40,8 @@ REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad, ops::ConvOpGrad); REGISTER_OP_CPU_KERNEL(conv_cudnn, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_CPU_KERNEL( - conv_cudnn_grad, - 
ops::GemmConvGradKernel); + conv_cudnn_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_cudnn_op.cu b/paddle/operators/conv_cudnn_op.cu.cc similarity index 97% rename from paddle/operators/conv_cudnn_op.cu rename to paddle/operators/conv_cudnn_op.cu.cc index 2aec4a2760260623c4c7054c590afa8e1c6c3fea..5eaf6b33704eb371fff4b949c6cc32a7a5dbc812 100644 --- a/paddle/operators/conv_cudnn_op.cu +++ b/paddle/operators/conv_cudnn_op.cu.cc @@ -226,9 +226,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel { T alpha = 1.0f, beta = 0.0f; if (input_grad) { T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); - auto t = framework::EigenVector::Flatten(*input_grad); - t.device(ctx.GetEigenDevice()) = - t.constant(static_cast(0)); + // Because beta is zero, it is unnecessary to reset input_grad. + for (int i = 0; i < groups; i++) { PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData( handle, &alpha, cudnn_filter_desc, @@ -241,9 +240,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel { // ------------------- cudnn conv backward filter --------------------- if (filter_grad) { T* filter_grad_data = filter_grad->mutable_data(ctx.GetPlace()); - auto t = framework::EigenVector::Flatten(*filter_grad); - t.device(ctx.GetEigenDevice()) = - t.constant(static_cast(0)); + // Because beta is zero, it is unnecessary to reset filter_grad. 
+ for (int i = 0; i < groups; i++) { PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter( handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in, @@ -261,6 +259,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel); +REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel, + paddle::operators::CudnnConvOpKernel); REGISTER_OP_GPU_KERNEL(conv_cudnn_grad, - paddle::operators::CudnnConvGradOpKernel); + paddle::operators::CudnnConvGradOpKernel, + paddle::operators::CudnnConvGradOpKernel); diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc index a6f65f10165929316f971d195f3790fd9e7ed376..7a36a9b21aa6a1b415ac5a232e65eda8051c87f8 100644 --- a/paddle/operators/conv_op.cc +++ b/paddle/operators/conv_op.cc @@ -30,6 +30,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { std::vector strides = ctx->Attrs().Get>("strides"); std::vector paddings = ctx->Attrs().Get>("paddings"); int groups = ctx->Attrs().Get("groups"); + std::vector dilations = ctx->Attrs().Get>("dilations"); int input_channels = in_dims[1]; int output_channels = filter_dims[0]; @@ -52,9 +53,15 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { "The number of output channels should be divided by groups."); std::vector output_shape({in_dims[0], filter_dims[0]}); - for (size_t i = 0; i < paddings.size(); ++i) { + for (size_t i = 0; i < strides.size(); ++i) { + PADDLE_ENFORCE(in_dims[i + 2] + 2 * paddings[i] - + (dilations[i] * (filter_dims[i + 2] - 1) + 1) > + 0, + "Due to the settings of paddings, filter_dims and " + "dilations, the output size is less than 0, please check " + "again."); output_shape.push_back(OutputSize(in_dims[i + 2], filter_dims[i + 2], - paddings[i], strides[i])); + dilations[i], paddings[i], strides[i])); } ctx->SetOutputDim("Output", 
framework::make_ddim(output_shape)); } @@ -78,9 +85,15 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, AddOutput("Output", "(Tensor) The output tensor of convolution operator. " "The format of output tensor is also NCHW."); - AddAttr>("strides", "strides of convolution operator.") + AddAttr>("strides", + "(vector default:{1, 1}), the " + "strides(h_stride, w_stride) of " + "convolution operator.") .SetDefault({1, 1}); - AddAttr>("paddings", "paddings of convolution operator.") + AddAttr>("paddings", + "(vector default:{0, 0}), the " + "paddings(h_pad, w_pad) of " + "convolution operator.") .SetDefault({0, 0}); AddAttr( "groups", @@ -90,15 +103,20 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, "first half of the input channels, while the second half of the filters " "is only connected to the second half of the input channels.") .SetDefault(1); + AddAttr>("dilations", + "(vector default:{1, 1}), the " + "dilations(h_dilation, w_dilation) of " + "convolution operator.") + .SetDefault({1, 1}); AddComment(R"DOC( Convolution Operator. The convolution operation calculates the output based on the input, filter -and strides, paddings, groups parameters. The size of each dimension of the +and strides, paddings, groups, dilations parameters. The size of each dimension of the parameters is checked in the infer-shape. Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch size, C is the number of channels, H is the height of the feature, and W is -the width of the feature. Parameters(ksize, strides, paddings) are two elements. +the width of the feature. Parameters(ksize, strides, paddings, dilations) are two elements. These two elements represent height and width, respectively. The input(X) size and output(Out) size may be different. 
@@ -109,8 +127,8 @@ Example: Output: Output shape: (N, C_out, H_out, W_out) where - H_out = (H_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1; - W_out = (W_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1; + H_out = (H_in + 2 * paddings[0] - (dilations[0]*(filter_size[0] - 1) + 1)) / strides[0] + 1; + W_out = (W_in + 2 * paddings[1] - (dilations[1]*(filter_size[1] - 1) + 1)) / strides[1] + 1; )DOC"); } @@ -135,13 +153,15 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto, AddOutput("Output", "(Tensor) The output tensor of convolution operator." "The format of output tensor is also NCDHW."); - AddAttr>( - "strides", - "(vector, default:{0, 0, 0}), the strides of convolution operator.") + AddAttr>("strides", + "(vector, default:{1, 1, 1}), the " + "strides(d_stride, h_stride, w_stride) of " + "convolution operator.") .SetDefault({1, 1, 1}); - AddAttr>( - "paddings", - "(vector, default:{0, 0, 0}), the paddings of convolution operator.") + AddAttr>("paddings", + "(vector, default:{0, 0, 0}), the " + "paddings(d_pad, h_pad, w_pad) of convolution " + "operator.") .SetDefault({0, 0, 0}); AddAttr( "groups", @@ -151,6 +171,12 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto, "first half of the input channels, while the second half of the filters " "is only connected to the second half of the input channels.") .SetDefault(1); + AddAttr>("dilations", + "(vector default:{1, 1, 1}), the " + "dilations(d_dilation, h_dilation, w_dilation) of " + "convolution operator. Currently, conv3d doesn't " + "support dilation.") + .SetDefault({1, 1, 1}); AddComment(R"DOC( Convolution3D Operator. 
@@ -199,11 +225,15 @@ REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad, ops::ConvOpGrad); REGISTER_OP_CPU_KERNEL(conv2d, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_CPU_KERNEL( - conv2d_grad, ops::GemmConvGradKernel); + conv2d_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); REGISTER_OP_CPU_KERNEL(conv3d, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_CPU_KERNEL( - conv3d_grad, ops::GemmConvGradKernel); + conv3d_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_op.cu b/paddle/operators/conv_op.cu.cc similarity index 75% rename from paddle/operators/conv_op.cu rename to paddle/operators/conv_op.cu.cc index 8e6f9da455b7291049aee57189dae15b8bcc2150..546451234a1ed1a4d3119cb175c6d37ae3f0aac1 100644 --- a/paddle/operators/conv_op.cu +++ b/paddle/operators/conv_op.cu.cc @@ -17,11 +17,15 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(conv2d, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_GPU_KERNEL( - conv2d_grad, ops::GemmConvGradKernel); + conv2d_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); REGISTER_OP_GPU_KERNEL(conv3d, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_GPU_KERNEL( - conv3d_grad, ops::GemmConvGradKernel); + conv3d_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h index 7c1729213bf3f5f3987afbf2d51d5b5339ae521d..fac5f1d0e25fe205f89fc7eeb9fadfd8431517d5 100644 --- a/paddle/operators/conv_op.h +++ b/paddle/operators/conv_op.h @@ -27,11 +27,24 @@ using Tensor = framework::Tensor; // Base convolution operator definations for other conv // like operators to reuse the implementation. 
-inline int OutputSize(int input_size, int filter_size, int padding, - int stride) { - int output_size = (input_size - filter_size + 2 * padding) / stride + 1; +inline int OutputSize(int input_size, int filter_size, int dilation, + int padding, int stride) { + const int dkernel = dilation * (filter_size - 1) + 1; + const int output_size = (input_size + 2 * padding - dkernel) / stride + 1; return output_size; } +inline bool IsExpand(std::vector& filter_dim, + std::vector& strides, std::vector& paddings, + std::vector& dilations) { + bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; + for (size_t j = 0; j < strides.size(); ++j) { + filter_1 = filter_1 && (static_cast(filter_dim[j]) == 1); + strides_1 = strides_1 && (strides[j] == 1); + padding_0 = padding_0 && (paddings[j] == 0); + dilation_1 = dilation_1 && (dilations[j] == 1); + } + return !(filter_1 && strides_1 && padding_0 && dilation_1); +} // Define Op classes in .h file so that other conv // operator implementations can reuse the code. 
@@ -50,14 +63,12 @@ class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker { class ConvOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override; }; class ConvOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override; }; @@ -73,9 +84,10 @@ class GemmConvKernel : public framework::OpKernel { Tensor* output = context.Output("Output"); output->mutable_data(context.GetPlace()); + int groups = context.Attr("groups"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); - int groups = context.Attr("groups"); + std::vector dilations = context.Attr>("dilations"); const int batch_size = static_cast(input->dims()[0]); @@ -106,14 +118,17 @@ class GemmConvKernel : public framework::OpKernel { framework::DDim col_matrix_shape = framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations); Tensor col; - col.mutable_data(col_shape, context.GetPlace()); // col_matrix shares the same piece of data with col, // but will be reshaped into a two-dimensional matrix shape // to call the matrix multiplication interface. 
Tensor col_matrix; - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); + if (is_expand) { + col.mutable_data(col_shape, context.GetPlace()); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } framework::DDim input_shape = framework::slice_ddim( input->dims(), 1, static_cast(input->dims().size())); @@ -130,24 +145,30 @@ class GemmConvKernel : public framework::OpKernel { int in_step = static_cast(input->dims()[1]) / groups; int out_step = static_cast(output->dims()[1]) / groups; + math::Vol2ColFunctor vol2col; + math::Im2ColFunctor im2col; + for (int i = 0; i < batch_size; i++) { Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); + for (int g = 0; g < groups; g++) { Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - if (filter_shape_vec.size() == 2) { + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (filter_shape_vec.size() == 2) { // im2col - math::Im2ColFunctor im2col; - im2col(context.device_context(), in_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + im2col(context.device_context(), in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); } else if (filter_shape_vec.size() == 3) { // vol2col - math::Vol2ColFunctor vol2col; - vol2col(context.device_context(), in_slice, col, strides[0], - strides[1], strides[2], paddings[0], paddings[1], - paddings[2]); + vol2col(context.device_context(), in_slice, dilations, strides, + paddings, &col); } // gemm @@ -178,9 +199,10 @@ class GemmConvGradKernel : public framework::OpKernel { if (!input_grad && !filter_grad) return; + int groups = context.Attr("groups"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); - int groups = context.Attr("groups"); + 
std::vector dilations = context.Attr>("dilations"); const int batch_size = static_cast(input->dims()[0]); @@ -230,14 +252,17 @@ class GemmConvGradKernel : public framework::OpKernel { int in_step = static_cast(input->dims()[1]) / groups; int out_step = static_cast(output_grad->dims()[1]) / groups; + bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations); Tensor col; // col_matrix shares the same piece of data with col, // but will be reshaped into a two-dimensional matrix shape // to call the matrix multiplication interface. Tensor col_matrix; - col.mutable_data(col_shape, context.GetPlace()); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); + if (is_expand) { + col.mutable_data(col_shape, context.GetPlace()); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } math::SetConstant set_zero; @@ -245,6 +270,9 @@ class GemmConvGradKernel : public framework::OpKernel { input_grad->mutable_data(context.GetPlace()); set_zero(context.device_context(), input_grad, static_cast(0)); + math::Col2VolFunctor col2vol; + math::Col2ImFunctor col2im; + for (int i = 0; i < batch_size; i++) { Tensor out_grad_batch = output_grad->Slice(i, i + 1).Resize(output_matrix_shape); @@ -254,24 +282,26 @@ class GemmConvGradKernel : public framework::OpKernel { Tensor out_grad_slice = out_grad_batch.Slice(g * out_step, (g + 1) * out_step); Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); - math::matmul(context.device_context(), filter_slice, true, - out_grad_slice, false, T(1.0), &col_matrix, - T(0.0)); - // col2im + Tensor in_grad_slice = in_grad_batch.Slice(g * in_step, (g + 1) * in_step); - if (filter_shape_vec.size() == 2) { - math::Col2ImFunctor col2im; - col2im(context.device_context(), in_grad_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + if (!is_expand) { + col_matrix.ShareDataWith(in_grad_slice); + col_matrix.Resize(col_matrix_shape); + } + 
math::matmul(context.device_context(), filter_slice, true, + out_grad_slice, false, T(1.0), &col_matrix, + T(0.0)); - } else if (filter_shape_vec.size() == 3) { - math::Col2VolFunctor col2vol; - col2vol(context.device_context(), in_grad_slice, col, strides[0], - strides[1], strides[2], paddings[0], paddings[1], - paddings[2]); + if (is_expand && filter_shape_vec.size() == 2) { + col2im(context.device_context(), col, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &in_grad_slice); + } else if (is_expand && filter_shape_vec.size() == 3) { + col2vol(context.device_context(), col, dilations, strides, paddings, + &in_grad_slice); } } } @@ -282,7 +312,8 @@ class GemmConvGradKernel : public framework::OpKernel { Tensor filter_grad_ = *filter_grad; filter_grad_.Resize(filter_matrix_shape); set_zero(context.device_context(), filter_grad, static_cast(0)); - + math::Im2ColFunctor im2col; + math::Vol2ColFunctor vol2col; for (int i = 0; i < batch_size; i++) { Tensor out_grad_batch = output_grad->Slice(i, i + 1).Resize(output_matrix_shape); @@ -293,16 +324,18 @@ class GemmConvGradKernel : public framework::OpKernel { out_grad_batch.Slice(g * out_step, (g + 1) * out_step); Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - if (filter_shape_vec.size() == 2) { - math::Im2ColFunctor im2col; - im2col(context.device_context(), in_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (filter_shape_vec.size() == 2) { + im2col(context.device_context(), in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); } else if (filter_shape_vec.size() == 3) { - math::Vol2ColFunctor vol2col; - vol2col(context.device_context(), in_slice, col, strides[0], - strides[1], strides[2], paddings[0], paddings[1], - 
paddings[2]); + vol2col(context.device_context(), in_slice, dilations, strides, + paddings, &col); } // gemm diff --git a/paddle/operators/conv_shift_op.cu b/paddle/operators/conv_shift_op.cu index 74ed1b0ed358afc4f1a4e6a0c322eb032029d551..95e13c38a8dd234f49393d2d4808607a447b0d4c 100644 --- a/paddle/operators/conv_shift_op.cu +++ b/paddle/operators/conv_shift_op.cu @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/operators/conv_shift_op.h" +#include "paddle/operators/math/math_function.h" #include "paddle/platform/cuda_helper.h" namespace paddle { @@ -22,7 +23,7 @@ using framework::Tensor; namespace { -inline int div_up(int x, int y) { return (x + y - 1) / y; } +inline int DivUp(int x, int y) { return (x + y - 1) / y; } // Some notes on the design: // @@ -33,9 +34,9 @@ inline int div_up(int x, int y) { return (x + y - 1) / y; } // y is fairly small. For large y, it would probably be more efficient // to also tile across y. template -__global__ void conv_shift_forward(const T *x, const T *y, T *out, int x_width, - int y_width, int y_half_width, - int batch_size) { +__global__ void ConvShiftForward(const T *x, const T *y, int x_width, + int y_width, int y_half_width, int batch_size, + T *out) { extern __shared__ T mem[]; int tx = threadIdx.x; @@ -62,25 +63,26 @@ __global__ void conv_shift_forward(const T *x, const T *y, T *out, int x_width, if (tx < num_x) { int load_i = (i - y_half_width + x_width) % x_width; sx[tx] = x[k * x_width + load_i]; - } else { - return; } __syncthreads(); - // Compute dot product of sx[tx:tx + y_width] and sy. - T sum = 0; - for (int j = 0; j < y_width; ++j) { - sum += sx[tx + j] * sy[j]; - } + if (tx < num_x) { + // Compute dot product of sx[tx:tx + y_width] and sy. + T sum = 0; + for (int j = 0; j < y_width; ++j) { + sum += sx[tx + j] * sy[j]; + } - // Save to out[k, i]. - out[k * x_width + i] = sum; + // Save to out[k, i]. 
+ out[k * x_width + i] = sum; + } } // Compute x gradient - initial naive implementation with atomic add. template -__global__ void conv_shift_dx(const T *dout, const T *y, T *dx, int x_width, - int y_width, int y_half_width, int batch_size) { +__global__ void ConvShiftGradX(const T *dout, const T *y, int x_width, + int y_width, int y_half_width, int batch_size, + T *dx) { int i = blockIdx.x * blockDim.x + threadIdx.x; // x index int j = blockIdx.y; // y index int k = blockIdx.z; // batch index @@ -94,8 +96,8 @@ __global__ void conv_shift_dx(const T *dout, const T *y, T *dx, int x_width, // Compute y gradient - initial naive implementation with atomic add. template -__global__ void conv_shift_dy(const T *x, const T *dout, T *dy, int x_width, - int y_width, int y_half_width, int batch_size) { +__global__ void ConvShiftDy(const T *x, const T *dout, int x_width, int y_width, + int y_half_width, int batch_size, T *dy) { int i = blockIdx.x * blockDim.x + threadIdx.x; // x index int j = blockIdx.y; // y index int k = blockIdx.z; // batch index @@ -125,15 +127,15 @@ class ConvShiftKernel : public framework::OpKernel { int y_half_width = (y_width - 1) / 2; const int x_per_block = 256; - int num_x_blocks = div_up(x_width, x_per_block); + int num_x_blocks = DivUp(x_width, x_per_block); int mem_per_block = (x_per_block + 2 * y_width) * sizeof(T); dim3 grid_dim(num_x_blocks, batch_size); auto stream = context.cuda_device_context().stream(); - conv_shift_forward<<>>( - x_data, y_data, out_data, x_width, y_width, y_half_width, batch_size); + ConvShiftForward<<>>( + x_data, y_data, x_width, y_width, y_half_width, batch_size, out_data); } }; @@ -157,25 +159,26 @@ class ConvShiftGradKernel int y_width = Y->dims()[1]; int y_half_width = (y_width - 1) / 2; - auto stream = context.cuda_device_context().stream(); + auto &device_ctx = context.cuda_device_context(); + math::SetConstant zero; const int x_per_block = 256; - int num_x_blocks = div_up(x_width, x_per_block); + int 
num_x_blocks = DivUp(x_width, x_per_block); dim3 grid_dim(num_x_blocks, y_width, batch_size); if (dX) { T *dx_data = dX->mutable_data(context.GetPlace()); - cudaMemsetAsync(dx_data, 0, dX->numel() * sizeof(T), stream); - conv_shift_dx<<>>( - dout_data, y_data, dx_data, x_width, y_width, y_half_width, - batch_size); + zero(device_ctx, dX, static_cast(0.0)); + ConvShiftGradX<<>>( + dout_data, y_data, x_width, y_width, y_half_width, batch_size, + dx_data); } if (dY) { T *dy_data = dY->mutable_data(context.GetPlace()); - cudaMemsetAsync(dy_data, 0, dY->numel() * sizeof(T), stream); - conv_shift_dy<<>>( - x_data, dout_data, dy_data, x_width, y_width, y_half_width, - batch_size); + zero(device_ctx, dY, static_cast(0.0)); + ConvShiftDy<<>>( + x_data, dout_data, x_width, y_width, y_half_width, batch_size, + dy_data); } } }; diff --git a/paddle/operators/conv2d_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc similarity index 55% rename from paddle/operators/conv2d_transpose_cudnn_op.cc rename to paddle/operators/conv_transpose_cudnn_op.cc index fce1357ce5af5f11ccc5941690431393301e6725..0192178ce3a0a47196232f0723baec8324bea60b 100644 --- a/paddle/operators/conv2d_transpose_cudnn_op.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cc @@ -23,7 +23,24 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker { framework::OpAttrChecker* op_checker) : Conv2DTransposeOpMaker(proto, op_checker) { AddAttr>("dilations", "dilations of convolution operator.") - .SetDefault(std::vector{1, 1}); + .SetDefault({1, 1}); + AddAttr("workspace_size_MB", + "workspace size for cudnn, in MB, " + "workspace is a section of GPU memory which will be " + "allocated/freed each time the operator runs, larger " + "workspace size can increase performance but also requires " + "better hardward. 
This size should be carefully setted.") + .SetDefault(4096); + } +}; + +class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker { + public: + CudnnConv3DTransposeOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : Conv3DTransposeOpMaker(proto, op_checker) { + AddAttr>("dilations", "dilations of convolution operator.") + .SetDefault({1, 1, 1}); AddAttr("workspace_size_MB", "workspace size for cudnn, in MB, " "workspace is a section of GPU memory which will be " @@ -44,7 +61,22 @@ REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp, REGISTER_OP_CPU_KERNEL( conv2d_transpose_cudnn, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv2d_transpose_cudnn_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); + +REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp, + ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad, + ops::ConvTransposeOpGrad); + +REGISTER_OP_CPU_KERNEL( + conv3d_transpose_cudnn, + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); +REGISTER_OP_CPU_KERNEL( + conv3d_transpose_cudnn_grad, + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv2d_transpose_cudnn_op.cu b/paddle/operators/conv_transpose_cudnn_op.cu.cc similarity index 89% rename from paddle/operators/conv2d_transpose_cudnn_op.cu rename to paddle/operators/conv_transpose_cudnn_op.cu.cc index 694526ec01214acf2ec6a3d68d3cf072739ac185..494904fe524ae30a5032e489a0c5f20179d8e8ce 100644 --- a/paddle/operators/conv2d_transpose_cudnn_op.cu +++ b/paddle/operators/conv_transpose_cudnn_op.cu.cc @@ -54,15 +54,21 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel { ScopedTensorDescriptor output_desc; ScopedFilterDescriptor filter_desc; ScopedConvolutionDescriptor conv_desc; - DataLayout layout = DataLayout::kNCHW; + DataLayout layout; + + if 
(strides.size() == 2U) { + layout = DataLayout::kNCHW; + } else { + layout = DataLayout::kNCDHW; + } - // N, M, H, W + // (N, M, H, W) or (N, M, D, H, W) cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); - // N, C, O_h, O_w + // (N, C, O_h, O_w) or (N, C, O_d, O_h, O_w) cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( layout, framework::vectorize2int(output->dims())); - // M, C, K_h, K_w + // (M, C, K_h, K_w) or (M, C, K_d, K_h, K_w) cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor( layout, framework::vectorize2int(filter->dims())); cudnnConvolutionDescriptor_t cudnn_conv_desc = @@ -136,13 +142,13 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { ScopedConvolutionDescriptor conv_desc; DataLayout layout = DataLayout::kNCHW; - // Input: (N, M, H, W) + // Input: (N, M, H, W) or (N, M, D, H, W) cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); - // Output: (N, C, O_H, O_W) + // Output: (N, C, O_h, O_w) or (N, C, O_d, O_h, O_w) cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( layout, framework::vectorize2int(output_grad->dims())); - // Filter (M, C, K_H, K_W) + // Filter (M, C, K_h, K_w) or (M, C, K_d K_h, K_w) cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor( layout, framework::vectorize2int(filter->dims())); @@ -200,10 +206,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { T alpha = 1.0f, beta = 0.0f; if (input_grad) { T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); - auto t = framework::EigenVector::Flatten(*input_grad); - t.device(ctx.GetEigenDevice()) = - t.constant(static_cast(0)); - + // Because beta is zero, it is unnecessary to reset input_grad. 
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward( handle, &alpha, cudnn_output_desc, output_grad_data, cudnn_filter_desc, filter_data, cudnn_conv_desc, data_algo, @@ -214,9 +217,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { // ------------------- cudnn conv backward filter --------------------- if (filter_grad) { T* filter_grad_data = filter_grad->mutable_data(ctx.GetPlace()); - auto t = framework::EigenVector::Flatten(*filter_grad); - t.device(ctx.GetEigenDevice()) = - t.constant(static_cast(0)); + // Because beta is zero, it is unnecessary to reset filter_grad. // Gradient with respect to the filter PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter( handle, &alpha, cudnn_output_desc, output_grad_data, cudnn_input_desc, @@ -234,6 +235,15 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn, - ops::CudnnConvTransposeOpKernel); + ops::CudnnConvTransposeOpKernel, + ops::CudnnConvTransposeOpKernel); REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn_grad, - ops::CudnnConvTransposeGradOpKernel); + ops::CudnnConvTransposeGradOpKernel, + ops::CudnnConvTransposeGradOpKernel); + +REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn, + ops::CudnnConvTransposeOpKernel, + ops::CudnnConvTransposeOpKernel); +REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn_grad, + ops::CudnnConvTransposeGradOpKernel, + ops::CudnnConvTransposeGradOpKernel); diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index 50081779a5ea3c81884007d4e4b7832dc4ea2bdd..3e55ef036a7fb976117054574d1347fa943acd55 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -30,11 +30,6 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { std::vector strides = ctx->Attrs().Get>("strides"); std::vector paddings = ctx->Attrs().Get>("paddings"); - for (size_t i = 0; i < 
paddings.size(); ++i) { - PADDLE_ENFORCE_EQ(paddings[i], 0, - "No Padding allowed in conv transpose op."); - } - PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, "ConvTransposeOp intput should be 4-D or 5-D tensor."); PADDLE_ENFORCE_EQ(in_dims.size(), filter_dims.size(), @@ -51,8 +46,8 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { "as the number of filters."); std::vector output_shape({in_dims[0], filter_dims[1]}); - for (size_t i = 0; i < paddings.size(); ++i) { - output_shape.push_back((in_dims[i + 2] - 1) * strides[i] + + for (size_t i = 0; i < strides.size(); ++i) { + output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] + filter_dims[i + 2]); } ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); @@ -79,11 +74,13 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker( "The format of output tensor is also NCHW."); AddAttr>( "strides", - "(vector defalut:{1, 1}), strides of convolution transpose operator.") + "(vector defalut:{1, 1}), the strides(h_stride, w_stride) of " + "convolution transpose operator.") .SetDefault({1, 1}); AddAttr>( "paddings", - "(vector defalut:{0, 0}), paddings of convolution transpose operator.") + "(vector defalut:{0, 0}), the paddings(h_pad, w_pad) of convolution " + "transpose operator.") .SetDefault({0, 0}); AddComment(R"DOC( Convolution2D Transpose Operator. 
@@ -132,13 +129,14 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker( "Where N is batch size, C is " "the number of channels, D is the depth of the feature, H is the " "height of the feature, and W is the width of the feature."); - AddAttr>( - "strides", - "(vector defalut:{1, 1, 1}), strides of convolution transpose operator.") + AddAttr>("strides", + "(vector defalut:{1, 1, 1}), the " + "strides{d_stride, h_stride, w_stride} of " + "convolution transpose operator.") .SetDefault({1, 1, 1}); - AddAttr>( - "paddings", - "(vector defalut:{0, 0, 0}), paddings of convolution transpose operator.") + AddAttr>("paddings", + "(vector defalut:{0, 0, 0}), paddings(d_pad, " + "h_pad, w_pad) of convolution transpose operator.") .SetDefault({0, 0, 0}); AddComment(R"DOC( Convolution3D Transpose Operator. @@ -187,17 +185,21 @@ REGISTER_OP(conv2d_transpose, ops::ConvTransposeOp, ops::Conv2DTransposeOpMaker, REGISTER_OP_CPU_KERNEL( conv2d_transpose, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv2d_transpose_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); REGISTER_OP(conv3d_transpose, ops::ConvTransposeOp, ops::Conv3DTransposeOpMaker, conv3d_transpose_grad, ops::ConvTransposeOpGrad); REGISTER_OP_CPU_KERNEL( conv3d_transpose, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv3d_transpose_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv_transpose_op.cu b/paddle/operators/conv_transpose_op.cu.cc similarity index 78% rename from paddle/operators/conv_transpose_op.cu rename to paddle/operators/conv_transpose_op.cu.cc index 401cddb379ced134b800d2a078fe130a2850fbb2..4165eb0c7b048b83bbd94c57b971530043b66545 100644 --- a/paddle/operators/conv_transpose_op.cu +++ 
b/paddle/operators/conv_transpose_op.cu.cc @@ -18,14 +18,18 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( conv2d_transpose, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_GPU_KERNEL( conv2d_transpose_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); REGISTER_OP_GPU_KERNEL( conv3d_transpose, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_GPU_KERNEL( conv3d_transpose_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h index 6c1a6220d784abf89ec789f94d9cff9e5414db04..ab336ad23ce1c180b68d04e4c85b299e301d5376 100644 --- a/paddle/operators/conv_transpose_op.h +++ b/paddle/operators/conv_transpose_op.h @@ -43,16 +43,12 @@ class Conv3DTransposeOpMaker : public framework::OpProtoAndCheckerMaker { class ConvTransposeOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - - protected: void InferShape(framework::InferShapeContext* ctx) const override; }; class ConvTransposeOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - - protected: void InferShape(framework::InferShapeContext* ctx) const override; }; @@ -66,6 +62,7 @@ class GemmConvTransposeKernel : public framework::OpKernel { Tensor* output = context.Output("Output"); std::vector strides = context.Attr>("strides"); + std::vector paddings = context.Attr>("paddings"); // TODO(Zhuoyuan): Paddings can be added in future. // groups will alway be disabled in conv2dtranspose. 
@@ -120,6 +117,10 @@ class GemmConvTransposeKernel : public framework::OpKernel { math::SetConstant set_zero; set_zero(context.device_context(), output, static_cast(0)); + math::Col2ImFunctor col2im; + math::Col2VolFunctor col2vol; + std::vector dilations({1, 1, 1}); + // convolution transpose: gemm + col2im or col2vol (similar to conv-backward // on input) for (int i = 0; i < batch_size; i++) { @@ -138,16 +139,16 @@ class GemmConvTransposeKernel : public framework::OpKernel { if (filter_shape_vec.size() == 2) { // col2im: col_matrix -> dy // from (c * k_h * k_w, h * w) to (c, o_h, o_w) - math::Col2ImFunctor col2im; - - col2im(context.device_context(), output_batch, col, strides[0], - strides[1], 0, 0, 0, 0); + col2im(context.device_context(), col, + std::vector{dilations[0], dilations[1]}, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &output_batch); } else if (filter_shape_vec.size() == 3) { // col2vol: col_matrix -> dy // from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w) - math::Col2VolFunctor col2vol; - col2vol(context.device_context(), output_batch, col, strides[0], - strides[1], strides[2], 0, 0, 0); + col2vol(context.device_context(), col, dilations, strides, paddings, + &output_batch); } } } @@ -171,7 +172,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { if ((!input_grad) && (!filter_grad)) return; std::vector strides = context.Attr>("strides"); - // Actually, no paddings and groups allowed in conv transpose. 
std::vector paddings = context.Attr>("paddings"); const int batch_size = static_cast(input->dims()[0]); @@ -228,6 +228,10 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { Tensor filter_grad_; math::SetConstant set_zero; + math::Im2ColFunctor im2col; + math::Vol2ColFunctor vol2col; + std::vector dilations({1, 1, 1}); + if (input_grad) { input_grad->mutable_data(context.GetPlace()); set_zero(context.device_context(), input_grad, static_cast(0)); @@ -247,17 +251,16 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { if (filter_shape_vec.size() == 2) { // im2col: dy -> col matrix // from (c, o_h, o_w) to (c * k_h * k_w, h * w) - math::Im2ColFunctor im2col; - im2col(context.device_context(), output_grad_batch, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + im2col(context.device_context(), output_grad_batch, + std::vector{dilations[0], dilations[1]}, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); } else if (filter_shape_vec.size() == 3) { // vol2col: dy -> col_matrix // from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w) - math::Vol2ColFunctor vol2col; - vol2col(context.device_context(), output_grad_batch, col, strides[0], - strides[1], strides[2], paddings[0], paddings[1], - paddings[2]); + vol2col(context.device_context(), output_grad_batch, dilations, + strides, paddings, &col); } if (input_grad) { diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 68c56f531f941e1b8f66ac7ba6bf318881642c4f..62a4e484eceeabc4cc26e68ac54a50be1ac95df7 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -132,7 +132,7 @@ class CosSimGradKernel : public framework::OpKernel { // compute dy if (out_grad_y) { out_grad_y->mutable_data(context.GetPlace()); - auto dy = EigenMatrix::Reshape(*out_grad_y, 1); + auto dy = EigenVector::Flatten(*out_grad_y); auto grad = x / norm_prod_bcast - z_bcast * y_bcast / 
y_snorm_bcast; dy.device(place) = (dz_bcast * grad).sum(Eigen::array({{0}})); } diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index 530b319a44eac915f0d49eb55bfe5929908eab26..6212e39dfde33c5943958adbd1a0a052262e119e 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -23,8 +23,6 @@ template __global__ void CrossEntropyGradientKernel(T* dX, const T* dY, const T* X, const int64_t* label, const int N, const int D) { - // TOOD(qingqing) define CUDA_1D_KERNEL_LOOP macro in a common file. - // CUDA_1D_KERNEL_LOOP(i, N) { for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) { int idx = i * D + label[i]; diff --git a/paddle/operators/detail/safe_ref.h b/paddle/operators/detail/safe_ref.h new file mode 100644 index 0000000000000000000000000000000000000000..b71af17309f9f46b5c87f0f479d4e03443fa7f93 --- /dev/null +++ b/paddle/operators/detail/safe_ref.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +namespace paddle { +namespace operators { +namespace detail { +/** + * Get Reference From Pointer with check. The error message is printf format, + * and passed by `args` + */ +template +inline T &Ref(T *ptr, ARGS &&... 
args) { + PADDLE_ENFORCE(ptr != nullptr, args...); + return *ptr; +} +} // namespace detail +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/elementwise_add_op.cc b/paddle/operators/elementwise_add_op.cc index ebe1de90c7d245756de759d8675a30f955843798..432b9ba6f72f8dd11c666d5473c570bde60de995 100644 --- a/paddle/operators/elementwise_add_op.cc +++ b/paddle/operators/elementwise_add_op.cc @@ -34,7 +34,13 @@ REGISTER_OP(elementwise_add, ops::ElementwiseOp, ops::ElementwiseAddOpMaker, elementwise_add_grad, ops::ElementwiseOpGrad); REGISTER_OP_CPU_KERNEL( elementwise_add, - ops::ElementwiseAddKernel); + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel); REGISTER_OP_CPU_KERNEL( elementwise_add_grad, - ops::ElementwiseAddGradKernel); + ops::ElementwiseAddGradKernel, + ops::ElementwiseAddGradKernel, + ops::ElementwiseAddGradKernel, + ops::ElementwiseAddGradKernel); diff --git a/paddle/operators/elementwise_add_op.cu b/paddle/operators/elementwise_add_op.cu index 85d063a76b5592c716a5bdf23a0993976abc6ae4..7591428ac7c2f74f25f0f7d818eafcf59c8e4a4f 100644 --- a/paddle/operators/elementwise_add_op.cu +++ b/paddle/operators/elementwise_add_op.cu @@ -19,7 +19,13 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( elementwise_add, - ops::ElementwiseAddKernel); + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel); REGISTER_OP_GPU_KERNEL( elementwise_add_grad, - ops::ElementwiseAddGradKernel); + ops::ElementwiseAddGradKernel, + ops::ElementwiseAddGradKernel, + ops::ElementwiseAddGradKernel, + ops::ElementwiseAddGradKernel); diff --git a/paddle/operators/elementwise_div_op.cc b/paddle/operators/elementwise_div_op.cc index de75816a249002549940b04d928c88c17d075917..7a325199bd07e44042a4e8b3aae0ab93fae1c351 100644 --- a/paddle/operators/elementwise_div_op.cc +++ b/paddle/operators/elementwise_div_op.cc @@ -35,7 +35,13 @@ 
REGISTER_OP(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker, elementwise_div_grad, ops::ElementwiseOpGrad); REGISTER_OP_CPU_KERNEL( elementwise_div, - ops::ElementwiseDivKernel); + ops::ElementwiseDivKernel, + ops::ElementwiseDivKernel, + ops::ElementwiseDivKernel, + ops::ElementwiseDivKernel); REGISTER_OP_CPU_KERNEL( elementwise_div_grad, - ops::ElementwiseDivGradKernel); + ops::ElementwiseDivGradKernel, + ops::ElementwiseDivGradKernel, + ops::ElementwiseDivGradKernel, + ops::ElementwiseDivGradKernel); diff --git a/paddle/operators/elementwise_div_op.cu b/paddle/operators/elementwise_div_op.cu index b96aa31748c77f0d07f9bb7fb19235239983abd5..de4d0c33442a1fcfe0dd4c16df7ceeec737fbc6d 100644 --- a/paddle/operators/elementwise_div_op.cu +++ b/paddle/operators/elementwise_div_op.cu @@ -19,7 +19,13 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( elementwise_div, - ops::ElementwiseDivKernel); + ops::ElementwiseDivKernel, + ops::ElementwiseDivKernel, + ops::ElementwiseDivKernel, + ops::ElementwiseDivKernel); REGISTER_OP_GPU_KERNEL( elementwise_div_grad, - ops::ElementwiseDivGradKernel); + ops::ElementwiseDivGradKernel, + ops::ElementwiseDivGradKernel, + ops::ElementwiseDivGradKernel, + ops::ElementwiseDivGradKernel); diff --git a/paddle/operators/elementwise_mul_op.cc b/paddle/operators/elementwise_mul_op.cc index ffa10486f123963274aa478eb4c607e32138bcec..8851267a524f51773a9f86ff83943cea4cb042aa 100644 --- a/paddle/operators/elementwise_mul_op.cc +++ b/paddle/operators/elementwise_mul_op.cc @@ -37,8 +37,12 @@ REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker, REGISTER_OP_CPU_KERNEL( elementwise_mul, ops::ElementwiseMulKernel, - ops::ElementwiseMulKernel); + ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel); REGISTER_OP_CPU_KERNEL( elementwise_mul_grad, ops::ElementwiseMulGradKernel, - ops::ElementwiseMulGradKernel); + ops::ElementwiseMulGradKernel, + ops::ElementwiseMulGradKernel, + 
ops::ElementwiseMulGradKernel); diff --git a/paddle/operators/elementwise_mul_op.cu b/paddle/operators/elementwise_mul_op.cu index 056f081d3e6ac349978ff00689700c035bed8e39..b0dfdee1ccef56c6cda06ae6759017294fa5115c 100644 --- a/paddle/operators/elementwise_mul_op.cu +++ b/paddle/operators/elementwise_mul_op.cu @@ -20,8 +20,12 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( elementwise_mul, ops::ElementwiseMulKernel, - ops::ElementwiseMulKernel); + ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel); REGISTER_OP_GPU_KERNEL( elementwise_mul_grad, ops::ElementwiseMulGradKernel, - ops::ElementwiseMulGradKernel); + ops::ElementwiseMulGradKernel, + ops::ElementwiseMulGradKernel, + ops::ElementwiseMulGradKernel); diff --git a/paddle/operators/elementwise_sub_op.cc b/paddle/operators/elementwise_sub_op.cc index 39702dad0ee61de71ff0d54765e6f73de93cee9c..95d7979e39bfe7b484acb7771d1bd078014293a2 100644 --- a/paddle/operators/elementwise_sub_op.cc +++ b/paddle/operators/elementwise_sub_op.cc @@ -34,7 +34,13 @@ REGISTER_OP(elementwise_sub, ops::ElementwiseOp, ops::ElementwiseSubOpMaker, elementwise_sub_grad, ops::ElementwiseOpGrad); REGISTER_OP_CPU_KERNEL( elementwise_sub, - ops::ElementwiseSubKernel); + ops::ElementwiseSubKernel, + ops::ElementwiseSubKernel, + ops::ElementwiseSubKernel, + ops::ElementwiseSubKernel); REGISTER_OP_CPU_KERNEL( elementwise_sub_grad, - ops::ElementwiseSubGradKernel); + ops::ElementwiseSubGradKernel, + ops::ElementwiseSubGradKernel, + ops::ElementwiseSubGradKernel, + ops::ElementwiseSubGradKernel); diff --git a/paddle/operators/elementwise_sub_op.cu b/paddle/operators/elementwise_sub_op.cu index 0efb92fce9975ed9fa029a3ce919589d09efb0d7..ec23bec35feae26f5463c575b1ab6f58d417e100 100644 --- a/paddle/operators/elementwise_sub_op.cu +++ b/paddle/operators/elementwise_sub_op.cu @@ -19,7 +19,13 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( elementwise_sub, - ops::ElementwiseSubKernel); + 
ops::ElementwiseSubKernel, + ops::ElementwiseSubKernel, + ops::ElementwiseSubKernel, + ops::ElementwiseSubKernel); REGISTER_OP_GPU_KERNEL( elementwise_sub_grad, - ops::ElementwiseSubGradKernel); + ops::ElementwiseSubGradKernel, + ops::ElementwiseSubGradKernel, + ops::ElementwiseSubGradKernel, + ops::ElementwiseSubGradKernel); diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc index 85871ebbfcd8ee38ef5e8078d1d6cb6bdda46a7b..985b5d1e865e513d833bff72dcd20a8f20851d8c 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cc @@ -101,4 +101,7 @@ REGISTER_OPERATOR(fill_constant_batch_size_like, REGISTER_OP_CPU_KERNEL( fill_constant_batch_size_like, ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel); + ops::FillConstantBatchSizeLikeOpKernel, + ops::FillConstantBatchSizeLikeOpKernel, + ops::FillConstantBatchSizeLikeOpKernel); diff --git a/paddle/operators/fill_constant_batch_size_like_op.cu b/paddle/operators/fill_constant_batch_size_like_op.cu.cc similarity index 81% rename from paddle/operators/fill_constant_batch_size_like_op.cu rename to paddle/operators/fill_constant_batch_size_like_op.cu.cc index 298c196f1dfef388640e34153264986bd518a11a..9e7a1eeab863c962ca72908e561e12a04d5021c5 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cu +++ b/paddle/operators/fill_constant_batch_size_like_op.cu.cc @@ -12,11 +12,14 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/framework/op_registry.h" #include "paddle/operators/fill_constant_batch_size_like_op.h" +#include "paddle/framework/op_registry.h" namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( fill_constant_batch_size_like, ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel); + ops::FillConstantBatchSizeLikeOpKernel, + ops::FillConstantBatchSizeLikeOpKernel, + ops::FillConstantBatchSizeLikeOpKernel); diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 8ab39d4fb012b8fa3883f33e4d15be7918500354..95fb5932b8b555e1357adc9fdfb7b6e6db7da71d 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -54,5 +54,8 @@ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, ops::FillZerosLikeOp, ops::FillZerosLikeOpMaker); REGISTER_OP_CPU_KERNEL( - fill_zeros_like, - ops::FillZerosLikeKernel); + fill_zeros_like, ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel); diff --git a/paddle/operators/fill_zeros_like_op.cu b/paddle/operators/fill_zeros_like_op.cu.cc similarity index 69% rename from paddle/operators/fill_zeros_like_op.cu rename to paddle/operators/fill_zeros_like_op.cu.cc index a6d4ba64bde534ea76867c456537b130a45b9496..1501a17441072223ba0e8cf5b6c8cdd5e903a467 100644 --- a/paddle/operators/fill_zeros_like_op.cu +++ b/paddle/operators/fill_zeros_like_op.cu.cc @@ -12,10 +12,13 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/framework/op_registry.h" #include "paddle/operators/fill_zeros_like_op.h" +#include "paddle/framework/op_registry.h" namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( - fill_zeros_like, - ops::FillZerosLikeKernel); + fill_zeros_like, ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel); diff --git a/paddle/operators/gru_op.cu b/paddle/operators/gru_op.cu.cc similarity index 97% rename from paddle/operators/gru_op.cu rename to paddle/operators/gru_op.cu.cc index 35538c74b4bf678f8068999bfadb2589a1671be0..0ceff94ec3ddaadbd5f0ca4f5a4eebe6cb8ee3a9 100644 --- a/paddle/operators/gru_op.cu +++ b/paddle/operators/gru_op.cu.cc @@ -12,7 +12,6 @@ See the License for the specific language governing permissions and limitations under the License. */ -#define EIGEN_USE_GPU #include "paddle/operators/gru_op.h" namespace ops = paddle::operators; diff --git a/paddle/operators/gru_op.h b/paddle/operators/gru_op.h index ba90ec9816c40a6a49065ac6efcee6b93dffce90..1b18368e0e16365682520b62a7f6adab0cbb527f 100644 --- a/paddle/operators/gru_op.h +++ b/paddle/operators/gru_op.h @@ -24,12 +24,17 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; +using Tensor = framework::Tensor; -template -using EigenMatrix = framework::EigenMatrix; +template +inline void ReorderInitState(const platform::DeviceContext& ctx, + const framework::Tensor& src, const size_t* index, + framework::Tensor* dst, bool indexed_src) { + math::CopyMatrixRowsFunctor row_shuffle; + dst->mutable_data(src.dims(), ctx.GetPlace()); + row_shuffle(ctx, src, index, *dst, indexed_src); +} template class GRUKernel : public framework::OpKernel { @@ -37,7 +42,6 @@ class GRUKernel : public framework::OpKernel { void BatchCompute(const framework::ExecutionContext& context) const { auto* input = context.Input("Input"); auto* h0 = context.Input("H0"); - const 
T* h0_data = h0 ? h0->data() : nullptr; auto* weight = context.Input("Weight"); const T* weight_data = weight->data(); auto* bias = context.Input("Bias"); @@ -57,24 +61,31 @@ class GRUKernel : public framework::OpKernel { bool is_reverse = context.Attr("is_reverse"); math::LoDTensor2BatchFunctor to_batch; - to_batch(context.device_context(), *input, *batch_gate, true, is_reverse); + auto& dev_ctx = context.device_context(); + to_batch(dev_ctx, *input, *batch_gate, true, is_reverse); - int frame_size = hidden_dims[1]; - int batch_size = hidden_dims[0]; - auto g = EigenMatrix::From(*batch_gate); - auto place = context.GetEigenDevice(); if (bias) { - auto b = EigenMatrix::From(*bias); - g.device(place) = g + - b.reshape(Eigen::array({{1, frame_size * 3}})) - .broadcast(Eigen::array({{batch_size, 1}})); + math::RowwiseAdd add_bias; + add_bias(dev_ctx, *batch_gate, *bias, batch_gate); } + int frame_size = hidden_dims[1]; math::hl_gru_value gru_value; gru_value.gateWeight = const_cast(weight_data); gru_value.stateWeight = const_cast(weight_data + 2 * frame_size * frame_size); - gru_value.prevOutValue = const_cast(h0_data); + Tensor ordered_h0; + const size_t* order = batch_gate->lod()[2].data(); + if (h0) { + // Since the batch computing for GRU reorders the input sequences + // according to their length. The initialized cell state also needs + // to reorder. 
+ ReorderInitState(context.device_context(), *h0, order, + &ordered_h0, true); + gru_value.prevOutValue = ordered_h0.data(); + } else { + gru_value.prevOutValue = nullptr; + } auto batch_starts = batch_gate->lod()[0]; size_t num_batch = batch_starts.size() - 1; for (size_t n = 0; n < num_batch; n++) { @@ -89,7 +100,7 @@ class GRUKernel : public framework::OpKernel { gru_value.gateValue = gate_t.data(); gru_value.resetOutputValue = reset_hidden_prev_t.data(); math::GRUUnitFunctor::compute( - context.device_context(), gru_value, frame_size, cur_batch_size, + dev_ctx, gru_value, frame_size, cur_batch_size, math::ActiveType(context.Attr("activation")), math::ActiveType(context.Attr("gate_activation"))); gru_value.prevOutValue = gru_value.outputValue; @@ -97,7 +108,7 @@ class GRUKernel : public framework::OpKernel { math::Batch2LoDTensorFunctor to_seq; batch_hidden->set_lod(batch_gate->lod()); - to_seq(context.device_context(), *batch_hidden, *hidden); + to_seq(dev_ctx, *batch_hidden, *hidden); } void Compute(const framework::ExecutionContext& context) const override { @@ -110,7 +121,6 @@ class GRUGradKernel : public framework::OpKernel { public: void BatchCompute(const framework::ExecutionContext& context) const { auto* h0 = context.Input("H0"); - const T* h0_data = h0 ? 
h0->data() : nullptr; auto* weight = context.Input("Weight"); const T* weight_data = weight->data(); auto* batch_gate = context.Input("BatchGate"); @@ -138,15 +148,25 @@ class GRUGradKernel : public framework::OpKernel { batch_reset_hidden_prev_grad.mutable_data(hidden_dims, context.GetPlace()); math::SetConstant zero; - zero(context.device_context(), &batch_hidden_grad, static_cast(0.0)); - zero(context.device_context(), &batch_gate_grad, static_cast(0.0)); - zero(context.device_context(), &batch_reset_hidden_prev_grad, - static_cast(0.0)); + auto& dev_ctx = context.device_context(); + zero(dev_ctx, &batch_hidden_grad, static_cast(0.0)); + zero(dev_ctx, &batch_gate_grad, static_cast(0.0)); + zero(dev_ctx, &batch_reset_hidden_prev_grad, static_cast(0.0)); + + Tensor ordered_h0, ordered_h0_grad; + const size_t* order = batch_gate->lod()[2].data(); + if (h0) { + ReorderInitState(context.device_context(), *h0, order, + &ordered_h0, true); + } + if (h0_grad) { + ordered_h0_grad.mutable_data(h0_grad->dims(), context.GetPlace()); + zero(context.device_context(), &ordered_h0_grad, static_cast(0.0)); + } bool is_reverse = context.Attr("is_reverse"); batch_hidden_grad.set_lod(batch_hidden->lod()); - to_batch(context.device_context(), *hidden_grad, batch_hidden_grad, false, - is_reverse); + to_batch(dev_ctx, *hidden_grad, batch_hidden_grad, false, is_reverse); math::hl_gru_value gru_value; gru_value.gateWeight = const_cast(weight_data); @@ -157,7 +177,7 @@ class GRUGradKernel : public framework::OpKernel { if (weight_grad) { gru_grad.gateWeightGrad = weight_grad->mutable_data(context.GetPlace()); - zero(context.device_context(), weight_grad, static_cast(0.0)); + zero(dev_ctx, weight_grad, static_cast(0.0)); gru_grad.stateWeightGrad = weight_grad->data() + 2 * frame_size * frame_size; } else { @@ -185,14 +205,9 @@ class GRUGradKernel : public framework::OpKernel { batch_reset_hidden_prev_grad.Slice(bstart, bend); gru_grad.resetOutputGrad = reset_hidden_prev_grad_t.data(); if 
(n == 0) { - gru_value.prevOutValue = const_cast(h0_data); - if (h0_grad) { - T* h0_grad_data = h0_grad->mutable_data(context.GetPlace()); - zero(context.device_context(), h0_grad, static_cast(0.0)); - gru_grad.prevOutGrad = h0_grad_data; - } else { - gru_grad.prevOutGrad = nullptr; - } + gru_value.prevOutValue = h0 ? ordered_h0.data() : nullptr; + gru_grad.prevOutGrad = + h0 && h0_grad ? ordered_h0_grad.data() : nullptr; } else { int bstart_pre = static_cast(batch_starts[n - 1]); Tensor hidden_prev_t = batch_hidden->Slice(bstart_pre, bstart); @@ -202,8 +217,7 @@ class GRUGradKernel : public framework::OpKernel { } math::GRUUnitGradFunctor::compute( - context.device_context(), gru_value, gru_grad, frame_size, - cur_batch_size, + dev_ctx, gru_value, gru_grad, frame_size, cur_batch_size, math::ActiveType(context.Attr("activation")), math::ActiveType(context.Attr("gate_activation"))); } @@ -211,14 +225,16 @@ class GRUGradKernel : public framework::OpKernel { input_grad->mutable_data(context.GetPlace()); math::Batch2LoDTensorFunctor to_seq; batch_gate_grad.set_lod(batch_gate->lod()); - to_seq(context.device_context(), batch_gate_grad, *input_grad); + to_seq(dev_ctx, batch_gate_grad, *input_grad); } if (bias_grad) { bias_grad->mutable_data(context.GetPlace()); - auto d_b = EigenMatrix::From(*bias_grad); - auto d_g = EigenMatrix::From(batch_gate_grad); - auto place = context.GetEigenDevice(); - d_b.device(place) = d_g.sum(Eigen::array({{0}})); + math::ColwiseSum col_sum; + col_sum(dev_ctx, batch_gate_grad, bias_grad); + } + if (h0 && h0_grad) { + ReorderInitState(context.device_context(), ordered_h0_grad, + order, h0_grad, false); } } diff --git a/paddle/operators/gru_unit_op.cc b/paddle/operators/gru_unit_op.cc index 89c027ff1eea93012dc5ab22b081786efc328e96..877c969103cfc17e1b170449d1922d9c7db2a58b 100644 --- a/paddle/operators/gru_unit_op.cc +++ b/paddle/operators/gru_unit_op.cc @@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker { 
.SetDefault(sigmoid) .InEnum({identity, sigmoid, tanh, relu}); AddComment(R"DOC( -GRUUnit Operator. - -This operator implements partial calculations of the GRU unit as follows: +GRUUnit Operator implements partial calculations of the GRU unit as following: $$ -update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\ -reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\ -output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\ -output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev}) +update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\ +reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\ +output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\ +output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t) $$ -The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp. +which is same as one time step of GRU Operator. + +@note To implement the complete GRU unit, fully-connected operator must be +used before to feed xu, xr and xc as the Input of GRUUnit operator. 
)DOC"); } @@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { "ResetHiddenPrev"); PADDLE_ENFORCE(ctx->HasInput("Hidden"), "Input(%s) of GRUUnitGradOp should not be null.", "Hidden"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")), - "Input(%s@GRAD) of GRUUnitGradOp should not be null.", - "Gate"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")), - "Input(%s@GRAD) of GRUUnitGradOp should not be null.", - "ResetHiddenPrev"); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")), "Input(%s@GRAD) of GRUUnitGradOp should not be null.", "Hidden"); diff --git a/paddle/operators/gru_unit_op.h b/paddle/operators/gru_unit_op.h index c53e7d9827e0395e6ce613302e732b2797f83cdd..050430d3252d05236219cd5ced5a792c21413c1f 100644 --- a/paddle/operators/gru_unit_op.h +++ b/paddle/operators/gru_unit_op.h @@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel { auto c = g.slice(c_offsets, extents); // output candidate // calculate final output - h.device(place) = u * (h_p - c) + c; + h.device(place) = u * (c - h_p) + h_p; } }; @@ -146,35 +146,27 @@ class GRUUnitGradKernel : public framework::OpKernel { auto* weight_grad = context.Output(framework::GradVarName("Weight")); auto* bias_grad = context.Output(framework::GradVarName("Bias")); - input_grad->mutable_data(context.GetPlace()); - hidden_prev_grad->mutable_data(context.GetPlace()); - weight_grad->mutable_data(context.GetPlace()); Tensor gate_grad; - gate_grad.mutable_data(input->dims(), context.GetPlace()); Tensor reset_hidden_prev_grad; - reset_hidden_prev_grad.mutable_data(reset_hidden_prev->dims(), - context.GetPlace()); - - int batch_size = input->dims()[0]; - int frame_size = hidden_prev->dims()[1]; const T* hidden_prev_data = hidden_prev->data(); - T* hidden_prev_grad_data = hidden_prev_grad->data(); const T* weight_data = weight->data(); - T* weight_grad_data = weight_grad->data(); - T* gate_grad_data = gate_grad.data(); + T* 
gate_grad_data = + gate_grad.mutable_data(input->dims(), context.GetPlace()); const T* reset_hidden_prev_data = reset_hidden_prev->data(); - T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data(); + T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.mutable_data( + reset_hidden_prev->dims(), context.GetPlace()); auto h_p = EigenMatrix::From(*hidden_prev); auto g = EigenMatrix::From(*gate); auto d_h = EigenMatrix::From(*hidden_grad); - auto d_x = EigenMatrix::From(*input_grad); - auto d_h_p = EigenMatrix::From(*hidden_prev_grad); auto d_g = EigenMatrix::From(gate_grad); auto d_r_h_p = EigenMatrix::From(reset_hidden_prev_grad); auto place = context.GetEigenDevice(); + int batch_size = input->dims()[0]; + int frame_size = hidden_prev->dims()[1]; + Eigen::array extents({{batch_size, frame_size}}); Eigen::array u_offsets({{0, 0}}); auto u = g.slice(u_offsets, extents); // update gate @@ -185,38 +177,52 @@ class GRUUnitGradKernel : public framework::OpKernel { // backward for unactivated update gate ActGradCompute(context.Attr("gate_activation"), place, u, u, - d_g.slice(u_offsets, extents), d_h * (h_p - c)); + d_g.slice(u_offsets, extents), d_h * (c - h_p)); // backward for unactivated output candidate ActGradCompute(context.Attr("activation"), place, c, c, - d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u)); + d_g.slice(c_offsets, extents), d_h * u); // backward for reset_hidden_prev math::gemm(context.device_context(), false, true, batch_size, frame_size, frame_size, 1, gate_grad_data + frame_size * 2, frame_size * 3, weight_data + frame_size * frame_size * 2, frame_size, 0, reset_hidden_prev_grad_data, frame_size); - // backward for state_weight - math::gemm( - context.device_context(), true, false, frame_size, frame_size, - batch_size, 1, reset_hidden_prev_data, frame_size, - gate_grad_data + frame_size * 2, frame_size * 3, 0, - weight_grad_data + frame_size * frame_size * 2, frame_size); // backward for unactivated reset gate 
ActGradCompute(context.Attr("gate_activation"), place, r, r, d_g.slice(r_offsets, extents), d_r_h_p * h_p); - // backward for update_gate_weight and reset_gate_weight - math::gemm(context.device_context(), true, false, frame_size, - frame_size * 2, batch_size, 1, hidden_prev_data, - frame_size, gate_grad_data, frame_size * 3, 0, - weight_grad_data, frame_size * 2); + // backward for weight + if (weight_grad) { + T* weight_grad_data = weight_grad->mutable_data(context.GetPlace()); + // backward for state_weight + math::gemm( + context.device_context(), true, false, frame_size, frame_size, + batch_size, 1, reset_hidden_prev_data, frame_size, + gate_grad_data + frame_size * 2, frame_size * 3, 0, + weight_grad_data + frame_size * frame_size * 2, frame_size); + + // backward for update_gate_weight and reset_gate_weight + math::gemm(context.device_context(), true, false, frame_size, + frame_size * 2, batch_size, 1, hidden_prev_data, + frame_size, gate_grad_data, frame_size * 3, 0, + weight_grad_data, frame_size * 2); + } // backward for hidden_prev - d_h_p.device(place) = d_r_h_p * r + d_h * u; - math::gemm(context.device_context(), false, true, batch_size, - frame_size, frame_size * 2, 1, gate_grad_data, - frame_size * 3, weight_data, frame_size * 2, 1, - hidden_prev_grad_data, frame_size); + if (hidden_prev_grad) { + T* hidden_prev_grad_data = + hidden_prev_grad->mutable_data(context.GetPlace()); + auto d_h_p = EigenMatrix::From(*hidden_prev_grad); + d_h_p.device(place) = d_r_h_p * r + d_h * (u.constant(T(1)) - u); + math::gemm(context.device_context(), false, true, batch_size, + frame_size, frame_size * 2, 1, gate_grad_data, + frame_size * 3, weight_data, frame_size * 2, 1, + hidden_prev_grad_data, frame_size); + } // backward for input - d_x.device(place) = d_g; + if (input_grad) { + input_grad->mutable_data(context.GetPlace()); + auto d_x = EigenMatrix::From(*input_grad); + d_x.device(place) = d_g; + } // backward for bias if (bias_grad) { 
bias_grad->mutable_data(context.GetPlace()); diff --git a/paddle/operators/is_empty_op.cc b/paddle/operators/is_empty_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..54fecf44e881b5c283c81580fd161da9808d253e --- /dev/null +++ b/paddle/operators/is_empty_op.cc @@ -0,0 +1,67 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +constexpr char kInput[] = "X"; +constexpr char kOutput[] = "Out"; + +class IsEmptyOp : public framework::OperatorBase { + public: + IsEmptyOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + // get input + auto *var = scope.FindVar(Input(kInput)); + PADDLE_ENFORCE_NOT_NULL(var); + auto &tensor = var->Get(); + // get output + auto *out = scope.FindVar(Output(kOutput)); + PADDLE_ENFORCE_NOT_NULL(out); + auto *out_tensor = out->GetMutable(); + + out_tensor->Resize({1}); + out_tensor->mutable_data(platform::CPUPlace())[0] = + framework::product(tensor.dims()) == 0; + } +}; + +class IsEmptyOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + 
IsEmptyOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput(kInput, "(Tensor) Tensor which is to be checked."); + AddOutput(kOutput, "(Tensor) a boolean Tensor that indicates whether X is empty."); + AddComment(R"DOC( +IsEmpty Operator which checks whether a tensor is empty. + +It will just return product(tensor.dims()) == 0; + )DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_WITHOUT_GRADIENT(is_empty, paddle::operators::IsEmptyOp, + paddle::operators::IsEmptyOpProtoMaker); diff --git a/paddle/operators/linear_chain_crf_op.h b/paddle/operators/linear_chain_crf_op.h index ddf73981751798c72cef08f2dd5c87580b45aec3..872f659fed40d7479d9d8bed6c8469fb28282253 100644 --- a/paddle/operators/linear_chain_crf_op.h +++ b/paddle/operators/linear_chain_crf_op.h @@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { ll -= std::log(sum); // Now ll is equal to -log(Z). - const int* lbl = label.data(); + const int64_t* lbl = label.data(); PADDLE_ENFORCE_LT( static_cast(*std::max_element(lbl, lbl + seq_length)), tag_num, "An invalid tag label that execesses the largest tag number."); @@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { Tensor* emission_grad) const { const T* w_exps = transition_exps.data(); const T* x_exps = emission_exps.data(); - const int* label_value = label.data(); + const int64_t* label_value = label.data(); T* beta_value = beta->data(); auto x_dims = emission_exps.dims(); diff --git a/paddle/operators/logical_op.cc b/paddle/operators/logical_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..a37582c1d840ac11f847d8743c824ef1aef0fd66 --- /dev/null +++ b/paddle/operators/logical_op.cc @@ -0,0 +1,153 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/logical_op.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { +template +class BinaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + BinaryLogicalOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + OpComment comment; + AddInput("X", + string::Sprintf("(LoDTensor) Left hand operand of %s operator", + comment.type)); + AddInput("Y", + string::Sprintf("(LoDTensor) Right hand operand of %s operator", + comment.type)); + AddOutput("Out", string::Sprintf( + "(LoDTensor) n-dim bool tensor. Each element is %s", + comment.equation)); + AddComment(string::Sprintf(R"DOC(%s Operator + +It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim boolean tensors. +Each element of Out is calculated by %s +)DOC", + comment.type, comment.equation)); + } +}; + +template +class UnaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + UnaryLogicalOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + OpComment comment; + AddInput("X", string::Sprintf("(LoDTensor) Operand of %s operator", + comment.type)); + AddOutput("Out", string::Sprintf( + "(LoDTensor) n-dim bool tensor. 
Each element is %s", + comment.equation)); + AddComment(string::Sprintf(R"DOC(%s Operator + +It operates element-wise on X, and returns the Out. X and Out are N-dim boolean tensors. +Each element of Out is calculated by %s +)DOC", + comment.type, comment.equation)); + } +}; + +template +class BinaryLogicalOpInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + OpComment comment; + PADDLE_ENFORCE(context->HasInput("X"), + "Input(X) of %s operator must not be null", comment.type); + PADDLE_ENFORCE(context->HasInput("Y"), + "Input(Y) of %s operator must not be null", comment.type); + auto dim_x = context->GetInputDim("X"); + auto dim_y = context->GetInputDim("Y"); + PADDLE_ENFORCE_EQ(framework::product(dim_x), framework::product(dim_y), + "The number of elements in X and Y should be same"); + + context->SetOutputDim("Out", context->GetInputDim("X")); + context->ShareLoD("X", "Out"); + } +}; + +template +class UnaryLogicalOpInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + OpComment comment; + PADDLE_ENFORCE(context->HasInput("X"), + "Input(X) of %s operator must not be null", comment.type); + auto dim_x = context->GetInputDim("X"); + + context->SetOutputDim("Out", context->GetInputDim("X")); + context->ShareLoD("X", "Out"); + } +}; + +class LogicalOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext &ctx) const override { + framework::OpKernelType kt = OperatorWithKernel::GetKernelType(ctx); + // LogicalOp kernel's device type is decided by input tensor place + kt.place_ = ctx.Input("X")->place(); + return kt; + } +}; + +} // namespace operators +} // namespace paddle + +#define REGISTER_BINARY_LOGICAL_OP(op_type, _equation) \ + struct _##op_type##Comment { 
\ + static char type[]; \ + static char equation[]; \ + }; \ + char _##op_type##Comment::type[]{#op_type}; \ + char _##op_type##Comment::equation[]{_equation}; \ + REGISTER_OPERATOR( \ + op_type, ::paddle::operators::LogicalOp, \ + ::paddle::operators::BinaryLogicalOpProtoMaker<_##op_type##Comment>, \ + ::paddle::operators::BinaryLogicalOpInferShape<_##op_type##Comment>, \ + ::paddle::framework::EmptyGradOpMaker); + +#define REGISTER_UNARY_LOGICAL_OP(op_type, _equation) \ + struct _##op_type##Comment { \ + static char type[]; \ + static char equation[]; \ + }; \ + char _##op_type##Comment::type[]{#op_type}; \ + char _##op_type##Comment::equation[]{_equation}; \ + REGISTER_OPERATOR( \ + op_type, ::paddle::operators::LogicalOp, \ + ::paddle::operators::UnaryLogicalOpProtoMaker<_##op_type##Comment>, \ + ::paddle::operators::UnaryLogicalOpInferShape<_##op_type##Comment>, \ + ::paddle::framework::EmptyGradOpMaker); + +REGISTER_BINARY_LOGICAL_OP(logical_and, "Out = X && Y"); +REGISTER_BINARY_LOGICAL_KERNEL(logical_and, CPU, + paddle::operators::LogicalAndFunctor); +REGISTER_BINARY_LOGICAL_OP(logical_or, "Out = X || Y"); +REGISTER_BINARY_LOGICAL_KERNEL(logical_or, CPU, + paddle::operators::LogicalOrFunctor); +REGISTER_UNARY_LOGICAL_OP(logical_not, "Out = !X"); +REGISTER_UNARY_LOGICAL_KERNEL(logical_not, CPU, + paddle::operators::LogicalNotFunctor); +REGISTER_BINARY_LOGICAL_OP(logical_xor, "Out = (X || Y) && !(X && Y)"); +REGISTER_BINARY_LOGICAL_KERNEL(logical_xor, CPU, + paddle::operators::LogicalXorFunctor); diff --git a/paddle/operators/logical_op.cu b/paddle/operators/logical_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..d41239b2ca43e7145ea56afcb0af69948838cc48 --- /dev/null +++ b/paddle/operators/logical_op.cu @@ -0,0 +1,24 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/logical_op.h" + +REGISTER_BINARY_LOGICAL_KERNEL(logical_and, GPU, + paddle::operators::LogicalAndFunctor); +REGISTER_BINARY_LOGICAL_KERNEL(logical_or, GPU, + paddle::operators::LogicalOrFunctor); +REGISTER_UNARY_LOGICAL_KERNEL(logical_not, GPU, + paddle::operators::LogicalNotFunctor); +REGISTER_BINARY_LOGICAL_KERNEL(logical_xor, GPU, + paddle::operators::LogicalXorFunctor); diff --git a/paddle/operators/logical_op.h b/paddle/operators/logical_op.h new file mode 100644 index 0000000000000000000000000000000000000000..6e78a7d6ed87ba950886e6bc667f82118ff78904 --- /dev/null +++ b/paddle/operators/logical_op.h @@ -0,0 +1,93 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#pragma once +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/platform/transform.h" + +namespace paddle { +namespace operators { + +template +struct LogicalAndFunctor { + using ELEM_TYPE = T; + HOSTDEVICE bool operator()(const T& a, const T& b) const { return a && b; } +}; + +template +struct LogicalOrFunctor { + using ELEM_TYPE = T; + HOSTDEVICE bool operator()(const T& a, const T& b) const { return a || b; } +}; + +template +struct LogicalNotFunctor { + using ELEM_TYPE = T; + HOSTDEVICE bool operator()(const T& a) const { return !a; } +}; + +template +struct LogicalXorFunctor { + using ELEM_TYPE = T; + HOSTDEVICE bool operator()(const T& a, const T& b) const { + return (a || b) && !(a && b); + } +}; + +template +class BinaryLogicalOpKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + using T = typename Functor::ELEM_TYPE; + auto* x = context.Input("X"); + auto* y = context.Input("Y"); + auto* out = context.Output("Out"); + Functor binary_func; + platform::Transform trans; + trans(context.device_context(), x->data(), x->data() + x->numel(), + y->data(), out->mutable_data(context.GetPlace()), + binary_func); + } +}; + +template +class UnaryLogicalOpKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + using T = typename Functor::ELEM_TYPE; + auto* x = context.Input("X"); + auto* out = context.Output("Out"); + Functor unary_func; + platform::Transform trans; + trans(context.device_context(), x->data(), x->data() + x->numel(), + out->mutable_data(context.GetPlace()), unary_func); + } +}; + +} // namespace operators +} // namespace paddle + +#define REGISTER_BINARY_LOGICAL_KERNEL(op_type, dev, functor) \ + REGISTER_OP_##dev##_KERNEL( \ + op_type, ::paddle::operators::BinaryLogicalOpKernel< \ + ::paddle::platform::dev##Place, functor>); + +#define 
REGISTER_UNARY_LOGICAL_KERNEL(op_type, dev, functor) \ + REGISTER_OP_##dev##_KERNEL( \ + op_type, ::paddle::operators::UnaryLogicalOpKernel< \ + ::paddle::platform::dev##Place, functor>); diff --git a/paddle/operators/lstm_op.cu b/paddle/operators/lstm_op.cu.cc similarity index 97% rename from paddle/operators/lstm_op.cu rename to paddle/operators/lstm_op.cu.cc index 9ad56941553bf19a56c25f41f76fe20dfa3a106f..610cbb03e890203407b1489800bc17f1a196d12c 100644 --- a/paddle/operators/lstm_op.cu +++ b/paddle/operators/lstm_op.cu.cc @@ -12,7 +12,6 @@ See the License for the specific language governing permissions and limitations under the License. */ -#define EIGEN_USE_GPU #include "paddle/operators/lstm_op.h" namespace ops = paddle::operators; diff --git a/paddle/operators/lstm_op.h b/paddle/operators/lstm_op.h index fca84e2d8fa832a3780eab7e0fa2facceb4d613b..721aa42c92f2926aabbc13d0a9027b2b4e573225 100644 --- a/paddle/operators/lstm_op.h +++ b/paddle/operators/lstm_op.h @@ -24,10 +24,6 @@ namespace operators { using LoDTensor = framework::LoDTensor; using Tensor = framework::Tensor; -template -using EigenMatrix = framework::EigenMatrix; - template inline void ReorderInitState(const platform::DeviceContext& ctx, const framework::Tensor& src, const size_t* index, @@ -65,16 +61,11 @@ class LSTMKernel : public framework::OpKernel { framework::DDim dims({in_dims[0], frame_size}); if (bias) { - Eigen::array extents({{1, 4 * frame_size}}); - Eigen::array offsets({{0, 0}}); - auto b = EigenMatrix::From(*bias); - auto gate = EigenMatrix::From(*batch_gate); - gate.device(ctx.GetEigenDevice()) = - gate + - b.slice(offsets, extents) - .reshape(Eigen::array({{1, frame_size * 4}})) - .broadcast( - Eigen::array({{static_cast(in_dims[0]), 1}})); + Tensor b = *bias; + b.Resize({bias->numel(), 1}); + Tensor gate_bias = b.Slice(0, 4 * frame_size); + math::RowwiseAdd add_bias; + add_bias(device_ctx, *batch_gate, gate_bias, batch_gate); } math::LstmMetaValue lstm_value; @@ -350,16 +341,11 @@ 
class LSTMGradKernel : public framework::OpKernel { } if (bias && bias_g) { /* backward bias */ - int m = static_cast(batch_gate_g.dims()[0]); - int n = static_cast(batch_gate_g.dims()[1]); - - Tensor ones; - ones.mutable_data({m}, ctx.GetPlace()); - math::SetConstant set; - set(device_ctx, &ones, static_cast(1.0)); - - math::gemv(device_ctx, true, m, n, 1., batch_gate_g.data(), - ones.data(), 0., bias_g->data()); + Tensor b_g = *bias_g; + b_g.Resize({bias_g->numel(), 1}); + Tensor gate_bias_g = b_g.Slice(0, 4 * frame_size); + math::ColwiseSum col_sum; + col_sum(device_ctx, batch_gate_g, &gate_bias_g); } if (h0 && h0_g) { diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index ab7f23f57043844d45c36acc475422613164bee1..3017f133afc5d4dcd484c78b44591a876ab4d667 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -1,31 +1,33 @@ add_subdirectory(detail) if(WITH_GPU) - nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context operator) + nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context framework_proto) nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor) nv_library(selected_rows_functor SRCS selected_rows_functor.cc selected_rows_functor.cu DEPS selected_rows math_function) nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor) - nv_library(softmax SRCS softmax.cc softmax.cu DEPS operator) - nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator) + nv_library(softmax SRCS softmax.cc softmax.cu DEPS device_context) + nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context) nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context) nv_library(sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function) 
nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context) - nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context) + nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context math_function) nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function) + nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context) else() - cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator) + cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context framework_proto) cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) - cc_library(softmax SRCS softmax.cc DEPS operator) - cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator) + cc_library(softmax SRCS softmax.cc DEPS device_context) + cc_library(cross_entropy SRCS cross_entropy.cc DEPS device_context) cc_library(pooling SRCS pooling.cc DEPS device_context) cc_library(sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function) cc_library(vol2col SRCS vol2col.cc DEPS device_context) - cc_library(context_project SRCS context_project.cc DEPS device_context) + cc_library(context_project SRCS context_project.cc DEPS device_context math_function) cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context) cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions) cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function) + cc_library(maxouting SRCS maxouting.cc DEPS device_context) endif() cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git 
a/paddle/operators/math/context_project.h b/paddle/operators/math/context_project.h index e0283360414fbdfb3dae2e94b45c9c8daeed3c74..72f4202bace4461d2597204feaa2a21e355bd1ac 100644 --- a/paddle/operators/math/context_project.h +++ b/paddle/operators/math/context_project.h @@ -14,9 +14,9 @@ limitations under the License. */ #pragma once -#include "paddle/framework/eigen.h" #include "paddle/framework/lod_tensor.h" #include "paddle/operators/math/im2col.h" +#include "paddle/operators/math/math_function.h" namespace paddle { namespace operators { @@ -24,9 +24,6 @@ namespace math { using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; -template -using EigenMatrix = framework::EigenMatrix; /* * \brief Context projection concatenates features in adjacent time-steps in @@ -88,13 +85,18 @@ template class ContextProjectFunctor { public: void operator()(const platform::DeviceContext& context, const LoDTensor& in, - const Tensor& padding_data, Tensor& col, - bool padding_trainable, int context_start, int context_length, - int context_stride, int up_pad, int down_pad) { + const Tensor& padding_data, bool padding_trainable, + const int context_start, const int context_length, + const int context_stride, const int up_pad, + const int down_pad, Tensor* col) { auto lod_level_0 = in.lod()[0]; math::Im2ColFunctor im2col_ocf; + std::vector dilation({1, 1}); + std::vector padding({up_pad, 0, down_pad, 0}); + std::vector stride({context_stride, 1}); + int input_row_begin, input_row_end; int sequence_height, sequence_width; sequence_width = in.dims()[1]; @@ -105,8 +107,8 @@ class ContextProjectFunctor { : static_cast(lod_level_0[i]); input_row_end = static_cast(lod_level_0[i + 1]); - Tensor out_t = col.Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + Tensor out_t = col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); @@ -123,17 +125,14 @@ class ContextProjectFunctor { {1, 
input_row_end - input_row_begin, sequence_width}); // input_channels, input_height, input_width in_t.Resize(framework::make_ddim(input_shape)); - - im2col_ocf(context, in_t, out_t, - /*stride_height*/ context_stride, /*stride_width*/ 1, up_pad, - down_pad, 0, 0); + im2col_ocf(context, in_t, dilation, stride, padding, &out_t); out_t.Resize({sequence_height, context_length * sequence_width}); } } if (padding_trainable) { for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { - Tensor out_t = col.Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + Tensor out_t = col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); @@ -150,9 +149,7 @@ class ContextProjectFunctor { Tensor out_t_sub = out_t.Slice(k * context_length, k * context_length + padding_size); Tensor w_sub = padding_data.Slice(k, k + padding_size); - auto out_t_sub_e = EigenMatrix::From(out_t_sub); - auto w_sub_e = EigenMatrix::From(w_sub); - out_t_sub_e.device(*context.GetEigenDevice()) = w_sub_e; + out_t_sub.CopyFrom(w_sub, context.GetPlace(), context); } } if (down_pad > 0) { // add down pad @@ -182,9 +179,7 @@ class ContextProjectFunctor { (down_pad_begin_row + t) * context_length); Tensor w_sub = padding_data.Slice( up_pad + padding_idx, up_pad + padding_idx + padding_size); - auto out_t_sub_e = EigenMatrix::From(out_t_sub); - auto w_sub_e = EigenMatrix::From(w_sub); - out_t_sub_e.device(*context.GetEigenDevice()) = w_sub_e; + out_t_sub.CopyFrom(w_sub, context.GetPlace(), context); } } out_t.Resize({sequence_height, context_length * sequence_width}); @@ -196,14 +191,19 @@ class ContextProjectFunctor { template class ContextProjectGradFunctor { public: - void operator()(const platform::DeviceContext& context, LoDTensor& in, - Tensor& padding_data, Tensor& col, bool padding_trainable, - int context_start, int context_length, int context_stride, - int up_pad, int down_pad, bool input_grad, bool pad_grad) { + 
void operator()(const platform::DeviceContext& context, const LoDTensor& in, + bool padding_trainable, const int context_start, + const int context_length, const int context_stride, + const int up_pad, const int down_pad, bool pad_grad, + bool input_grad, Tensor* padding_data, Tensor* col) { auto lod_level_0 = in.lod()[0]; math::Col2ImFunctor col2im_ocf; + std::vector dilation({1, 1}); + std::vector padding({up_pad, 0, down_pad, 0}); + std::vector stride({context_stride, 1}); + int input_row_begin, input_row_end; int sequence_height, sequence_width; sequence_width = in.dims()[1]; @@ -215,8 +215,8 @@ class ContextProjectGradFunctor { : static_cast(lod_level_0[i]); input_row_end = static_cast(lod_level_0[i + 1]); - Tensor out_t = col.Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + Tensor out_t = col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); @@ -234,9 +234,7 @@ class ContextProjectGradFunctor { sequence_width}); // input_channels, input_height, input_width in_t.Resize(framework::make_ddim(input_shape)); - col2im_ocf(context, in_t, out_t, - /*stride_height*/ context_stride, /*stride_width*/ 1, - up_pad, down_pad, 0, 0); + col2im_ocf(context, out_t, dilation, stride, padding, &in_t); out_t.Resize({sequence_height, context_length * sequence_width}); } } @@ -244,8 +242,8 @@ class ContextProjectGradFunctor { if (pad_grad) { if (padding_trainable) { for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { - Tensor out_t = col.Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + Tensor out_t = col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); out_t.Resize({sequence_height * context_length, sequence_width}); @@ -259,11 +257,9 @@ class ContextProjectGradFunctor { k + context_length < up_pad ? 
context_length : up_pad - k; Tensor out_t_sub = out_t.Slice(k * context_length, k * context_length + padding_size); - Tensor w_sub = padding_data.Slice(k, k + padding_size); - auto out_t_sub_e = EigenMatrix::From(out_t_sub); - auto w_sub_e = EigenMatrix::From(w_sub); - w_sub_e.device(*context.GetEigenDevice()) = - w_sub_e + out_t_sub_e; + Tensor w_sub = padding_data->Slice(k, k + padding_size); + axpy(context, w_sub.numel(), static_cast(1), + out_t_sub.data(), w_sub.data()); } } if (down_pad > 0) { @@ -292,12 +288,10 @@ class ContextProjectGradFunctor { Tensor out_t_sub = out_t.Slice( (down_pad_begin_row + t) * context_length - padding_size, (down_pad_begin_row + t) * context_length); - Tensor w_sub = padding_data.Slice( + Tensor w_sub = padding_data->Slice( up_pad + padding_idx, up_pad + padding_idx + padding_size); - auto out_t_sub_e = EigenMatrix::From(out_t_sub); - auto w_sub_e = EigenMatrix::From(w_sub); - w_sub_e.device(*context.GetEigenDevice()) = - w_sub_e + out_t_sub_e; + axpy(context, w_sub.numel(), static_cast(1), + out_t_sub.data(), w_sub.data()); } } out_t.Resize({sequence_height, context_length * sequence_width}); diff --git a/paddle/operators/math/cross_entropy.h b/paddle/operators/math/cross_entropy.h index 0ab6827ffa8f8b90b432a801607a97206e010cf4..70ed9ddd551bb8cb7989727c02fea870186c9f2e 100644 --- a/paddle/operators/math/cross_entropy.h +++ b/paddle/operators/math/cross_entropy.h @@ -14,7 +14,6 @@ #pragma once #include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" #include "paddle/framework/tensor.h" #include "paddle/platform/hostdevice.h" diff --git a/paddle/operators/math/im2col.cc b/paddle/operators/math/im2col.cc index 3b1b0bd71dd3768b932864e185af8dc839b4653e..c10c44c52076c8ee56eee3a0d82c31df70a1c9c7 100644 --- a/paddle/operators/math/im2col.cc +++ b/paddle/operators/math/im2col.cc @@ -28,57 +28,55 @@ class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, 
framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right) { + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col) { PADDLE_ENFORCE(im.dims().size() == 3); - PADDLE_ENFORCE(col.dims().size() == 5); + PADDLE_ENFORCE(col->dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; - int filter_height = col.dims()[1]; - int filter_width = col.dims()[2]; - int output_height = col.dims()[3]; - int output_width = col.dims()[4]; + int im_channels = im.dims()[0]; + int im_height = im.dims()[1]; + int im_width = im.dims()[2]; + int filter_height = col->dims()[1]; + int filter_width = col->dims()[2]; + int col_height = col->dims()[3]; + int col_width = col->dims()[4]; - PADDLE_ENFORCE_EQ( - (input_height + padding_up + padding_down - filter_height) / - stride_height + - 1, - output_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ( - (input_width + padding_left + padding_right - filter_width) / - stride_width + - 1, - output_width, - "output_width and padding(padding_left, padding_right) are " - "inconsistent."); + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + ((dilation[0] * (filter_height - 1) + 1))) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + ((dilation[1] * (filter_width - 1) + 1))) / + stride[1] + + 1, + col_width, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); - int channels_col = input_channels * filter_height * filter_width; + int channels_col = im_channels * filter_height * filter_width; const T* im_data = im.data(); - T* col_data = col.data(); + T* col_data = col->data(); for (int c = 0; c < 
channels_col; ++c) { int w_offset = c % filter_width; int h_offset = (c / filter_width) % filter_height; int c_im = c / filter_width / filter_height; - for (int h = 0; h < output_height; ++h) { - for (int w = 0; w < output_width; ++w) { - int im_row_idx = h * stride_height + h_offset - padding_up; - int im_col_idx = w * stride_width + w_offset - padding_left; + for (int h = 0; h < col_height; ++h) { + for (int w = 0; w < col_width; ++w) { + int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0]; + int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1]; + int col_idx = (c * col_height + h) * col_width + w; + int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx; - if (im_row_idx < 0 || im_row_idx >= input_height || im_col_idx < 0 || - im_col_idx >= input_width) { - col_data[(c * output_height + h) * output_width + w] = T(0); - } else { - im_row_idx += c_im * input_height; - col_data[(c * output_height + h) * output_width + w] = - im_data[im_row_idx * input_width + im_col_idx]; - } + col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height || + im_col_idx < 0 || im_col_idx >= im_width) + ? 
static_cast(0) + : im_data[im_idx]; } } } @@ -94,54 +92,55 @@ template class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right) { - PADDLE_ENFORCE(im.dims().size() == 3); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im) { + PADDLE_ENFORCE(im->dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; + int im_channels = im->dims()[0]; + int im_height = im->dims()[1]; + int im_width = im->dims()[2]; int filter_height = col.dims()[1]; int filter_width = col.dims()[2]; - int output_height = col.dims()[3]; - int output_width = col.dims()[4]; + int col_height = col.dims()[3]; + int col_width = col.dims()[4]; - PADDLE_ENFORCE_EQ( - (input_height + padding_up + padding_down - filter_height) / - stride_height + - 1, - output_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ( - (input_width + padding_left + padding_right - filter_width) / - stride_width + - 1, - output_width, - "output_width and padding(padding_left, padding_right) are " - "inconsistent."); + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + ((dilation[0] * (filter_height - 1) + 1))) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + ((dilation[1] * (filter_width - 1) + 1))) / + stride[1] + + 1, + col_width, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); - int channels_col = input_channels * filter_height * filter_width; + int 
channels_col = im_channels * filter_height * filter_width; - T* im_data = im.data(); + T* im_data = im->data(); const T* col_data = col.data(); for (int c = 0; c < channels_col; ++c) { int w_offset = c % filter_width; int h_offset = (c / filter_width) % filter_height; int c_im = c / filter_width / filter_height; - for (int h = 0; h < output_height; ++h) { - for (int w = 0; w < output_width; ++w) { - int im_row_idx = h * stride_height + h_offset - padding_up; - int im_col_idx = w * stride_width + w_offset - padding_left; + for (int h = 0; h < col_height; ++h) { + for (int w = 0; w < col_width; ++w) { + int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0]; + int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1]; - if ((im_row_idx) >= 0 && (im_row_idx) < input_height && - (im_col_idx) >= 0 && (im_col_idx) < input_width) { - im_row_idx += c_im * input_height; - im_data[im_row_idx * input_width + im_col_idx] += - col_data[(c * output_height + h) * output_width + w]; + if ((im_row_idx) >= 0 && (im_row_idx) < im_height && + (im_col_idx) >= 0 && (im_col_idx) < im_width) { + im_row_idx += c_im * im_height; + im_data[im_row_idx * im_width + im_col_idx] += + col_data[(c * col_height + h) * col_width + w]; } } } @@ -168,64 +167,59 @@ class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right) { + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col) { PADDLE_ENFORCE(im.dims().size() == 3); - PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; - int filter_height = col.dims()[3]; - int filter_width = col.dims()[4]; - int output_height = col.dims()[0]; - int output_width = 
col.dims()[1]; + PADDLE_ENFORCE(col->dims().size() == 5); + int im_channels = im.dims()[0]; + int im_height = im.dims()[1]; + int im_width = im.dims()[2]; + int filter_height = col->dims()[3]; + int filter_width = col->dims()[4]; + int col_height = col->dims()[0]; + int col_width = col->dims()[1]; PADDLE_ENFORCE_EQ( - (input_height + padding_up + padding_down - filter_height) / - stride_height + - 1, - output_height, + (im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1, + col_height, "Output_height and padding(padding_up, padding_down) are " "inconsistent."); PADDLE_ENFORCE_EQ( - (input_width + padding_left + padding_right - filter_width) / - stride_width + - 1, - output_width, - "output_width and padding(padding_left, padding_right) are " + (im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " "inconsistent."); const T* im_data = im.data(); - T* col_data = col.data(); + T* col_data = col->data(); - for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { - for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { - for (int channel = 0; channel < input_channels; ++channel) { + for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) { + for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) { + for (int channel = 0; channel < im_channels; ++channel) { for (int filter_row_idx = 0; filter_row_idx < filter_height; ++filter_row_idx) { for (int filter_col_idx = 0; filter_col_idx < filter_width; ++filter_col_idx) { int im_row_offset = - col_row_idx * stride_height + filter_row_idx - padding_up; + col_row_idx * stride[0] + filter_row_idx - padding[0]; int im_col_offset = - col_col_idx * stride_width + filter_col_idx - padding_left; - int col_offset = ((((col_row_idx)*output_width + col_col_idx) * - input_channels + - channel) * - filter_height + - filter_row_idx) * - filter_width + - filter_col_idx; - if 
(im_row_offset < 0 || im_row_offset >= input_height || - im_col_offset < 0 || im_col_offset >= input_width) { - col_data[col_offset] = T(0); - } else { - int im_offset = - (channel * input_height + im_row_offset) * input_width + - im_col_offset; - col_data[col_offset] = im_data[im_offset]; - } + col_col_idx * stride[1] + filter_col_idx - padding[1]; + int col_offset = + ((((col_row_idx)*col_width + col_col_idx) * im_channels + + channel) * + filter_height + + filter_row_idx) * + filter_width + + filter_col_idx; + + int im_offset = (channel * im_height + im_row_offset) * im_width + + im_col_offset; + col_data[col_offset] = + (im_row_offset < 0 || im_row_offset >= im_height || + im_col_offset < 0 || im_col_offset >= im_width) + ? static_cast(0) + : im_data[im_offset]; } } } @@ -243,60 +237,57 @@ template class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right) { - PADDLE_ENFORCE(im.dims().size() == 3); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im) { + PADDLE_ENFORCE(im->dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; + int im_channels = im->dims()[0]; + int im_height = im->dims()[1]; + int im_width = im->dims()[2]; int filter_height = col.dims()[3]; int filter_width = col.dims()[4]; - int output_height = col.dims()[0]; - int output_width = col.dims()[1]; + int col_height = col.dims()[0]; + int col_width = col.dims()[1]; PADDLE_ENFORCE_EQ( - (input_height + padding_up + padding_down - filter_height) / - stride_height + - 1, - output_height, + (im_height + padding[0] + padding[2] - filter_height) / 
stride[0] + 1, + col_height, "Output_height and padding(padding_up, padding_down) are " "inconsistent."); PADDLE_ENFORCE_EQ( - (input_width + padding_left + padding_right - filter_width) / - stride_width + - 1, - output_width, - "output_width and padding(padding_left, padding_right) are " + (im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " "inconsistent."); - T* im_data = im.data(); + T* im_data = im->data(); const T* col_data = col.data(); - for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { - for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { - for (int channel = 0; channel < input_channels; ++channel) { + for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) { + for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) { + for (int channel = 0; channel < im_channels; ++channel) { for (int filter_row_idx = 0; filter_row_idx < filter_height; ++filter_row_idx) { for (int filter_col_idx = 0; filter_col_idx < filter_width; ++filter_col_idx) { int im_row_offset = - col_row_idx * stride_height + filter_row_idx - padding_up; + col_row_idx * stride[0] + filter_row_idx - padding[0]; int im_col_offset = - col_col_idx * stride_width + filter_col_idx - padding_left; - int col_offset = (((col_row_idx * output_width + col_col_idx) * - input_channels + - channel) * - filter_height + - filter_row_idx) * - filter_width + - filter_col_idx; - if (im_row_offset >= 0 && im_row_offset < input_height && - im_col_offset >= 0 && im_col_offset < input_width) { + col_col_idx * stride[1] + filter_col_idx - padding[1]; + int col_offset = + (((col_row_idx * col_width + col_col_idx) * im_channels + + channel) * + filter_height + + filter_row_idx) * + filter_width + + filter_col_idx; + if (im_row_offset >= 0 && im_row_offset < im_height && + im_col_offset >= 0 && im_col_offset < im_width) { int im_offset = - (channel * input_height + 
im_row_offset) * input_width + + (channel * im_height + im_row_offset) * im_width + im_col_offset; im_data[im_offset] += col_data[col_offset]; } diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu index 7b201fdbf3c5dd7d336d359e00b7323cecc0231a..bf7894243919571c2ab15d53690b1ef05bfcc6ee 100644 --- a/paddle/operators/math/im2col.cu +++ b/paddle/operators/math/im2col.cu @@ -20,36 +20,32 @@ namespace operators { namespace math { template -__global__ void im2col(const T* data_im, int num_outs, int height, int width, +__global__ void im2col(const T* data_im, int num_outs, int im_height, + int im_width, int dilation_h, int dilation_w, int filter_height, int filter_width, int stride_height, int stride_width, int padding_height, int padding_width, - int output_height, int output_width, T* data_col) { - int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; + int col_height, int col_width, T* data_col) { + const int index = + (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; if (index < num_outs) { - int w_out = index % output_width; - index /= output_width; - int h_out = index % output_height; - int channel_in = index / output_height; + int w_out = index % col_width; + int h_out = (index / col_width) % col_height; + int channel_in = index / col_width / col_height; int channel_out = channel_in * filter_height * filter_width; - int h_in = h_out * stride_height; - int w_in = w_out * stride_width; + int h_in = h_out * stride_height - padding_height; + int w_in = w_out * stride_width - padding_width; - data_col += (channel_out * output_height + h_out) * output_width + w_out; + data_col += (channel_out * col_height + h_out) * col_width + w_out; + data_im += (channel_in * im_height + h_in) * im_width + w_in; for (int i = 0; i < filter_height; ++i) { for (int j = 0; j < filter_width; ++j) { - int rIdx = int(h_in + i); - int cIdx = int(w_in + j); - if ((rIdx - (int)padding_height) >= (int)height || - (rIdx - 
(int)padding_height) < 0 || - (cIdx - (int)padding_width) >= (int)width || - (cIdx - (int)padding_width) < 0) { - *data_col = 0; - } else { - rIdx = rIdx + channel_in * height - padding_height; - cIdx = cIdx - padding_width; - *data_col = data_im[rIdx * width + cIdx]; - } - data_col += output_height * output_width; + int rIdx = h_in + i * dilation_h; + int cIdx = w_in + j * dilation_w; + *data_col = + (rIdx >= im_height || rIdx < 0 || cIdx >= im_width || cIdx < 0) + ? 0 + : data_im[i * dilation_h * im_width + j * dilation_w]; + data_col += col_height * col_width; } } } @@ -65,30 +61,36 @@ class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right) { + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col) { PADDLE_ENFORCE(im.dims().size() == 3); - PADDLE_ENFORCE(col.dims().size() == 5); - - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; - int filter_height = col.dims()[1]; - int filter_width = col.dims()[2]; - int output_height = col.dims()[3]; - int output_width = col.dims()[4]; - - PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) / - stride_height + - 1 == - output_height); - PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) / - stride_width + - 1 == - output_width); - - int num_outputs = input_channels * output_height * output_width; + PADDLE_ENFORCE(col->dims().size() == 5); + + int im_channels = im.dims()[0]; + int im_height = im.dims()[1]; + int im_width = im.dims()[2]; + int filter_height = col->dims()[1]; + int filter_width = col->dims()[2]; + int col_height = col->dims()[3]; + int col_width = col->dims()[4]; + + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + 
(dilation[0] * (filter_height - 1) + 1)) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + (dilation[1] * (filter_width - 1) + 1)) / + stride[1] + + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " + "inconsistent."); + + int num_outputs = im_channels * col_height * col_width; int blocks = (num_outputs + 1024 - 1) / 1024; int block_x = 512; int block_y = (blocks + 512 - 1) / 512; @@ -97,56 +99,57 @@ class Im2ColFunctor<<(context) .stream()>>>( - im.data(), num_outputs, input_height, input_width, filter_height, - filter_width, stride_height, stride_width, padding_up, padding_left, - output_height, output_width, col.data()); + im.data(), num_outputs, im_height, im_width, dilation[0], + dilation[1], filter_height, filter_width, stride[0], stride[1], + padding[0], padding[1], col_height, col_width, col->data()); } }; template -__global__ void col2im(size_t n, const T* data_col, size_t height, size_t width, - size_t channels, size_t filter_height, - size_t filter_width, size_t stride_height, - size_t stride_width, size_t padding_height, - size_t padding_width, size_t output_height, - size_t output_width, T* data_im) { - size_t index = +__global__ void col2im(int n, const T* data_col, int im_height, int im_width, + int dilation_h, int dilation_w, int filter_height, + int filter_width, int stride_height, int stride_width, + int padding_height, int padding_width, int col_height, + int col_width, T* data_im) { + const int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; + + const int d_filter_height = dilation_h * (filter_height - 1) + 1; + const int d_filter_width = dilation_w * (filter_width - 1) + 1; + if (index < n) { T val = 0; - int w = int(index % width); - int h = int((index / width) % height); - int c = int(index / (width * height)); - if ((w - (int)padding_width) >= 0 && - (w - 
(int)padding_width) < (width - 2 * padding_width) && - (h - (int)padding_height) >= 0 && - (h - padding_height) < (height - 2 * padding_height)) { - // compute the start and end of the output - int w_col_start = (w < (int)filter_width) - ? 0 - : (w - int(filter_width)) / (int)stride_width + 1; - int w_col_end = - min((int)(w / (int)stride_width + 1), (int)(output_width)); - int h_col_start = (h < (int)filter_height) - ? 0 - : (h - (int)filter_height) / (int)stride_height + 1; - int h_col_end = min(int(h / stride_height + 1), int(output_height)); - for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { - for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - // the col location: [c * width * height + h_out, w_out] - int c_col = int(c * filter_height * filter_width) + - (h - h_col * (int)stride_height) * (int)filter_width + - (w - w_col * (int)stride_width); - val += - data_col[(c_col * output_height + h_col) * output_width + w_col]; + int w = index % im_width + padding_width; + int h = (index / im_width) % im_height + padding_height; + int c = index / (im_width * im_height); + + // compute the start and end of the output + int w_col_start = + (w < d_filter_width) ? 0 : (w - d_filter_width) / stride_width + 1; + int w_col_end = min(w / stride_width + 1, col_width); + int h_col_start = + (h < d_filter_height) ? 
0 : (h - d_filter_height) / stride_height + 1; + int h_col_end = min(h / stride_height + 1, col_height); + + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + int h_off = (h - h_col * stride_height); + int w_off = (w - w_col * stride_width); + if (h_off % dilation_h == 0 && w_off % dilation_w == 0) { + h_off /= dilation_h; + w_off /= dilation_w; + int data_col_index = + (((c * filter_height + h_off) * filter_width + w_off) * + col_height + + h_col) * + col_width + + w_col; + + val += data_col[data_col_index]; } } - h -= padding_height; - w -= padding_width; - data_im[c * ((width - 2 * padding_width) * - (height - 2 * padding_height)) + - h * (width - 2 * padding_width) + w] += val; } + data_im[index] = val; } } @@ -159,33 +162,38 @@ template class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right) { - PADDLE_ENFORCE(im.dims().size() == 3); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im) { + PADDLE_ENFORCE(im->dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; + int im_channels = im->dims()[0]; + int im_height = im->dims()[1]; + int im_width = im->dims()[2]; int filter_height = col.dims()[1]; int filter_width = col.dims()[2]; - int output_height = col.dims()[3]; - int output_width = col.dims()[4]; - - PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) / - stride_height + - 1 == - output_height); - PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) / - stride_width + - 1 == - 
output_width); - - size_t num_kernels = input_channels * - (input_height + padding_up + padding_down) * - (input_width + padding_left + padding_right); + int col_height = col.dims()[3]; + int col_width = col.dims()[4]; + + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + (dilation[0] * (filter_height - 1) + 1)) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + (dilation[1] * (filter_width - 1) + 1)) / + stride[1] + + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " + "inconsistent."); + + size_t num_kernels = im_channels * im_height * im_width; size_t blocks = (num_kernels + 1024 - 1) / 1024; size_t block_x = 512; @@ -198,10 +206,9 @@ class Col2ImFunctor<<(context) .stream()>>>( - num_kernels, col.data(), input_height + padding_up + padding_down, - input_width + padding_left + padding_left, input_channels, - filter_height, filter_width, stride_height, stride_width, padding_up, - padding_left, output_height, output_width, im.data()); + num_kernels, col.data(), im_height, im_width, dilation[0], + dilation[1], filter_height, filter_width, stride[0], stride[1], + padding[0], padding[2], col_height, col_width, im->data()); } }; @@ -215,33 +222,32 @@ template class Col2ImFunctor; template -__global__ void im2colOCF(const T* im_data, T* col_data, int input_channels, - int input_height, int input_width, int filter_height, - int filter_width, int stride_height, int stride_width, - int padding_height, int padding_width, - int output_height, int output_width) { +__global__ void im2colOCF(const T* im_data, int im_channels, int im_height, + int im_width, int filter_height, int filter_width, + int stride_height, int stride_width, + int padding_height, int padding_width, int col_height, + int col_width, T* col_data) { int swid = blockIdx.x; int shid = blockIdx.y; - for (int channelid = threadIdx.z; channelid < 
input_channels; + for (int channelid = threadIdx.z; channelid < im_channels; channelid += blockDim.z) { for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) { int width_offset = idx + swid * stride_width - padding_width; int height_offset = idy + shid * stride_height - padding_height; - int im_offset = width_offset + height_offset * input_width + - channelid * input_height * input_width; + int im_offset = width_offset + height_offset * im_width + + channelid * im_height * im_width; int col_offset = idx + idy * filter_width + channelid * filter_height * filter_width + - (shid * output_width + swid) * - (input_channels * filter_height * filter_width); - - if (height_offset >= input_height || height_offset < 0 || - width_offset >= input_width || width_offset < 0) { - col_data[col_offset] = T(0); - } else { - col_data[col_offset] = im_data[im_offset]; - } + (shid * col_width + swid) * + (im_channels * filter_height * filter_width); + + col_data[col_offset] = + (height_offset >= im_height || height_offset < 0 || + width_offset >= im_width || width_offset < 0) + ? 
T(0) + : im_data[im_offset]; } } } @@ -257,27 +263,33 @@ class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right) { + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col) { PADDLE_ENFORCE(im.dims().size() == 3); - PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; - int filter_height = col.dims()[3]; - int filter_width = col.dims()[4]; - int output_height = col.dims()[0]; - int output_width = col.dims()[1]; - - PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) / - stride_height + - 1 == - output_height); - PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) / - stride_width + - 1 == - output_width); + PADDLE_ENFORCE(col->dims().size() == 5); + int im_channels = im.dims()[0]; + int im_height = im.dims()[1]; + int im_width = im.dims()[2]; + int filter_height = col->dims()[3]; + int filter_width = col->dims()[4]; + int col_height = col->dims()[0]; + int col_width = col->dims()[1]; + + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + (dilation[0] * (filter_height - 1) + 1)) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + (dilation[1] * (filter_width - 1) + 1)) / + stride[1] + + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " + "inconsistent."); int block_dim_x = 0; int block_dim_y = 0; @@ -296,42 +308,41 @@ class Im2ColFunctor<<(context) .stream()>>>( - im.data(), col.data(), input_channels, input_height, input_width, - filter_height, filter_width, stride_height, stride_width, padding_up, 
- padding_left, output_height, output_width); + im.data(), im_channels, im_height, im_width, filter_height, + filter_width, stride[0], stride[1], padding[0], padding[1], col_height, + col_width, col->data()); } }; template -__global__ void col2imOCF(T* im_data, const T* col_data, int input_channels, - int input_height, int input_width, int filter_height, - int filter_width, int stride_height, int stride_width, - int padding_height, int padding_width, - int output_height, int output_width) { +__global__ void col2imOCF(const T* col_data, int im_channels, int im_height, + int im_width, int filter_height, int filter_width, + int stride_height, int stride_width, + int padding_height, int padding_width, int col_height, + int col_width, T* im_data) { int swid = blockIdx.x; int shid = blockIdx.y; - for (int channelid = threadIdx.z; channelid < input_channels; + for (int channelid = threadIdx.z; channelid < im_channels; channelid += blockDim.z) { for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) { int width_offset = idx + swid * stride_width - padding_width; int height_offset = idy + shid * stride_height - padding_height; - int im_offset = width_offset + height_offset * input_width + - channelid * input_height * input_width; + int im_offset = width_offset + height_offset * im_width + + channelid * im_height * im_width; int col_offset = idx + idy * filter_width + channelid * filter_height * filter_width + - (shid * output_width + swid) * - (input_channels * filter_height * filter_width); + (shid * col_width + swid) * + (im_channels * filter_height * filter_width); - if (height_offset >= 0 && height_offset < input_height && - width_offset >= 0 && width_offset < input_width) { + if (height_offset >= 0 && height_offset < im_height && + width_offset >= 0 && width_offset < im_width) { paddle::platform::CudaAtomicAdd(im_data + im_offset, col_data[col_offset]); } @@ -349,28 +360,35 @@ template 
class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right) { - PADDLE_ENFORCE(im.dims().size() == 3); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im) { + PADDLE_ENFORCE(im->dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; + int im_channels = im->dims()[0]; + int im_height = im->dims()[1]; + int im_width = im->dims()[2]; int filter_height = col.dims()[3]; int filter_width = col.dims()[4]; - int output_height = col.dims()[0]; - int output_width = col.dims()[1]; - - PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) / - stride_height + - 1 == - output_height); - PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) / - stride_width + - 1 == - output_width); + int col_height = col.dims()[0]; + int col_width = col.dims()[1]; + + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + (dilation[0] * (filter_height - 1) + 1)) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + (dilation[1] * (filter_width - 1) + 1)) / + stride[1] + + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " + "inconsistent."); int block_dim_x = 0; int block_dim_y = 0; @@ -389,15 +407,14 @@ class Col2ImFunctor<<(context) .stream()>>>( - im.data(), col.data(), input_channels, input_height, input_width, - filter_height, filter_width, stride_height, stride_width, padding_up, - padding_left, output_height, output_width); + col.data(), im_channels, 
im_height, im_width, filter_height, + filter_width, stride[0], stride[1], padding[0], padding[1], col_height, + col_width, im->data()); } }; diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index c736d4fa523c2af3e3dd7a11114d7f84021bc5c1..deb60051beef56437cf75f0fa2cef90bbc0a209a 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -35,6 +35,15 @@ enum class ColFormat { kCFO = 0, kOCF = 1 }; * \param colData Column data. * \param colShape The shape of colData. * + * \param dilations dilation data. + * \param 2-dimension [dilation_height, dilation_width]. + * + * \param strides stride data. + * \param 2-dimension [stride_height, stride_width]. + * + * \param paddings padding data. + * \param 4-dimension [up_pad, left_pad, down_pad, right_pad]. + * * If the template argument Format is kCFO, the shape of colData is: * [input_channels, filter_height, filter_width, output_height, output_width] * So, it is easy to reshape into a convolution matrix for convolution @@ -73,18 +82,19 @@ template class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right); + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col); }; template class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im); }; } // namespace math diff --git 
a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc index 5763782c4edec87f44dabef2ccffe3097eeb2421..10c28da72ba9d3b94bb59c5cf00e7f5a2f28fd06 100644 --- a/paddle/operators/math/im2col_test.cc +++ b/paddle/operators/math/im2col_test.cc @@ -45,10 +45,14 @@ void testIm2col() { int input_height = 2; int input_width = 3; int filter_size = 2; - int stride = 1; - int padding = 0; - int output_height = (input_height - filter_size + 2 * padding) / stride + 1; - int output_width = (input_width - filter_size + 2 * padding) / stride + 1; + std::vector stride({1, 1}); // stride_y, stride_x + std::vector padding( + {0, 0, 0, 0}); // up_pad, left_pad, down_pad, right_pad + std::vector dilation({1, 1}); // dilation_y, dilation_x + int output_height = + (input_height - filter_size + padding[0] + padding[1]) / stride[0] + 1; + int output_width = + (input_width - filter_size + padding[2] + padding[3]) / stride[1] + 1; float* input_ptr = input_tmp.mutable_data( {1, input_height, input_width}, paddle::platform::CPUPlace()); float arr[6] = {0, 1, 2, 3, 4, 5}; @@ -85,10 +89,8 @@ void testIm2col() { paddle::operators::math::ColFormat::kOCF, Place, float> im2col_ocf; - im2col(*context, input, output_cfo, stride, stride, padding, padding, padding, - padding); - im2col_ocf(*context, input, output_ocf, stride, stride, padding, padding, - padding, padding); + im2col(*context, input, dilation, stride, padding, &output_cfo); + im2col_ocf(*context, input, dilation, stride, padding, &output_ocf); float out_cfo_data[] = {0, 1, 1, 2, 3, 4, 4, 5}; float out_ocf_data[] = {0, 1, 3, 4, 1, 2, 4, 5}; @@ -131,8 +133,7 @@ void testIm2col() { input.CopyFrom(input_tmp, *place, *context); } - col2im(*context, input, output_cfo, stride, stride, padding, padding, padding, - padding); + col2im(*context, output_cfo, dilation, stride, padding, &input); float* in_ptr; if (paddle::platform::is_cpu_place(*place)) { @@ -153,8 +154,7 @@ void testIm2col() { input.CopyFrom(input_tmp, *place, 
*context); } - col2im_ocf(*context, input, output_ocf, stride, stride, padding, padding, - padding, padding); + col2im_ocf(*context, output_ocf, dilation, stride, padding, &input); if (paddle::platform::is_cpu_place(*place)) { in_ptr = input.data(); diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc index 1b0d4c8bdc683b5203a4bc4b3838560cffe00bc8..2e333a8cde721f8e65dbf2cf5e3aac6272172cc0 100644 --- a/paddle/operators/math/math_function.cc +++ b/paddle/operators/math/math_function.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/operators/math/math_function.h" #include "paddle/framework/data_type.h" +#include "paddle/operators/math/math_function_impl.h" namespace paddle { namespace operators { @@ -232,7 +233,36 @@ void gemv(const platform::DeviceContext& context, cblas_dgemv(CblasRowMajor, transA, M, N, alpha, A, N, B, 1, beta, C, 1); } +template <> +void axpy(const platform::DeviceContext& context, + const int n, const float alpha, + const float* x, float* y) { + cblas_saxpy(n, alpha, x, 1, y, 1); +} + +template <> +void axpy(const platform::DeviceContext& context, + const int n, const double alpha, + const double* x, double* y) { + cblas_daxpy(n, alpha, x, 1, y, 1); +} + template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; + +#define DEFINE_CPU_TRANS(RANK) \ + template struct Transpose; \ + template struct Transpose; + +DEFINE_CPU_TRANS(1); +DEFINE_CPU_TRANS(2); +DEFINE_CPU_TRANS(3); +DEFINE_CPU_TRANS(4); +DEFINE_CPU_TRANS(5); +DEFINE_CPU_TRANS(6); struct TensorSetConstantCPU { TensorSetConstantCPU(framework::Tensor* tensor, float value) @@ -280,6 +310,11 @@ void set_constant(const platform::DeviceContext& context, #endif } +template struct RowwiseAdd; +template struct RowwiseAdd; +template struct ColwiseSum; +template struct ColwiseSum; + } // namespace math } // namespace operators } // namespace paddle 
diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu index 817deec94314bdfd2ed7e4b0ba5212c72b813455..58356a4b7783241ca0292829bf05dc1a8ed80c6c 100644 --- a/paddle/operators/math/math_function.cu +++ b/paddle/operators/math/math_function.cu @@ -12,8 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#define EIGEN_USE_GPU #include "paddle/framework/data_type.h" #include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/math_function_impl.h" namespace paddle { namespace operators { @@ -231,11 +233,46 @@ void gemv(const platform::DeviceContext& context, cuTransA, N, M, &alpha, A, N, B, 1, &beta, C, 1)); } +template <> +void axpy(const platform::DeviceContext& context, + const int n, const float alpha, + const float* x, float* y) { + PADDLE_ENFORCE(platform::dynload::cublasSaxpy( + reinterpret_cast(context) + .cublas_handle(), + n, &alpha, x, 1, y, 1)); +} + +template <> +void axpy(const platform::DeviceContext& context, + const int n, const double alpha, + const double* x, double* y) { + PADDLE_ENFORCE(platform::dynload::cublasDaxpy( + reinterpret_cast(context) + .cublas_handle(), + n, &alpha, x, 1, y, 1)); +} + template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; + +#define DEFINE_GPU_TRANS(RANK) \ + template struct Transpose; \ + template struct Transpose; + +DEFINE_GPU_TRANS(1); +DEFINE_GPU_TRANS(2); +DEFINE_GPU_TRANS(3); +DEFINE_GPU_TRANS(4); +DEFINE_GPU_TRANS(5); +DEFINE_GPU_TRANS(6); struct TensorSetConstantGPU { TensorSetConstantGPU(const platform::DeviceContext& context, - framework::Tensor* tensor, float value) + framework::Tensor* tensor, float value) : context_(context), tensor_(tensor), value_(value) {} template @@ -257,6 +294,11 @@ void set_constant_with_place( 
TensorSetConstantGPU(context, tensor, value)); } +template struct RowwiseAdd; +template struct RowwiseAdd; +template struct ColwiseSum; +template struct ColwiseSum; + } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/math_function.h b/paddle/operators/math/math_function.h index c2aaa1d7b7e920c3e6fd9ae4424eae725c3b7c0e..ffb99f53808c4316ede96b04e57aec4dae4134de 100644 --- a/paddle/operators/math/math_function.h +++ b/paddle/operators/math/math_function.h @@ -93,14 +93,21 @@ void gemv(const platform::DeviceContext& context, const bool trans_a, const int M, const int N, const T alpha, const T* A, const T* B, const T beta, T* C); +template +void axpy(const platform::DeviceContext& context, const int n, const T alpha, + const T* x, T* y); + +template +struct Transpose { + void operator()(const platform::DeviceContext& context, + const framework::Tensor& in, framework::Tensor* out, + const std::vector& axis); +}; + template struct SetConstant { void operator()(const platform::DeviceContext& context, - framework::Tensor* tensor, T num) { - auto t = framework::EigenVector::Flatten(*tensor); - t.device(*context.GetEigenDevice()) = - t.constant(static_cast(num)); - } + framework::Tensor* tensor, T num); }; template @@ -110,6 +117,19 @@ void set_constant_with_place(const platform::DeviceContext& context, void set_constant(const platform::DeviceContext& context, framework::Tensor* tensor, float value); +template +struct RowwiseAdd { + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, const framework::Tensor& vec, + framework::Tensor* output); +}; + +template +struct ColwiseSum { + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, framework::Tensor* vec); +}; + } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/math_function_impl.h b/paddle/operators/math/math_function_impl.h new file mode 100644 
index 0000000000000000000000000000000000000000..4dc17a4e525c52b8f696277274a7ad00a6b00a08 --- /dev/null +++ b/paddle/operators/math/math_function_impl.h @@ -0,0 +1,83 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/data_type.h" +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { +namespace math { + +template +void SetConstant::operator()(const platform::DeviceContext& context, + framework::Tensor* tensor, T num) { + auto t = framework::EigenVector::Flatten(*tensor); + t.device(*context.GetEigenDevice()) = t.constant(static_cast(num)); +} + +template +void Transpose::operator()( + const platform::DeviceContext& context, const framework::Tensor& in, + framework::Tensor* out, const std::vector& axis) { + Eigen::array permute; + for (int i = 0; i < Rank; i++) { + permute[i] = axis[i]; + } + auto in_dim = in.dims(); + auto out_dim = out->dims(); + + auto eigen_in = framework::EigenTensor::From(in); + auto eigen_out = framework::EigenTensor::From(*out); + auto* dev = context.GetEigenDevice(); + eigen_out.device(*dev) = eigen_in.shuffle(permute); +} + +template +void RowwiseAdd::operator()(const platform::DeviceContext& context, + const framework::Tensor& input, + const framework::Tensor& vector, + framework::Tensor* output) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector.numel(), size); 
+ PADDLE_ENFORCE_EQ(output->dims(), in_dims); + + auto in = framework::EigenMatrix::From(input); + auto vec = framework::EigenMatrix::From(vector); + auto out = framework::EigenMatrix::From(*output); + Eigen::array shape({{1, static_cast(size)}}); + Eigen::array bcast({{static_cast(in_dims[0]), 1}}); + out.device(*context.GetEigenDevice()) = + in + vec.reshape(shape).broadcast(bcast); +} + +template +void ColwiseSum::operator()(const platform::DeviceContext& context, + const framework::Tensor& input, + framework::Tensor* vector) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector->numel(), size); + + auto vec = framework::EigenMatrix::From(*vector); + auto in = framework::EigenMatrix::From(input); + Eigen::array shape({{1, static_cast(size)}}); + vec.reshape(shape).device(*context.GetEigenDevice()) = + in.sum(Eigen::array({{0}})).reshape(shape); +} + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/maxouting.cc b/paddle/operators/math/maxouting.cc new file mode 100644 index 0000000000000000000000000000000000000000..e5168ce7afd4139475afa6edd5999b9974407c9b --- /dev/null +++ b/paddle/operators/math/maxouting.cc @@ -0,0 +1,106 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/operators/math/maxouting.h" + +namespace paddle { +namespace operators { +namespace math { + +// All tensors are in NCHW format, and the groups must be greater than 1 +template +class MaxOutFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, + framework::Tensor * output, + int groups) { + const int batch_size = input.dims()[0]; + const int input_height = input.dims()[2]; + const int input_width = input.dims()[3]; + const int output_channels = output->dims()[1]; + int fea_size = input_height * input_width; + // c_size means the output size of each sample + int c_size = fea_size * output_channels; + const T* input_data = input.data(); + T* output_data = output->mutable_data(context.GetPlace()); + + for (int i = 0; i < batch_size; ++i) { + int new_bindex = c_size * i; + for (int c = 0; c < output_channels; ++c) { + int new_cindex = fea_size * c; + for (int f = 0; f < fea_size; ++f) { + T ele = static_cast(-FLT_MAX); + for (int ph = 0; ph < groups; ++ph) { + T x = input_data[(new_bindex + new_cindex) * groups + + ph * fea_size + f]; + ele = ele > x ? 
ele : x; + } + output_data[(new_bindex+new_cindex+f)] = ele; + } + } + } + } +}; + + + +template +class MaxOutGradFunctor { +public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, + framework::Tensor * input_grad, + const framework::Tensor& output, + const framework::Tensor& output_grad, + int groups) { + const int batch_size = input.dims()[0]; + const int input_height = input.dims()[2]; + const int input_width = input.dims()[3]; + const int output_channels = output.dims()[1]; + int fea_size = input_height * input_width; + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + for (int i = 0; i < batch_size; ++i) { + int blen = fea_size * output_channels * i; + for (int c = 0; c < output_channels; ++c) { + int clen = fea_size * c; + for (int f = 0; f < fea_size; ++f) { + int input_idx0 = (blen + clen) * groups + f; + bool continue_match = true; + int output_idx = blen + clen + f; + for (int g = 0; g < groups && continue_match; ++g) { + int input_idx = input_idx0 + fea_size * g; + if (input_data[input_idx] == output_data[output_idx]) { + input_grad_data[input_idx] += output_grad_data[output_idx]; + continue_match = false; + } + } + } + } + } + } +}; + +template class MaxOutGradFunctor; +template class MaxOutGradFunctor; +template class MaxOutFunctor; +template class MaxOutFunctor; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/maxouting.cu b/paddle/operators/math/maxouting.cu new file mode 100644 index 0000000000000000000000000000000000000000..7c698577b8a8258a58ba9a2b6c675457b2458a5b --- /dev/null +++ b/paddle/operators/math/maxouting.cu @@ -0,0 +1,154 @@ +/* Copyright (c) 2016 paddlepaddle Authors. All Rights Reserve. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/maxouting.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { +namespace math { + +template +__global__ void KernelMaxOut(const int nthreads, const T* input_data, + const int channels, + const int input_height, const int input_width, + int groups, T* output_data ) { + const int size = input_height * input_width * channels / groups; + const int feat_len = input_height * input_width; + int index = blockIdx.x * blockDim.x + threadIdx.x; + int offset = blockDim.x * gridDim.x; + for (int i = index; i < nthreads; i += offset) { + int batch_idx = i / size; + int batch_offset = i % size; + int channel_idx = batch_offset / feat_len; + int feat_idx = batch_offset % feat_len; + int data_idx = + (batch_idx * size + channel_idx * feat_len) * groups + feat_idx; + T ele = static_cast(-FLT_MAX); + for (int g = 0; g < groups; ++g) { + T x = input_data[data_idx + g * feat_len]; + ele = ele > x ? 
ele : x; + } + output_data[i] = ele; + } +} +template +__global__ void KernelMaxoutGrad( + const int nthreads, const T* input_data, const T* output_data, + const T* output_grad, T* input_grad, const int channels, + const int input_height, const int input_width, int groups) { + const int size = input_height * input_width * channels / groups; + const int feat_len = input_height * input_width; + int index = blockIdx.x * blockDim.x + threadIdx.x; + int offset = blockDim.x * gridDim.x; + for (int i = index; i < nthreads; i += offset) { + int batch_idx = i / size; + int batch_offset = i % size; + int channel_idx = batch_offset / feat_len; + int feat_idx = batch_offset % feat_len; + int data_idx = + (batch_idx * size + channel_idx * feat_len) * groups + feat_idx; + int max_index = -1; + bool continue_match = true; + for (int g = 0; g < groups && continue_match; ++g) { + if (input_data[data_idx + g * feat_len] == output_data[i]) { + max_index = data_idx + g * feat_len; + continue_match = false; + break; + } + } + if (max_index != -1) { + input_grad[max_index] += output_grad[index]; + } + } +} +/* + * All tensors are in NCHW format. 
+ */ +template +class MaxOutFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, framework::Tensor * output, + int groups) { + const int batch_size = input.dims()[0]; + const int input_channels = input.dims()[1]; + const int input_height = input.dims()[2]; + const int input_width = input.dims()[3]; + const int output_channels = output->dims()[1]; + const int output_height = output->dims()[2]; + const int output_width = output->dims()[3]; + + const T* input_data = input.data(); + T* output_data = output->mutable_data(context.GetPlace()); + int nthreads = output->numel(); + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelMaxOut< + T><<(context) + .stream()>>>(nthreads, input_data, input_channels, + input_height, input_width, groups, + output_data); + } +}; +/* + * All tensors are in NCHW format. + */ +template +class MaxOutGradFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, + framework::Tensor * input_grad, + const framework::Tensor& output, + const framework::Tensor& output_grad, + int groups) { + const int batch_size = input.dims()[0]; + const int input_channels = input.dims()[1]; + const int input_height = input.dims()[2]; + const int input_width = input.dims()[3]; + const int output_channels = output.dims()[1]; + const int output_height = output.dims()[2]; + const int output_width = output.dims()[3]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + int nthreads = output.numel(); + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelMaxoutGrad< + T><<(context) + .stream()>>>( + nthreads, input_data, output_data, output_grad_data, input_grad_data, + input_channels, input_height, input_width, 
groups); + } +}; + +template class MaxOutGradFunctor; +template class MaxOutGradFunctor; + +template class MaxOutFunctor; +template class MaxOutFunctor; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/maxouting.h b/paddle/operators/math/maxouting.h new file mode 100644 index 0000000000000000000000000000000000000000..d4c9da38ab8f8d88ed461d805ae64a015db968c4 --- /dev/null +++ b/paddle/operators/math/maxouting.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/hostdevice.h" + +namespace paddle { +namespace operators { +namespace math { + +#define FLT_MAX \ + __FLT_MAX__ + +template + +class MaxOutFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, framework::Tensor * output, + int groups); +}; + +template +class MaxOutGradFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, + framework::Tensor * input_grad, + const framework::Tensor& output, + const framework::Tensor& output_grad, int groups); +}; +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/pooling.cc b/paddle/operators/math/pooling.cc index ead89e146f32ef005b06f4f6f04224d691805d74..135984586a67f666425f81456148c3623ed7ef25 100644 --- a/paddle/operators/math/pooling.cc +++ b/paddle/operators/math/pooling.cc @@ -498,8 +498,8 @@ template class Pool3dGradFunctor< * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. 
*/ -template -class MaxPool2dWithIndexFunctor { +template +class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -520,9 +520,9 @@ class MaxPool2dWithIndexFunctor { const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -535,7 +535,7 @@ class MaxPool2dWithIndexFunctor { int wend = std::min(wstart + ksize_width, input_width); wstart = std::max(wstart, 0); - T ele = static_cast(-FLT_MAX); + T1 ele = static_cast(-FLT_MAX); int index = -1; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { @@ -563,8 +563,8 @@ class MaxPool2dWithIndexFunctor { * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. 
*/ -template -class MaxPool2dWithIndexGradFunctor { +template +class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -580,9 +580,9 @@ class MaxPool2dWithIndexGradFunctor { const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; - const T* mask_data = mask.data(); - const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T2* mask_data = mask.data(); + const T1* output_grad_data = output_grad.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int n = 0; n < batch_size; ++n) { for (int c = 0; c < output_channels; ++c) { @@ -602,18 +602,18 @@ class MaxPool2dWithIndexGradFunctor { } }; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; /* * All tensors are in NCDHW format. * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. 
*/ -template -class MaxPool3dWithIndexFunctor { +template +class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -639,9 +639,9 @@ class MaxPool3dWithIndexFunctor { const int input_stride = input_depth * input_height * input_width; const int output_stride = output_depth * output_height * output_width; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -659,7 +659,7 @@ class MaxPool3dWithIndexFunctor { wstart = std::max(wstart, 0); int output_idx = (pd * output_height + ph) * output_width + pw; - T ele = static_cast(-FLT_MAX); + T1 ele = static_cast(-FLT_MAX); int index = -1; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { @@ -691,8 +691,8 @@ class MaxPool3dWithIndexFunctor { * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. 
*/ -template -class MaxPool3dWithIndexGradFunctor { +template +class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -710,9 +710,9 @@ class MaxPool3dWithIndexGradFunctor { const int input_stride = input_depth * input_height * input_width; const int output_stride = output_depth * output_height * output_width; - const T* mask_data = mask.data(); - const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T2* mask_data = mask.data(); + const T1* output_grad_data = output_grad.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int n = 0; n < batch_size; ++n) { for (int c = 0; c < output_channels; ++c) { @@ -735,10 +735,10 @@ class MaxPool3dWithIndexGradFunctor { } }; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/pooling.cu b/paddle/operators/math/pooling.cu index 6d1138ad50cb095e85b4ceb44fa81731316f10dd..ca3560f264b59057fd655084f3d43adc617c6606 100644 --- a/paddle/operators/math/pooling.cu +++ b/paddle/operators/math/pooling.cu @@ -658,13 +658,13 @@ template class Pool3dGradFunctor< template class Pool3dGradFunctor< platform::GPUPlace, paddle::operators::math::AvgPoolGrad, double>; -template +template __global__ void KernelMaxPool2dWithIdx( - const int nthreads, const T* input_data, const int channels, + const int nthreads, const T1* input_data, const int channels, const int input_height, const int input_width, const int output_height, const int output_width, const int 
ksize_height, const int ksize_width, const int stride_height, const int stride_width, const int padding_height, - const int padding_width, T* output_data, T* mask_data) { + const int padding_width, T1* output_data, T2* mask_data) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -681,7 +681,7 @@ __global__ void KernelMaxPool2dWithIdx( wstart = max(wstart, 0); input_data += (batch_idx * channels + c) * input_height * input_width; - T ele = -FLT_MAX; + T1 ele = -FLT_MAX; int max_index = -1; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { @@ -697,13 +697,13 @@ __global__ void KernelMaxPool2dWithIdx( } } -template +template __global__ void KernelMaxPool2DWithIdxGrad( - const int nthreads, const T* output_grad, const T* mask_data, + const int nthreads, const T1* output_grad, const T2* mask_data, const int channels, const int input_height, const int input_width, const int output_height, const int output_width, const int ksize_height, const int ksize_width, const int stride_height, const int stride_width, - const int padding_height, const int padding_width, T* input_grad) { + const int padding_height, const int padding_width, T1* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int w_offset = index % input_width; @@ -724,7 +724,7 @@ __global__ void KernelMaxPool2DWithIdxGrad( int pw_end = min((w_offset + padding_width) / stride_width + 1, output_width); - T gradient = 0; + T1 gradient = 0; int input_current_featuremap_idx = h_offset * input_width + w_offset; int output_idx = (batch_idx * channels + c_offset) * output_height * output_width; @@ -746,8 +746,8 @@ __global__ void KernelMaxPool2DWithIdxGrad( * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. 
*/ -template -class MaxPool2dWithIndexFunctor { +template +class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -767,9 +767,9 @@ class MaxPool2dWithIndexFunctor { const int padding_height = paddings[0]; const int padding_width = paddings[1]; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_height * output_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -777,9 +777,9 @@ class MaxPool2dWithIndexFunctor { dim3 grid(blocks, 1); KernelMaxPool2dWithIdx< - T><<(context) - .stream()>>>( + T1, T2><<(context) + .stream()>>>( nthreads, input_data, input_channels, input_height, input_width, output_height, output_width, ksize_height, ksize_width, stride_height, stride_width, padding_height, padding_width, output_data, mask_data); @@ -791,8 +791,8 @@ class MaxPool2dWithIndexFunctor { * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. 
*/ -template -class MaxPool2dWithIndexGradFunctor { +template +class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -812,9 +812,9 @@ class MaxPool2dWithIndexGradFunctor { const int padding_height = paddings[0]; const int padding_width = paddings[1]; - const T* mask_data = mask.data(); - const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T2* mask_data = mask.data(); + const T1* output_grad_data = output_grad.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_height * input_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -822,30 +822,30 @@ class MaxPool2dWithIndexGradFunctor { dim3 grid(blocks, 1); KernelMaxPool2DWithIdxGrad< - T><<(context) - .stream()>>>(nthreads, output_grad_data, mask_data, - input_channels, input_height, input_width, - output_height, output_width, ksize_height, - ksize_width, stride_height, stride_width, - padding_height, padding_width, input_grad_data); + T1, T2><<(context) + .stream()>>>( + nthreads, output_grad_data, mask_data, input_channels, input_height, + input_width, output_height, output_width, ksize_height, ksize_width, + stride_height, stride_width, padding_height, padding_width, + input_grad_data); } }; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; -template +template __global__ void KernelMaxPool3DWithIdx( - const int nthreads, const T* input_data, const int channels, + const int nthreads, const T1* input_data, const int channels, const int input_depth, const int input_height, const 
int input_width, const int output_depth, const int output_height, const int output_width, const int ksize_depth, const int ksize_height, const int ksize_width, const int stride_depth, const int stride_height, const int stride_width, const int padding_depth, const int padding_height, const int padding_width, - T* output_data, T* mask_data) { + T1* output_data, T2* mask_data) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -865,7 +865,7 @@ __global__ void KernelMaxPool3DWithIdx( hstart = max(hstart, 0); wstart = max(wstart, 0); - T ele = -FLT_MAX; + T1 ele = -FLT_MAX; int max_index = -1; input_data += (batch_idx * channels + c) * input_depth * input_height * input_width; @@ -885,15 +885,15 @@ __global__ void KernelMaxPool3DWithIdx( } } -template +template __global__ void KernelMaxPool3DWithIdxGrad( - const int nthreads, const T* output_grad, const T* mask, const int channels, - const int input_depth, const int input_height, const int input_width, - const int output_depth, const int output_height, const int output_width, - const int ksize_depth, const int ksize_height, const int ksize_width, - const int stride_depth, const int stride_height, const int stride_width, - const int padding_depth, const int padding_height, const int padding_width, - T* input_grad) { + const int nthreads, const T1* output_grad, const T2* mask, + const int channels, const int input_depth, const int input_height, + const int input_width, const int output_depth, const int output_height, + const int output_width, const int ksize_depth, const int ksize_height, + const int ksize_width, const int stride_depth, const int stride_height, + const int stride_width, const int padding_depth, const int padding_height, + const int padding_width, T1* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int w_offset = index % input_width; @@ 
-922,7 +922,7 @@ __global__ void KernelMaxPool3DWithIdxGrad( int pw_end = min((w_offset + padding_width) / stride_width + 1, output_width); - T gradient = 0; + T1 gradient = 0; int input_current_feature_map_idx = (d_offset * input_height + h_offset) * input_width + w_offset; int output_idx = (batch_idx * channels + c_offset) * output_depth * @@ -949,8 +949,8 @@ __global__ void KernelMaxPool3DWithIdxGrad( * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. */ -template -class MaxPool3dWithIndexFunctor { +template +class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -975,9 +975,9 @@ class MaxPool3dWithIndexFunctor { const int padding_height = paddings[1]; const int padding_width = paddings[2]; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_depth * output_height * output_width; @@ -986,9 +986,9 @@ class MaxPool3dWithIndexFunctor { dim3 grid(blocks, 1); KernelMaxPool3DWithIdx< - T><<(context) - .stream()>>>( + T1, T2><<(context) + .stream()>>>( nthreads, input_data, input_channels, input_depth, input_height, input_width, output_depth, output_height, output_width, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, stride_width, @@ -1001,8 +1001,8 @@ class MaxPool3dWithIndexFunctor { * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. 
*/ -template -class MaxPool3dWithIndexGradFunctor { +template +class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -1027,9 +1027,9 @@ class MaxPool3dWithIndexGradFunctor { const int padding_height = paddings[1]; const int padding_width = paddings[2]; - const T* output_grad_data = output_grad.data(); - const T* mask_data = mask.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T1* output_grad_data = output_grad.data(); + const T2* mask_data = mask.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_depth * input_height * input_width; @@ -1038,9 +1038,9 @@ class MaxPool3dWithIndexGradFunctor { dim3 grid(blocks, 1); KernelMaxPool3DWithIdxGrad< - T><<(context) - .stream()>>>( + T1, T2><<(context) + .stream()>>>( nthreads, output_grad_data, mask_data, input_channels, input_depth, input_height, input_width, output_depth, output_height, output_width, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, @@ -1049,10 +1049,10 @@ class MaxPool3dWithIndexGradFunctor { } }; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/pooling.h b/paddle/operators/math/pooling.h index f6719e1e628cdd2cf7445ec9cd05713bc4f14c84..19fbd8b4bb2469d3ce8a139ce30a48641dbd6e0f 100644 --- a/paddle/operators/math/pooling.h +++ b/paddle/operators/math/pooling.h @@ -153,7 +153,7 @@ class MaxPool3dGradFunctor { * In pool2d, all tensors are in NCHW format. In pool3d, all tensors are in * NCDHW format. 
*/ -template +template class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, @@ -162,7 +162,7 @@ class MaxPool2dWithIndexFunctor { framework::Tensor* output, framework::Tensor* mask); }; -template +template class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, @@ -172,7 +172,7 @@ class MaxPool2dWithIndexGradFunctor { framework::Tensor* input_grad); }; -template +template class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, @@ -181,7 +181,7 @@ class MaxPool3dWithIndexFunctor { framework::Tensor* output, framework::Tensor* mask); }; -template +template class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, diff --git a/paddle/operators/math/sequence2batch.cu b/paddle/operators/math/sequence2batch.cu index 8d04653832d58aa048f73e53b8349a08da3145a4..c5d968aeb216bbb3e0e17f138b9e891494d99f75 100644 --- a/paddle/operators/math/sequence2batch.cu +++ b/paddle/operators/math/sequence2batch.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#define EIGEN_USE_GPU #include "paddle/operators/math/sequence2batch.h" namespace paddle { diff --git a/paddle/operators/math/sequence2batch.h b/paddle/operators/math/sequence2batch.h index 794c7d43973924d470124baf8c0c3de66e4ba087..73295ddbcb73fe80be08e732790f0ec75e94b415 100644 --- a/paddle/operators/math/sequence2batch.h +++ b/paddle/operators/math/sequence2batch.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include "paddle/framework/eigen.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/tensor.h" #include "paddle/platform/device_context.h" @@ -21,6 +22,10 @@ namespace paddle { namespace operators { namespace math { +template +using EigenMatrix = framework::EigenMatrix; + template class CopyMatrixRowsFunctor { public: diff --git a/paddle/operators/math/softmax.cc b/paddle/operators/math/softmax.cc index 0ba8197ab8b64649c8adcf67771ba01eca7f1d10..3e2f15d6c27f58818128f32fab0bd4c5f36b0050 100644 --- a/paddle/operators/math/softmax.cc +++ b/paddle/operators/math/softmax.cc @@ -13,13 +13,16 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/math/softmax.h" +#include "paddle/operators/math/softmax_impl.h" namespace paddle { namespace operators { namespace math { template class SoftmaxFunctor; +template class SoftmaxFunctor; template class SoftmaxGradFunctor; +template class SoftmaxGradFunctor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/softmax.cu b/paddle/operators/math/softmax.cu index 99f988d51e4b16c3f3bfd9c76b411bb53619603e..4dbab51d46bdaaa506a6c242d0958c73687f4eb9 100644 --- a/paddle/operators/math/softmax.cu +++ b/paddle/operators/math/softmax.cu @@ -15,13 +15,16 @@ limitations under the License. 
*/ #define EIGEN_USE_GPU #include "paddle/operators/math/softmax.h" +#include "paddle/operators/math/softmax_impl.h" namespace paddle { namespace operators { namespace math { template class SoftmaxFunctor; +template class SoftmaxFunctor; template class SoftmaxGradFunctor; +template class SoftmaxGradFunctor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/softmax.h b/paddle/operators/math/softmax.h index b7f627eee7f8fe68a83595a3390a55d438c97afb..fe1074650234c5beb5889e7efd713164769ad740 100644 --- a/paddle/operators/math/softmax.h +++ b/paddle/operators/math/softmax.h @@ -13,60 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" #include "paddle/framework/tensor.h" namespace paddle { namespace operators { namespace math { -template -using EigenMatrix = framework::EigenMatrix; - -template -struct ValueClip { - HOSTDEVICE T operator()(const T& x) const { - const T kThreshold = -64.; - return x < kThreshold ? 
kThreshold : x; - } -}; - template class SoftmaxFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor* X, framework::Tensor* Y) { - auto logits = EigenMatrix::From(*X); - auto softmax = EigenMatrix::From(*Y); - - const int kBatchDim = 0; - const int kClassDim = 1; - - const int batch_size = logits.dimension(kBatchDim); - const int num_classes = logits.dimension(kClassDim); - - Eigen::DSizes along_class(kClassDim); - Eigen::DSizes batch_by_one(batch_size, 1); - Eigen::DSizes one_by_class(1, num_classes); - - auto shifted_logits = (logits - - logits.maximum(along_class) - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)) - .unaryExpr(ValueClip()); - - softmax.device(*context.GetEigenDevice()) = shifted_logits.exp(); - softmax.device(*context.GetEigenDevice()) = - (softmax * - softmax.sum(along_class) - .inverse() - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); - } + const framework::Tensor* X, framework::Tensor* Y); }; template @@ -74,29 +31,7 @@ class SoftmaxGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor* y, const framework::Tensor* y_grad, - framework::Tensor* x_grad) { - auto softmax = EigenMatrix::From(*y); - auto softmax_grad = EigenMatrix::From(*y_grad); - auto logits_grad = EigenMatrix::From(*x_grad); - - const int kBatchDim = 0; - const int kClassDim = 1; - - const int batch_size = softmax.dimension(kBatchDim); - const int num_classes = softmax.dimension(kClassDim); - - Eigen::DSizes along_class(kClassDim); - Eigen::DSizes batch_by_one(batch_size, 1); - Eigen::DSizes one_by_class(1, num_classes); - - auto dot = (softmax * softmax_grad) - .sum(along_class) - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class); - logits_grad.device(*context.GetEigenDevice()) = - (softmax_grad - dot) * softmax; - } + framework::Tensor* x_grad); }; } // namespace math diff --git a/paddle/operators/math/softmax_impl.h 
b/paddle/operators/math/softmax_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..05793eeb3eeafaf36c301236197555b7b35e5803 --- /dev/null +++ b/paddle/operators/math/softmax_impl.h @@ -0,0 +1,98 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/tensor.h" + +namespace paddle { +namespace operators { +namespace math { + +template +using EigenMatrix = framework::EigenMatrix; + +template +struct ValueClip { + HOSTDEVICE T operator()(const T& x) const { + const T kThreshold = -64.; + return x < kThreshold ? 
kThreshold : x; + } +}; + +template +void SoftmaxFunctor::operator()( + const platform::DeviceContext& context, const framework::Tensor* X, + framework::Tensor* Y) { + auto logits = EigenMatrix::From(*X); + auto softmax = EigenMatrix::From(*Y); + + const int kBatchDim = 0; + const int kClassDim = 1; + + const int batch_size = logits.dimension(kBatchDim); + const int num_classes = logits.dimension(kClassDim); + + Eigen::DSizes along_class(kClassDim); + Eigen::DSizes batch_by_one(batch_size, 1); + Eigen::DSizes one_by_class(1, num_classes); + + auto shifted_logits = (logits - + logits.maximum(along_class) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)) + .unaryExpr(ValueClip()); + + softmax.device(*context.GetEigenDevice()) = shifted_logits.exp(); + softmax.device(*context.GetEigenDevice()) = + (softmax * + softmax.sum(along_class) + .inverse() + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)); +} + +template +void SoftmaxGradFunctor::operator()( + const platform::DeviceContext& context, const framework::Tensor* y, + const framework::Tensor* y_grad, framework::Tensor* x_grad) { + auto softmax = EigenMatrix::From(*y); + auto softmax_grad = EigenMatrix::From(*y_grad); + auto logits_grad = EigenMatrix::From(*x_grad); + + const int kBatchDim = 0; + const int kClassDim = 1; + + const int batch_size = softmax.dimension(kBatchDim); + const int num_classes = softmax.dimension(kClassDim); + + Eigen::DSizes along_class(kClassDim); + Eigen::DSizes batch_by_one(batch_size, 1); + Eigen::DSizes one_by_class(1, num_classes); + + auto dot = (softmax * softmax_grad) + .sum(along_class) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class); + logits_grad.device(*context.GetEigenDevice()) = + (softmax_grad - dot) * softmax; +} + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/vol2col.cc b/paddle/operators/math/vol2col.cc index 
e9718a047381596a1570b4b00546622968b70227..99eb7fd46de42400a915d86706580d15b08a74a2 100644 --- a/paddle/operators/math/vol2col.cc +++ b/paddle/operators/math/vol2col.cc @@ -28,28 +28,51 @@ template class Vol2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& vol, framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const { + const framework::Tensor& vol, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* col) const { PADDLE_ENFORCE(vol.dims().size() == 4); - PADDLE_ENFORCE(col.dims().size() == 7); + PADDLE_ENFORCE(col->dims().size() == 7); int input_channels = vol.dims()[0]; int input_depth = vol.dims()[1]; int input_height = vol.dims()[2]; int input_width = vol.dims()[3]; - int filter_depth = col.dims()[1]; - int filter_height = col.dims()[2]; - int filter_width = col.dims()[3]; - int output_depth = col.dims()[4]; - int output_height = col.dims()[5]; - int output_width = col.dims()[6]; + int filter_depth = col->dims()[1]; + int filter_height = col->dims()[2]; + int filter_width = col->dims()[3]; + int output_depth = col->dims()[4]; + int output_height = col->dims()[5]; + int output_width = col->dims()[6]; int channels_col = input_channels * filter_depth * filter_height * filter_width; + PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] - + ((dilations[0] * (filter_depth - 1) + 1))) / + strides[0] + + 1, + output_depth, + "input_depth and output_depth are " + "mismatching."); + PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] - + ((dilations[1] * (filter_height - 1) + 1))) / + strides[1] + + 1, + output_height, + "input_height and output_height are " + "mismatching."); + PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] - + ((dilations[2] * (filter_width - 1) + 1))) / + strides[2] + + 1, + output_width, + "input_width and output_width are " + "mismatching."); + 
const T* vol_data = vol.data(); - T* col_data = col.data(); + T* col_data = col->data(); for (int c = 0; c < channels_col; ++c) { int w_offset = c % filter_width; @@ -57,24 +80,23 @@ class Vol2ColFunctor { int d_offset = (c / filter_width / filter_height) % filter_depth; int c_in = c / filter_width / filter_height / filter_depth; for (int d = 0; d < output_depth; ++d) { - int d_pad = d * stride_depth - padding_depth + d_offset; + int d_pad = d * strides[0] - paddings[0] + d_offset * dilations[0]; for (int h = 0; h < output_height; ++h) { - int h_pad = h * stride_height - padding_height + h_offset; + int h_pad = h * strides[1] - paddings[1] + h_offset * dilations[1]; for (int w = 0; w < output_width; ++w) { - int w_pad = w * stride_width - padding_width + w_offset; + int w_pad = w * strides[2] - paddings[2] + w_offset * dilations[2]; int col_idx = ((c * output_depth + d) * output_height + h) * output_width + w; - if (h_pad < 0 || h_pad >= input_height || w_pad < 0 || - w_pad >= input_width || d_pad < 0 || d_pad >= input_depth) { - col_data[col_idx] = static_cast(0); - } else { - int vol_idx = - ((c_in * input_depth + d_pad) * input_height + h_pad) * - input_width + - w_pad; - col_data[col_idx] = vol_data[vol_idx]; - } + int vol_idx = + ((c_in * input_depth + d_pad) * input_height + h_pad) * + input_width + + w_pad; + col_data[col_idx] = + (h_pad < 0 || h_pad >= input_height || w_pad < 0 || + w_pad >= input_width || d_pad < 0 || d_pad >= input_depth) + ? 
static_cast(0) + : vol_data[vol_idx]; } } } @@ -92,17 +114,18 @@ template class Col2VolFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& vol, const framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const { - PADDLE_ENFORCE(vol.dims().size() == 4); + const framework::Tensor& col, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* vol) const { + PADDLE_ENFORCE(vol->dims().size() == 4); PADDLE_ENFORCE(col.dims().size() == 7); - int input_channels = vol.dims()[0]; - int input_depth = vol.dims()[1]; - int input_height = vol.dims()[2]; - int input_width = vol.dims()[3]; + int input_channels = vol->dims()[0]; + int input_depth = vol->dims()[1]; + int input_height = vol->dims()[2]; + int input_width = vol->dims()[3]; int filter_depth = col.dims()[1]; int filter_height = col.dims()[2]; int filter_width = col.dims()[3]; @@ -112,7 +135,28 @@ class Col2VolFunctor { int channels_col = input_channels * filter_depth * filter_height * filter_width; - T* vol_data = vol.data(); + PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] - + ((dilations[0] * (filter_depth - 1) + 1))) / + strides[0] + + 1, + output_depth, + "input_depth and output_depth are " + "mismatching."); + PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] - + ((dilations[1] * (filter_height - 1) + 1))) / + strides[1] + + 1, + output_height, + "input_height and output_height are " + "mismatching."); + PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] - + ((dilations[2] * (filter_width - 1) + 1))) / + strides[2] + + 1, + output_width, + "input_width and output_width are " + "mismatching."); + T* vol_data = vol->data(); const T* col_data = col.data(); for (int c = 0; c < channels_col; ++c) { @@ -121,11 +165,11 @@ class Col2VolFunctor { int d_offset = (c / filter_width / filter_height) % filter_depth; int cIm = c / filter_width 
/ filter_height / filter_depth; for (int d = 0; d < output_depth; ++d) { - int d_pad = d * stride_depth - padding_depth + d_offset; + int d_pad = d * strides[0] - paddings[0] + d_offset * dilations[0]; for (int h = 0; h < output_height; ++h) { - int h_pad = h * stride_height - padding_height + h_offset; + int h_pad = h * strides[1] - paddings[1] + h_offset * dilations[1]; for (int w = 0; w < output_width; ++w) { - int w_pad = w * stride_width - padding_width + w_offset; + int w_pad = w * strides[2] - paddings[2] + w_offset * dilations[2]; if (h_pad >= 0 && h_pad < input_height && w_pad >= 0 && w_pad < input_width && d_pad >= 0 && d_pad < input_depth) { @@ -133,6 +177,7 @@ class Col2VolFunctor { ((cIm * input_depth + d_pad) * input_height + h_pad) * input_width + w_pad; + int col_idx = ((c * output_depth + d) * output_height + h) * output_width + w; diff --git a/paddle/operators/math/vol2col.cu b/paddle/operators/math/vol2col.cu index 27b11fb237575fd25a789a5fcc24ed4e30607009..dae3be858e9f47d0133aa37e8a5f90a0addf1dfd 100644 --- a/paddle/operators/math/vol2col.cu +++ b/paddle/operators/math/vol2col.cu @@ -21,11 +21,12 @@ namespace math { template __global__ void vol2col(int num_kernels, const T* data_vol, int depth, - int height, int width, int filter_depth, - int filter_height, int filter_width, int stride_depth, - int stride_height, int stride_width, int padding_depth, - int padding_height, int padding_width, int output_detph, - int output_height, int output_width, T* data_col) { + int height, int width, int dilation_d, int dilation_h, + int dilation_w, int filter_depth, int filter_height, + int filter_width, int stride_depth, int stride_height, + int stride_width, int padding_depth, int padding_height, + int padding_width, int output_detph, int output_height, + int output_width, T* data_col) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels; index += blockDim.x * gridDim.x) { int w_out = index % output_width; @@ -44,12 +45,14 @@ 
__global__ void vol2col(int num_kernels, const T* data_vol, int depth, for (int k = 0; k < filter_depth; ++k) { for (int i = 0; i < filter_height; ++i) { for (int j = 0; j < filter_width; ++j) { - int d = d_in + k; - int h = h_in + i; - int w = w_in + j; + int d = d_in + k * dilation_d; + int h = h_in + i * dilation_h; + int w = w_in + j * dilation_w; + int col_idx = (k * dilation_d * height + i * dilation_h) * width + + j * dilation_w; *data_col = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 && w < width) - ? data_vol[(k * height + i) * width + j] + ? data_vol[col_idx] : 0; data_col += output_detph * output_height * output_width; } @@ -68,23 +71,46 @@ template class Vol2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& vol, framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const { + const framework::Tensor& vol, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* col) const { PADDLE_ENFORCE(vol.dims().size() == 4); - PADDLE_ENFORCE(col.dims().size() == 7); + PADDLE_ENFORCE(col->dims().size() == 7); int input_channels = vol.dims()[0]; int input_depth = vol.dims()[1]; int input_height = vol.dims()[2]; int input_width = vol.dims()[3]; - int filter_depth = col.dims()[1]; - int filter_height = col.dims()[2]; - int filter_width = col.dims()[3]; - int output_depth = col.dims()[4]; - int output_height = col.dims()[5]; - int output_width = col.dims()[6]; + int filter_depth = col->dims()[1]; + int filter_height = col->dims()[2]; + int filter_width = col->dims()[3]; + int output_depth = col->dims()[4]; + int output_height = col->dims()[5]; + int output_width = col->dims()[6]; + + PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] - + ((dilations[0] * (filter_depth - 1) + 1))) / + strides[0] + + 1, + output_depth, + "input_depth and output_depth are " + 
"Mismatching."); + PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] - + ((dilations[1] * (filter_height - 1) + 1))) / + strides[1] + + 1, + output_height, + "input_height and output_height are " + "Mismatching."); + PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] - + ((dilations[2] * (filter_width - 1) + 1))) / + strides[2] + + 1, + output_width, + "input_width and output_width are " + "Mismatching."); int num_outputs = input_channels * output_depth * output_height * output_width; @@ -95,19 +121,25 @@ class Vol2ColFunctor { reinterpret_cast(context) .stream()>>>( num_outputs, vol.data(), input_depth, input_height, input_width, - filter_depth, filter_height, filter_width, stride_depth, stride_height, - stride_width, padding_depth, padding_height, padding_width, - output_depth, output_height, output_width, col.data()); + dilations[0], dilations[1], dilations[2], filter_depth, filter_height, + filter_width, strides[0], strides[1], strides[2], paddings[0], + paddings[1], paddings[2], output_depth, output_height, output_width, + col->data()); } }; template __global__ void col2vol(int num_kernels, const T* data_col, int depth, - int height, int width, int filter_depth, - int filter_height, int filter_width, int stride_depth, - int stride_height, int stride_width, int padding_depth, - int padding_height, int padding_width, int output_detph, - int output_height, int output_width, T* data_vol) { + int height, int width, int dilation_d, int dilation_h, + int dilation_w, int filter_depth, int filter_height, + int filter_width, int stride_depth, int stride_height, + int stride_width, int padding_depth, int padding_height, + int padding_width, int output_detph, int output_height, + int output_width, T* data_vol) { + const int d_filter_depth = dilation_d * (filter_depth - 1) + 1; + const int d_filter_height = dilation_h * (filter_height - 1) + 1; + const int d_filter_width = dilation_w * (filter_width - 1) + 1; + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < 
num_kernels; index += blockDim.x * gridDim.x) { T src_val = 0; @@ -115,35 +147,41 @@ __global__ void col2vol(int num_kernels, const T* data_col, int depth, int h = (index / width) % height + padding_height; int d = (index / width / height) % depth + padding_depth; int c = index / width / height / depth; + // compute the start and end of the output int w_col_start = - (w < filter_width) ? 0 : (w - filter_width) / stride_width + 1; + (w < d_filter_width) ? 0 : (w - d_filter_width) / stride_width + 1; int w_col_end = min(w / stride_width + 1, output_width); int h_col_start = - (h < filter_height) ? 0 : (h - filter_height) / stride_height + 1; + (h < d_filter_height) ? 0 : (h - d_filter_height) / stride_height + 1; int h_col_end = min(h / stride_height + 1, output_height); int d_col_start = - (d < filter_depth) ? 0 : (d - filter_depth) / stride_depth + 1; + (d < d_filter_depth) ? 0 : (d - d_filter_depth) / stride_depth + 1; int d_col_end = min(d / stride_depth + 1, output_detph); - int offset = (c * filter_depth * filter_height * filter_width + - d * filter_width * filter_height + h * filter_width + w) * - output_detph * output_height * output_width; - - int coeff_d_col = - (1 - stride_depth * filter_width * filter_height * output_detph) * - output_height * output_width; - int coeff_h_col = - (1 - stride_height * filter_width * output_detph * output_height) * - output_width; - int coeff_w_col = - (1 - stride_width * output_detph * output_height * output_width); - for (int d_col = d_col_start; d_col < d_col_end; ++d_col) { for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - src_val += data_col[offset + d_col * coeff_d_col + - h_col * coeff_h_col + w_col * coeff_w_col]; + int d_off = (d - d_col * stride_depth); + int h_off = (h - h_col * stride_height); + int w_off = (w - w_col * stride_width); + if (d_off % dilation_d == 0 && h_off % dilation_h == 0 && + w_off % dilation_w == 0) { + d_off /= 
dilation_d; + h_off /= dilation_h; + w_off /= dilation_w; + + int data_col_index = + (((((c * filter_depth + d_off) * filter_height + h_off) * + filter_width + + w_off))); + data_col_index = + ((data_col_index * output_detph + d_col) * output_height + + h_col) * + output_width + + w_col; + src_val += data_col[data_col_index]; + } } } } @@ -161,17 +199,18 @@ template class Col2VolFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& vol, const framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const { - PADDLE_ENFORCE(vol.dims().size() == 4); + const framework::Tensor& col, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* vol) const { + PADDLE_ENFORCE(vol->dims().size() == 4); PADDLE_ENFORCE(col.dims().size() == 7); - int input_channels = vol.dims()[0]; - int input_depth = vol.dims()[1]; - int input_height = vol.dims()[2]; - int input_width = vol.dims()[3]; + int input_channels = vol->dims()[0]; + int input_depth = vol->dims()[1]; + int input_height = vol->dims()[2]; + int input_width = vol->dims()[3]; int filter_depth = col.dims()[1]; int filter_height = col.dims()[2]; int filter_width = col.dims()[3]; @@ -179,6 +218,28 @@ class Col2VolFunctor { int output_height = col.dims()[5]; int output_width = col.dims()[6]; + PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] - + ((dilations[0] * (filter_depth - 1) + 1))) / + strides[0] + + 1, + output_depth, + "input_depth and output_depth are " + "Mismatching."); + PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] - + ((dilations[1] * (filter_height - 1) + 1))) / + strides[1] + + 1, + output_height, + "input_height and output_height are " + "Mismatching."); + PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] - + ((dilations[2] * (filter_width - 1) + 1))) / + strides[2] + + 1, + output_width, + "input_width and output_width are " + 
"Mismatching."); + int num_kernels = input_channels * input_depth * input_height * input_width; const int threads = 1024; @@ -188,9 +249,10 @@ class Col2VolFunctor { reinterpret_cast(context) .stream()>>>( num_kernels, col.data(), input_depth, input_height, input_width, - filter_depth, filter_height, filter_width, stride_depth, stride_height, - stride_width, padding_depth, padding_height, padding_width, - output_depth, output_height, output_width, vol.data()); + dilations[0], dilations[1], dilations[2], filter_depth, filter_height, + filter_width, strides[0], strides[1], strides[2], paddings[0], + paddings[1], paddings[2], output_depth, output_height, output_width, + vol->data()); } }; diff --git a/paddle/operators/math/vol2col.h b/paddle/operators/math/vol2col.h index f022365a16fbf61981e94bedbd8b21a32887b235..cbc30bd754608dd6e6def1a4097d69bdf0c942c3 100644 --- a/paddle/operators/math/vol2col.h +++ b/paddle/operators/math/vol2col.h @@ -31,6 +31,15 @@ namespace math { * \param colData Column data. * \param colShape The shape of colData. * + * \param dilations dilation data. + * \param 3-dimension [dilation_depth, dilation_height, dilation_width]. + * + * \param strides stride data. + * \param 3-dimension [stride_depth, stride_height, stride_width]. + * + * \param paddings padding data. + * \param 3-dimension [d_pad, h_pad, w_pad]. 
+ * * The shape of colData is: * [input_channels, filter_depth, filter_height, filter_width, output_depth, * output_height, output_width] @@ -57,20 +66,22 @@ template class Vol2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& vol, framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const; + const framework::Tensor& vol, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* col) const; }; template class Col2VolFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& vol, const framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const; + const framework::Tensor& col, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* vol) const; }; } // namespace math diff --git a/paddle/operators/math/vol2col_test.cc b/paddle/operators/math/vol2col_test.cc index 74590d17cd0f974f830e760d85daef8ab5318a43..c31c716842f30de67c29b803866b8c82ddcf4a41 100644 --- a/paddle/operators/math/vol2col_test.cc +++ b/paddle/operators/math/vol2col_test.cc @@ -62,11 +62,15 @@ void testVol2col() { int input_height = 2; int input_width = 3; int filter_size = 2; - int stride = 1; - int padding = 0; - int output_depth = (input_depth - filter_size + 2 * padding) / stride + 1; - int output_height = (input_height - filter_size + 2 * padding) / stride + 1; - int output_width = (input_width - filter_size + 2 * padding) / stride + 1; + std::vector strides({1, 1, 1}); + std::vector paddings({0, 0, 0}); + std::vector dilations({1, 1, 1}); + int output_depth = + (input_depth - filter_size + 2 * paddings[0]) / strides[0] + 1; + int output_height = + (input_height - filter_size + 2 * paddings[1]) / strides[1] + 1; + int 
output_width = + (input_width - filter_size + 2 * paddings[2]) / strides[2] + 1; // Vol2Col test float* input_ptr = @@ -85,8 +89,7 @@ void testVol2col() { *place); paddle::operators::math::Vol2ColFunctor vol2col; - vol2col(*context, input, output, stride, stride, stride, padding, padding, - padding); + vol2col(*context, input, dilations, strides, paddings, &output); float vol_2_col[] = {0, 1, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11}; float* out_cfo_ptr; @@ -111,8 +114,7 @@ void testVol2col() { } paddle::operators::math::Col2VolFunctor col2vol; - col2vol(*context, input, output, stride, stride, stride, padding, padding, - padding); + col2vol(*context, output, dilations, strides, paddings, &input); float* in_ptr; if (paddle::platform::is_cpu_place(*place)) { diff --git a/paddle/operators/matmul_op.cu b/paddle/operators/matmul_op.cu.cc similarity index 100% rename from paddle/operators/matmul_op.cu rename to paddle/operators/matmul_op.cu.cc diff --git a/paddle/operators/matmul_op.h b/paddle/operators/matmul_op.h index 4f565946d596b5e5fbf90f16c0c13c780c36886c..1e4aa48b7018d8e3d6f02591fbca2877ddbd3c5d 100644 --- a/paddle/operators/matmul_op.h +++ b/paddle/operators/matmul_op.h @@ -15,8 +15,8 @@ #pragma once #include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/matmul.h" -#include "paddle/operators/transpose_op.h" namespace paddle { namespace operators { @@ -76,7 +76,10 @@ Tensor CombineBatchAndN(const framework::ExecutionContext& context, if (in_dims.size() == 3) { output.Resize({in_dims[1], in_dims[0], in_dims[2]}); output.mutable_data(context.GetPlace()); - EigenTranspose(context, input, output, {1, 0, 2}); + std::vector axis = {1, 0, 2}; + math::Transpose trans; + trans(context.device_context(), input, &output, axis); + std::vector out_dims = {in_dims[1], in_dims[0] * in_dims[2]}; output.Resize({in_dims[1], in_dims[0] * in_dims[2]}); } else { output.ShareDataWith(input); diff --git 
a/paddle/operators/maxout_op.cc b/paddle/operators/maxout_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..95467f2e69093906980d075b6a41c5d2934dd5a2 --- /dev/null +++ b/paddle/operators/maxout_op.cc @@ -0,0 +1,104 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#include "paddle/operators/maxout_op.h" +namespace paddle { +namespace operators { + +using framework::Tensor; + +class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MaxOutOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(Tensor) The input tensor of maxout operator. " + "The format of input tensor is NCHW. Where N is batch size, C is the " + "number of channels, H and W is the height and width of feature."); + AddOutput("Out", + "(Tensor) The output tensor of maxout operator." + "The format of output tensor is also NCHW." + "Where N is batch size, C is " + "the number of channels, H and W is the height and " + "width of feature."); + AddAttr( + "groups", + R"DOC("Specifies how many groups the input tensor will be split" + "in the channel dimension. And the number of output channel is " + "the number of channels divided by groups.." + )DOC"); + AddComment(R"DOC( + Assumed the input shape is (N, Ci, H, W). + The output shape is (N, Co, H, W). Then `Co = Ci / groups`. 
+ + math: + y_{si+j} = \max_k x_{gsi + sk + j} + g = groups + s = input.size / num_channels + 0 \le i < num_channels / groups + 0 \le j < s + 0 \le k < groups + + Please refer to Paper: + - Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf + - Multi-digit Number Recognition from Street View \ + Imagery using Deep Convolutional Neural Networks: \ + https://arxiv.org/pdf/1312.6082v4.pdf + )DOC"); + } +}; + + +class MaxOutOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of MaxoutOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of MaxoutOp should not be null."); + auto in_x_dims = ctx->GetInputDim("X"); + int groups = ctx->Attrs().Get("groups"); + // check groups > 1 + PADDLE_ENFORCE_GT( + groups, 1, + "groups should be larger than 1 in maxoutop"); + std::vector output_shape({in_x_dims[0], in_x_dims[1] / groups}); + output_shape.push_back(in_x_dims[2]); + output_shape.push_back(in_x_dims[3]); + ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); + } +}; + +class MaxOutOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), + "Input(X@GRAD) should not be null."); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(maxout, ops::MaxOutOp, ops::MaxOutOpMaker, maxout_grad, + ops::MaxOutOpGrad); +REGISTER_OP_CPU_KERNEL(maxout, ops::MaxOutKernel); +REGISTER_OP_CPU_KERNEL(maxout_grad, + ops::MaxOutGradKernel); diff --git 
a/paddle/operators/maxout_op.cu.cc b/paddle/operators/maxout_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..a5823fba6848a0d42a743c90d7d683e3e4ae4422 --- /dev/null +++ b/paddle/operators/maxout_op.cu.cc @@ -0,0 +1,25 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/maxout_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(maxout, + ops::MaxOutKernel, + ops::MaxOutKernel); +REGISTER_OP_GPU_KERNEL(maxout_grad, + ops::MaxOutGradKernel, + ops::MaxOutGradKernel); diff --git a/paddle/operators/maxout_op.h b/paddle/operators/maxout_op.h new file mode 100644 index 0000000000000000000000000000000000000000..c404cd16a9b2372ea4c6a17eb5ac82cf8f3bf27c --- /dev/null +++ b/paddle/operators/maxout_op.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/maxouting.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class MaxOutKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* in_x = context.Input("X"); + Tensor* out = context.Output("Out"); + int groups = context.template Attr("groups"); + + math::MaxOutFunctor maxout_forward; + maxout_forward(context.device_context(), *in_x, out, groups); + } +}; + +template +class MaxOutGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* in_x = context.Input("X"); + const Tensor* out = context.Input("Out"); + const Tensor* out_grad = + context.Input(framework::GradVarName("Out")); + Tensor* in_x_grad = context.Output(framework::GradVarName("X")); + int groups = context.template Attr("groups"); + auto& device_ctx = context.device_context(); + math::SetConstant zero; + if (in_x_grad) { + in_x_grad->mutable_data(context.GetPlace()); + zero(device_ctx, in_x_grad, static_cast(0.0)); + math::MaxOutGradFunctor maxout_backward; + maxout_backward(context.device_context(), *in_x, in_x_grad, *out, + *out_grad, groups); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu.cc similarity index 100% rename from paddle/operators/mul_op.cu rename to paddle/operators/mul_op.cu.cc diff --git a/paddle/operators/nccl_op.cu b/paddle/operators/nccl_op.cu.cc similarity index 100% rename from paddle/operators/nccl_op.cu rename to paddle/operators/nccl_op.cu.cc diff --git a/paddle/operators/nccl_op_test.cu b/paddle/operators/nccl_op_test.cu.cc similarity index 100% rename from paddle/operators/nccl_op_test.cu rename to paddle/operators/nccl_op_test.cu.cc diff 
--git a/paddle/operators/pool_cudnn_op.cc b/paddle/operators/pool_cudnn_op.cc index f962d9e3e6abde14ce21eb0102f10d139fdb160e..be9fcc5661f420aadf908cf80cce6c963008b0e4 100644 --- a/paddle/operators/pool_cudnn_op.cc +++ b/paddle/operators/pool_cudnn_op.cc @@ -20,6 +20,18 @@ REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad, ops::PoolOpGrad); REGISTER_OP_CPU_KERNEL(pool2d_cudnn, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_CPU_KERNEL(pool2d_cudnn_grad, - ops::PoolGradKernel) + ops::PoolGradKernel, + ops::PoolGradKernel) + +REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad, + ops::PoolOpGrad); + +REGISTER_OP_CPU_KERNEL(pool3d_cudnn, + ops::PoolKernel, + ops::PoolKernel); +REGISTER_OP_CPU_KERNEL(pool3d_cudnn_grad, + ops::PoolGradKernel, + ops::PoolGradKernel) diff --git a/paddle/operators/pool_cudnn_op.cu b/paddle/operators/pool_cudnn_op.cu.cc similarity index 88% rename from paddle/operators/pool_cudnn_op.cu rename to paddle/operators/pool_cudnn_op.cu.cc index 8711567b95fea355396173b5312d26d31f9ffb12..66dd194ccd5ed629c5861552a7c124dc911362d7 100644 --- a/paddle/operators/pool_cudnn_op.cu +++ b/paddle/operators/pool_cudnn_op.cu.cc @@ -52,7 +52,13 @@ class PoolCudnnOpKernel : public framework::OpKernel { ScopedTensorDescriptor input_desc; ScopedTensorDescriptor output_desc; ScopedPoolingDescriptor pool_desc; - DataLayout layout = DataLayout::kNCHW; + DataLayout layout; + + if (strides.size() == 2U) { + layout = DataLayout::kNCHW; + } else { + layout = DataLayout::kNCDHW; + } cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); @@ -112,7 +118,13 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { ScopedTensorDescriptor input_desc; ScopedTensorDescriptor output_desc; ScopedPoolingDescriptor pool_desc; - DataLayout layout = DataLayout::kNCHW; + DataLayout layout; + + if (strides.size() == 2U) { + layout = DataLayout::kNCHW; 
+ } else { + layout = DataLayout::kNCDHW; + } cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); @@ -135,8 +147,7 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { if (input_grad) { T *input_grad_data = input_grad->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; - set_zero(ctx.device_context(), input_grad, static_cast(0)); + // Because beta is zero, it is unnecessary to reset input_grad. PADDLE_ENFORCE(platform::dynload::cudnnPoolingBackward( handle, cudnn_pool_desc, &alpha, cudnn_output_desc, output_data, @@ -151,5 +162,12 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel); -REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel); +REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel, + ops::PoolCudnnOpKernel); +REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel, + ops::PoolCudnnGradOpKernel); + +REGISTER_OP_GPU_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel, + ops::PoolCudnnOpKernel); +REGISTER_OP_GPU_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel, + ops::PoolCudnnGradOpKernel); diff --git a/paddle/operators/pool_op.cc b/paddle/operators/pool_op.cc index f3963b1995ef8767786f0bf230b134afc69aa99d..d8c58618cf703d086d3cabc927ebc5eb038b1aec 100644 --- a/paddle/operators/pool_op.cc +++ b/paddle/operators/pool_op.cc @@ -217,14 +217,18 @@ REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad, ops::PoolOpGrad); REGISTER_OP_CPU_KERNEL(pool2d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_CPU_KERNEL(pool2d_grad, - ops::PoolGradKernel) + ops::PoolGradKernel, + ops::PoolGradKernel) REGISTER_OP(pool3d, ops::PoolOp, ops::Pool3dOpMaker, pool3d_grad, ops::PoolOpGrad); REGISTER_OP_CPU_KERNEL(pool3d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_CPU_KERNEL(pool3d_grad, - 
ops::PoolGradKernel); + ops::PoolGradKernel, + ops::PoolGradKernel); diff --git a/paddle/operators/pool_op.cu b/paddle/operators/pool_op.cu.cc similarity index 74% rename from paddle/operators/pool_op.cu rename to paddle/operators/pool_op.cu.cc index 0e3b80868f7b9d1697d619889160856d65ad59a3..1010cb762289dd39cd632c699f7528f4ba638278 100644 --- a/paddle/operators/pool_op.cu +++ b/paddle/operators/pool_op.cu.cc @@ -17,11 +17,15 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(pool2d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_GPU_KERNEL(pool2d_grad, - ops::PoolGradKernel); + ops::PoolGradKernel, + ops::PoolGradKernel); REGISTER_OP_GPU_KERNEL(pool3d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_GPU_KERNEL(pool3d_grad, - ops::PoolGradKernel); + ops::PoolGradKernel, + ops::PoolGradKernel); diff --git a/paddle/operators/pool_with_index_op.cc b/paddle/operators/pool_with_index_op.cc index 1df36e965abab3549aeb88bf682b712033c4d79c..4958fa645405db0798f37165030eae95da371477 100644 --- a/paddle/operators/pool_with_index_op.cc +++ b/paddle/operators/pool_with_index_op.cc @@ -29,11 +29,11 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), - "X(Input) of Pooling should not be null."); + "Input(X) of Pooling should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Out(Output) of Pooling should not be null."); + "Output(Out) of Pooling should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Mask"), - "Mask(Output) of Pooling should not be null."); + "Output(Mask) of Pooling should not be null."); auto in_x_dims = ctx->GetInputDim("X"); @@ -67,6 +67,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); ctx->SetOutputDim("Mask", framework::make_ddim(output_shape)); } + + protected: + 
framework::OpKernelType GetKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } }; class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { @@ -80,6 +88,14 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { "Input(X@GRAD) should not be null."); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } }; class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { @@ -116,7 +132,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { // TypedAttrChecker don't support vector type.) AddAttr( "global_pooling", - "(bool, default false) Whether to use the global pooling. " + "(bool, default:false) Whether to use the global pooling. " "If global_pooling = true, ksize and paddings will be ignored.") .SetDefault(false); AddAttr>("strides", @@ -126,7 +142,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector, defalut {0, 0}), paddings(height, width) of pooling " + "(vector, defalut:{0, 0}), paddings(height, width) of pooling " "operator. " "If global_pooling = true, paddings and will be ignored.") .SetDefault({0, 0}); // TODO(Chengduo): Add checker. 
(Currently, @@ -250,10 +266,12 @@ REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp, REGISTER_OP_CPU_KERNEL( max_pool2d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_CPU_KERNEL( max_pool2d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp, ops::MaxPool3dWithIndexOpMaker, max_pool3d_with_index_grad, @@ -261,7 +279,9 @@ REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp, REGISTER_OP_CPU_KERNEL( max_pool3d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_CPU_KERNEL( max_pool3d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) diff --git a/paddle/operators/pool_with_index_op.cu b/paddle/operators/pool_with_index_op.cu.cc similarity index 76% rename from paddle/operators/pool_with_index_op.cu rename to paddle/operators/pool_with_index_op.cu.cc index 287657d4b1c57f354ef050885f71261092bdc062..335064a7eea4ec15c529db5254cbb026ba575f3d 100644 --- a/paddle/operators/pool_with_index_op.cu +++ b/paddle/operators/pool_with_index_op.cu.cc @@ -18,14 +18,18 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( max_pool2d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_GPU_KERNEL( max_pool2d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) REGISTER_OP_GPU_KERNEL( max_pool3d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_GPU_KERNEL( max_pool3d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) diff --git 
a/paddle/operators/pool_with_index_op.h b/paddle/operators/pool_with_index_op.h index c0e3b117dc3ea351b9edfed4d1823de0db27d30a..40766c7e821e8b85aeda9473798a1f696d0ad719 100644 --- a/paddle/operators/pool_with_index_op.h +++ b/paddle/operators/pool_with_index_op.h @@ -24,8 +24,8 @@ namespace operators { using Tensor = framework::Tensor; -template -class MaxPoolWithIndexKernel : public framework::OpKernel { +template +class MaxPoolWithIndexKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* in_x = context.Input("X"); @@ -44,13 +44,13 @@ class MaxPoolWithIndexKernel : public framework::OpKernel { switch (ksize.size()) { case 2: { - paddle::operators::math::MaxPool2dWithIndexFunctor + paddle::operators::math::MaxPool2dWithIndexFunctor pool2d_forward; pool2d_forward(context.device_context(), *in_x, ksize, strides, paddings, out, mask); } break; case 3: { - paddle::operators::math::MaxPool3dWithIndexFunctor + paddle::operators::math::MaxPool3dWithIndexFunctor pool3d_forward; pool3d_forward(context.device_context(), *in_x, ksize, strides, paddings, out, mask); @@ -60,8 +60,8 @@ class MaxPoolWithIndexKernel : public framework::OpKernel { } }; -template -class MaxPoolWithIndexGradKernel : public framework::OpKernel { +template +class MaxPoolWithIndexGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* mask = context.Input("Mask"); @@ -80,23 +80,22 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel { } if (in_x_grad) { - in_x_grad->mutable_data(context.GetPlace()); - auto temp = framework::EigenVector::Flatten(*in_x_grad); - temp.device(context.GetEigenDevice()) = - temp.constant(static_cast(0)); + in_x_grad->mutable_data(context.GetPlace()); + auto& device_ctx = context.device_context(); + math::set_constant(device_ctx, in_x_grad, 0); switch (ksize.size()) { case 2: { - 
paddle::operators::math::MaxPool2dWithIndexGradFunctor + paddle::operators::math::MaxPool2dWithIndexGradFunctor pool2d_backward; - pool2d_backward(context.device_context(), *out_grad, *mask, ksize, - strides, paddings, in_x_grad); + pool2d_backward(device_ctx, *out_grad, *mask, ksize, strides, + paddings, in_x_grad); } break; case 3: { - paddle::operators::math::MaxPool3dWithIndexGradFunctor + paddle::operators::math::MaxPool3dWithIndexGradFunctor pool3d_backward; - pool3d_backward(context.device_context(), *out_grad, *mask, ksize, - strides, paddings, in_x_grad); + pool3d_backward(device_ctx, *out_grad, *mask, ksize, strides, + paddings, in_x_grad); } break; default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } } diff --git a/paddle/operators/reduce_op.h b/paddle/operators/reduce_op.h index 45043c440bc8017e97f8be00d08f1cb60d201e20..dd6547542d16b0fe336184a0c09a8498027db6ea 100644 --- a/paddle/operators/reduce_op.h +++ b/paddle/operators/reduce_op.h @@ -14,6 +14,7 @@ #pragma once +#include "glog/logging.h" #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" @@ -26,6 +27,10 @@ template using EigenTensor = framework::EigenTensor; +template +using EigenScalar = framework::EigenScalar; + struct SumFunctor { template void operator()(const Place& place, X& x, Y& y, const Dim& dim) { @@ -133,10 +138,17 @@ class ReduceKernel : public framework::OpKernel { dims_vector.erase(dims_vector.begin() + dim); dims = framework::make_ddim(dims_vector); } - auto out = EigenTensor < T, D == 1 ? 
1 : (D - 1) > ::From(*output, dims); + auto& place = context.GetEigenDevice(); Functor functor; - functor(place, x, out, reduce_dim); + + if (D == 1) { + auto out = EigenScalar::From(*output); + functor(place, x, out, reduce_dim); + } else { + auto out = EigenTensor::From(*output, dims); + functor(place, x, out, reduce_dim); + } } }; @@ -186,13 +198,13 @@ class ReduceGradKernel : public framework::OpKernel { auto x_reduce = EigenTensor::From(*input1, dims); auto x_reduce_grad = EigenTensor::From(*input2, dims); - Eigen::array braodcast_dim; - for (size_t i = 0; i < D; ++i) braodcast_dim[i] = 1; - braodcast_dim[dim] = input0->dims()[dim]; + Eigen::array broadcast_dim; + for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1; + broadcast_dim[dim] = input0->dims()[dim]; auto& place = context.GetEigenDevice(); Functor functor; - functor(place, x, x_reduce, x_grad, x_reduce_grad, braodcast_dim, - braodcast_dim[dim]); + functor(place, x, x_reduce, x_grad, x_reduce_grad, broadcast_dim, + broadcast_dim[dim]); } }; diff --git a/paddle/operators/reshape_op.cu b/paddle/operators/reshape_op.cu.cc similarity index 100% rename from paddle/operators/reshape_op.cu rename to paddle/operators/reshape_op.cu.cc diff --git a/paddle/operators/sequence_concat_op.cu b/paddle/operators/sequence_concat_op.cu.cc similarity index 100% rename from paddle/operators/sequence_concat_op.cu rename to paddle/operators/sequence_concat_op.cu.cc diff --git a/paddle/operators/sequence_conv_op.cc b/paddle/operators/sequence_conv_op.cc index 41cadce4c603a9c14db79e2f6b30f8664cf72a38..c5533732d44737bb8cc71fd8ac46f3c36c72ada1 100644 --- a/paddle/operators/sequence_conv_op.cc +++ b/paddle/operators/sequence_conv_op.cc @@ -179,7 +179,9 @@ REGISTER_OP(sequence_conv, ops::SequenceConvOp, ops::SequenceConvOpMaker, sequence_conv_grad, ops::SequenceConvGradOp); REGISTER_OP_CPU_KERNEL( - sequence_conv, ops::SequenceConvKernel); + sequence_conv, ops::SequenceConvKernel, + ops::SequenceConvKernel); 
REGISTER_OP_CPU_KERNEL( sequence_conv_grad, - ops::SequenceConvGradKernel); + ops::SequenceConvGradKernel, + ops::SequenceConvGradKernel); diff --git a/paddle/operators/sequence_conv_op.cu b/paddle/operators/sequence_conv_op.cu.cc similarity index 85% rename from paddle/operators/sequence_conv_op.cu rename to paddle/operators/sequence_conv_op.cu.cc index 4c0c673a517c4b05c3abd8bf6b5cf5bbb19cfae0..c8136dbcb35be4f1236dddc3d24546f9d91670c8 100644 --- a/paddle/operators/sequence_conv_op.cu +++ b/paddle/operators/sequence_conv_op.cu.cc @@ -12,13 +12,13 @@ See the License for the specific language governing permissions and limitations under the License. */ -#define EIGEN_USE_GPU - #include "paddle/operators/sequence_conv_op.h" namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( - sequence_conv, ops::SequenceConvKernel); + sequence_conv, ops::SequenceConvKernel, + ops::SequenceConvKernel); REGISTER_OP_GPU_KERNEL( sequence_conv_grad, - ops::SequenceConvGradKernel); + ops::SequenceConvGradKernel, + ops::SequenceConvGradKernel); diff --git a/paddle/operators/sequence_conv_op.h b/paddle/operators/sequence_conv_op.h index a57e1752bb8ed4844423f752bf0ad9f8e114486a..b8fbe2647c4338a2fa16aa655ebab64dd8d5417d 100644 --- a/paddle/operators/sequence_conv_op.h +++ b/paddle/operators/sequence_conv_op.h @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once -#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/math/context_project.h" #include "paddle/operators/math/math_function.h" @@ -62,9 +61,9 @@ class SequenceConvKernel : public framework::OpKernel { math::ContextProjectFunctor seq_project_functor; - seq_project_functor(context.device_context(), *in, *padding_data, col, + seq_project_functor(context.device_context(), *in, *padding_data, padding_trainable, context_start, context_length, - context_stride, up_pad, down_pad); + context_stride, up_pad, down_pad, &col); math::matmul(context.device_context(), col, false, filter, false, static_cast(1.0), out, static_cast(0.0)); @@ -117,10 +116,10 @@ class SequenceConvGradKernel : public framework::OpKernel { in_g->set_lod(in->lod()); set_zero(context.device_context(), in_g, static_cast(0)); - seq_project_grad_functor(context.device_context(), *in_g, *padding_data_g, - col, padding_trainable, context_start, - context_length, context_stride, up_pad, down_pad, - true, false); + seq_project_grad_functor(context.device_context(), *in_g, + padding_trainable, context_start, context_length, + context_stride, up_pad, down_pad, false, true, + padding_data_g, &col); } if (padding_trainable && padding_data_g) { @@ -129,9 +128,9 @@ class SequenceConvGradKernel : public framework::OpKernel { LoDTensor* input = const_cast(in); seq_project_grad_functor(context.device_context(), *input, - *padding_data_g, col, padding_trainable, - context_start, context_length, context_stride, - up_pad, down_pad, false, true); + padding_trainable, context_start, context_length, + context_stride, up_pad, down_pad, true, false, + padding_data_g, &col); } if (filter_g) { @@ -146,9 +145,9 @@ class SequenceConvGradKernel : public framework::OpKernel { padding_data = context.Input("PaddingData"); } - seq_project_functor(context.device_context(), *in, *padding_data, col, + seq_project_functor(context.device_context(), *in, *padding_data, 
padding_trainable, context_start, context_length, - context_stride, up_pad, down_pad); + context_stride, up_pad, down_pad, &col); math::matmul(context.device_context(), col, true, out_grad, false, T(1.0), &filter_grad, T(1.0)); diff --git a/paddle/operators/sequence_slice_op.cc b/paddle/operators/sequence_slice_op.cc new file mode 100755 index 0000000000000000000000000000000000000000..cbe0b4233160dd1f3ebdf6db8b5f6df392efdfe7 --- /dev/null +++ b/paddle/operators/sequence_slice_op.cc @@ -0,0 +1,132 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/operators/sequence_slice_op.h" + +namespace paddle { +namespace operators { + +class SequenceSliceOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of SequenceSliceOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Offset"), + "Input(Offset) of SequenceSliceOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Length"), + "Input(Length) of SequenceSliceOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of SequenceSliceOp should not be null."); + auto input_dims = ctx->GetInputDim("X"); + + auto offset_dim = ctx->GetInputDim("Offset"); + auto length_dim = ctx->GetInputDim("Length"); + + PADDLE_ENFORCE_EQ( + offset_dim.size(), 2UL, + "Only support one level sequence now, The rank of offset must be 2."); + PADDLE_ENFORCE_EQ( + length_dim.size(), 2UL, + "Only support one level sequence now, The rank of Length must be 2."); + + // Initialize the output's dims to maximum, + // and re-set to real dims by the value of Offset and Length at kernel + ctx->SetOutputDim("Out", input_dims); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class SequenceSliceGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "The gradient of Out should not be null."); + PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")), + "The gradient of X should not be null."); + ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); + } + + protected: + 
framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class SequenceSliceOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SequenceSliceOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(LoDTensor), " + "the input of SequenceSliceOp."); + AddInput("Offset", + "(Tensor), " + "a vector to describe the offset of every input sequence for " + "sub sequence item."); + AddInput("Length", + "(Tensor), " + "a vector to describe the length of every input sequence for " + "sub sequence item."); + AddOutput("Out", + "(LoDTensor), the output of SequenceSliceOp."); + AddComment(R"DOC( +Sequence slice operator + +The operator crops a subsequence from given sequence with given start offset and subsequence length. +It only supports sequence (LoD Tensor with level number is 1). +- Case: + X = [[a1, a2; + b1, b2; + c1, c2] + [d1, d2; + e1, e2]] + LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 2) + Offset = [[0], [1]]; Length = [[2], [1]] + + Out = [[a1, a2; + b1, b2] + [e1, e2]] + LoD(Out) = {{0, 2, 3}}; Dims(Out) = (3, 2) +NOTE: The first dimension size of input, the size of offset and Length, should be equal. The offset start from 0. 
+ )DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(sequence_slice, ops::SequenceSliceOp, ops::SequenceSliceOpMaker, + sequence_slice_grad, ops::SequenceSliceGradOp); +REGISTER_OP_CPU_KERNEL( + sequence_slice, + ops::SequenceSliceOpKernel); +REGISTER_OP_CPU_KERNEL( + sequence_slice_grad, + ops::SequenceSliceGradOpKernel); diff --git a/paddle/operators/sequence_slice_op.cu b/paddle/operators/sequence_slice_op.cu new file mode 100755 index 0000000000000000000000000000000000000000..a9f59dadba74d900fa5cc0601fb5b264ea19e34d --- /dev/null +++ b/paddle/operators/sequence_slice_op.cu @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/sequence_slice_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + sequence_slice, + ops::SequenceSliceOpKernel); +REGISTER_OP_GPU_KERNEL( + sequence_slice_grad, + ops::SequenceSliceGradOpKernel); diff --git a/paddle/operators/sequence_slice_op.h b/paddle/operators/sequence_slice_op.h new file mode 100755 index 0000000000000000000000000000000000000000..2c9b8464a1236a054cf1a38b9dc1d73588f8dd38 --- /dev/null +++ b/paddle/operators/sequence_slice_op.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/strided_memcpy.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using LoD = framework::LoD; + +template +inline LoD SequenceSliceLoD(const T& in, const int64_t* offset_data, + const int64_t* length_data) { + auto out_lod = in.lod(); + size_t lod_offset = 0; + + auto n = in.lod()[0].size() - 1; + out_lod[0][0] = 0; + for (size_t i = 0; i < n; ++i) { + lod_offset += length_data[i]; + out_lod[0][i+1] = lod_offset; + } + return out_lod; +} + +template +class SequenceSliceOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* offset = ctx.Input("Offset"); + auto* length = ctx.Input("Length"); + auto* out = ctx.Output("Out"); + + auto lod = in->lod(); + auto n = lod[0].size() - 1; + + PADDLE_ENFORCE_EQ(lod.size(), 1UL, + "Only support one level sequence now."); + PADDLE_ENFORCE_EQ( + n, static_cast(length->dims()[0]), + "The size of input-sequence and length-array should be the same") + PADDLE_ENFORCE_EQ( + n, static_cast(offset->dims()[0]), + "The size of input-sequence and offset-array should be the same") + + const int64_t* offset_data = offset->data(); + const int64_t* length_data = length->data(); + framework::Tensor offset_cpu; + framework::Tensor length_cpu; + + if (platform::is_gpu_place(ctx.GetPlace())) { + offset_cpu.mutable_data(offset->dims(), 
platform::CPUPlace()); + offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context()); + offset_data = offset_cpu.data(); + + length_cpu.mutable_data(length->dims(), platform::CPUPlace()); + length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context()); + length_data = length_cpu.data(); + } + + for (size_t i = 0; i < n; ++i) { + PADDLE_ENFORCE_LT(0, offset_data[i], + "The offset[%d] must greater than zero.", i) + PADDLE_ENFORCE_LT(0, length_data[i], + "The length[%d] must greater than zero.", i) + PADDLE_ENFORCE_LT( + lod[0][i] + offset_data[i] + length_data[i], + lod[0][i + 1], + "The target tensor's length overflow.") + } + + out->mutable_data(ctx.GetPlace()); + auto out_lod = SequenceSliceLoD(*in, offset_data, length_data); + auto out_dims = in->dims(); + out_dims[0] = out_lod[0][out_lod[0].size() - 1]; + out->Resize(out_dims); + out->set_lod(out_lod); + + auto in_stride = framework::stride(in->dims()); + auto out_stride = framework::stride(out->dims()); + + size_t out_offset = 0; + for (size_t i = 0; i < n; ++i) { + Tensor in_t = + in->Slice(static_cast(lod[0][i] + offset_data[i]), + static_cast(lod[0][i] + offset_data[i] + + length_data[i])); + + StridedMemcpy(ctx.device_context(), in_t.data(), + in_stride, in_t.dims(), out_stride, + out->data() + out_offset); + out_offset += length_data[i] * in_stride[0]; + } + } +}; + +template +class SequenceSliceGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* offset = ctx.Input("Offset"); + auto* length = ctx.Input("Length"); + auto* out_grad = + ctx.Input(framework::GradVarName("Out")); + auto* x_grad = + ctx.Output(framework::GradVarName("X")); + + const int64_t* offset_data = offset->data(); + const int64_t* length_data = length->data(); + framework::Tensor offset_cpu; + framework::Tensor length_cpu; + + if (platform::is_gpu_place(ctx.GetPlace())) { + 
offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); + offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context()); + offset_data = offset_cpu.data(); + + length_cpu.mutable_data(length->dims(), platform::CPUPlace()); + length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context()); + length_data = length_cpu.data(); + } + + auto lod = in->lod(); + auto out_lod = out_grad->lod(); + + if (x_grad) { + x_grad->mutable_data(ctx.GetPlace()); + x_grad->set_lod(in->lod()); + math::SetConstant set_zero; + set_zero(ctx.device_context(), x_grad, static_cast(0)); + + auto out_grad_stride = framework::stride(out_grad->dims()); + + for (size_t i = 0; i < out_lod[0].size() - 1; ++i) { + Tensor out_grad_t = + out_grad->Slice(static_cast(out_lod[0][i]), + static_cast(out_lod[0][i + 1])); + auto out_grad_stride = framework::stride(out_grad_t.dims()); + + auto x_grad_stride = framework::stride(x_grad->dims()); + + Tensor x_grad_t = x_grad->Slice( + static_cast(lod[0][i] + offset_data[i]), + static_cast(lod[0][i] + offset_data[i] + length_data[i])); + + StridedMemcpy(ctx.device_context(), out_grad_t.data(), + out_grad_stride, out_grad_t.dims(), x_grad_stride, + x_grad_t.data()); + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/sequence_softmax_op.cu b/paddle/operators/sequence_softmax_op.cu.cc similarity index 100% rename from paddle/operators/sequence_softmax_op.cu rename to paddle/operators/sequence_softmax_op.cu.cc diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index 2f41c7fc121950926f6e8d842eb629d59738f321..7b6c5ec30628b521b594ceaa3b7f1e0e03e497e4 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -20,11 +20,11 @@ namespace paddle { namespace operators { namespace { -template +template __global__ void SparseSGDFunctorKernel(const T* selected_rows, const int64_t* rows, const T* learning_rate, T* tensor_out, - int64_t row_numel, int block_size) { + int64_t 
row_numel) { const int ty = blockIdx.y; int tid = threadIdx.x; @@ -59,14 +59,15 @@ struct SparseSGDFunctor { auto* in_data = in_value.data(); auto* out_data = output->data(); - int block_size = 256; + const int block_size = 256; dim3 threads(block_size, 1); dim3 grid(1, in_rows.size()); SparseSGDFunctorKernel< - T><<(context) - .stream()>>>(in_data, in_rows.data(), learning_rate.data(), - out_data, in_row_numel, block_size); + T, 256><<(context) + .stream()>>>(in_data, in_rows.data(), + learning_rate.data(), out_data, + in_row_numel); } }; diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu.cc similarity index 100% rename from paddle/operators/softmax_op.cu rename to paddle/operators/softmax_op.cu.cc diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index ed96e8cee5a78e63ea29ed383d06c1258abdc328..3dbb62d2e571eb92025c1b3fc0a6653c7cda007a 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include "paddle/operators/softmax_with_cross_entropy_op.h" #include -#include namespace paddle { namespace operators { diff --git a/paddle/operators/split_op.cu b/paddle/operators/split_op.cu.cc similarity index 100% rename from paddle/operators/split_op.cu rename to paddle/operators/split_op.cu.cc diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index 57b99bdb3a9359bbfdbe62a6fc9afca6c4d5df9e..c2b7632b2865a3ef66051d815d7722a08c6a8cbd 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -12,7 +12,7 @@ limitations under the License. 
*/ #include "paddle/operators/sum_op.h" #include #include "paddle/framework/var_type_inference.h" -#include "paddle/operators/net_op.h" +#include "paddle/operators/detail/safe_ref.h" namespace paddle { namespace operators { @@ -60,13 +60,16 @@ class SumOp : public framework::OperatorWithKernel { x_vars[0]->Get().value().type()), ctx.device_context()); } else if (x_vars[0]->IsType()) { - auto& array = x_vars[0]->Get(); - for (auto& each : array) { - if (each.numel() != 0) { - return framework::OpKernelType(framework::ToDataType(each.type()), - ctx.device_context()); + for (auto& x_var : x_vars) { + auto& array = x_var->Get(); + for (auto& each : array) { + if (each.numel() != 0) { + return framework::OpKernelType(framework::ToDataType(each.type()), + ctx.device_context()); + } } } + PADDLE_THROW("Cannot find the input data type by all input data"); } PADDLE_THROW("Unexpected branch. Input type is %s", x_vars[0]->Type().name()); @@ -97,6 +100,11 @@ class SumOpVarTypeInference : public framework::VarTypeInference { auto& inputs = op_desc.Input("X"); auto var_type = framework::VarDesc::SELECTED_ROWS; + for (auto& name : op_desc.Input("X")) { + VLOG(10) << name << " " + << block->FindRecursiveOrCreateVar(name)->GetType(); + } + bool any_input_is_lod_tensor = std::any_of( inputs.begin(), inputs.end(), [block](const std::string& name) { return block->FindRecursiveOrCreateVar(name)->GetType() == @@ -104,7 +112,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { }); auto is_tensor_array = [block](const std::string& name) { - return block->FindRecursiveOrCreateVar(name)->GetType() == + return detail::Ref(block->FindRecursiveOrCreateVar(name)).GetType() == framework::VarDesc::LOD_TENSOR_ARRAY; }; @@ -114,14 +122,26 @@ class SumOpVarTypeInference : public framework::VarTypeInference { std::all_of(inputs.begin(), inputs.end(), is_tensor_array); if (any_input_is_tensor_array) { - PADDLE_ENFORCE(all_inputs_are_tensor_array); + if (!all_inputs_are_tensor_array) 
{ + std::ostringstream os; + for (auto& each : inputs) { + os << " " << each << " type is " + << detail::Ref(block->FindRecursiveOrCreateVar(each)).GetType() + << "\n"; + } + PADDLE_ENFORCE(all_inputs_are_tensor_array, + "Not all inputs are tensor array:\n%s", os.str()); + } var_type = framework::VarDesc::LOD_TENSOR_ARRAY; } else if (any_input_is_lod_tensor) { var_type = framework::VarDesc::LOD_TENSOR; } auto out_var_name = op_desc.Output("Out").front(); - block->FindRecursiveOrCreateVar(out_var_name)->SetType(var_type); + auto& out_var = detail::Ref(block->FindRecursiveOrCreateVar(out_var_name)); + out_var.SetType(var_type); + auto& in_var = detail::Ref(block->FindVarRecursive(inputs.front())); + out_var.SetDataType(in_var.GetDataType()); } }; diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index 62e15604c47f25c458abc69ecd1cabf964de39bb..ae1b48d7a8e3d573a5134a822a2ed5ef70511077 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/operators/array_operator.h" - +#include "paddle/operators/detail/safe_ref.h" namespace paddle { namespace operators { @@ -33,6 +33,8 @@ class WriteToArrayOp : public ArrayOp { auto *out = scope.FindVar(Output("Out"))->GetMutable(); if (offset >= out->size()) { + VLOG(10) << "Resize " << Output("Out") << " from " << out->size() + << " to " << offset + 1; out->resize(offset + 1); } auto *out_tensor = &out->at(offset); @@ -85,11 +87,15 @@ class WriteToArrayInferVarType : public framework::VarTypeInference { public: void operator()(const framework::OpDescBind &op_desc, framework::BlockDescBind *block) const override { - for (auto &out_var : op_desc.OutputArgumentNames()) { - VLOG(10) << "Set Variable " << out_var << " as LOD_TENSOR_ARRAY"; - block->FindRecursiveOrCreateVar(out_var)->SetType( - framework::VarDesc::LOD_TENSOR_ARRAY); - } + auto x_name = op_desc.Input("X")[0]; + auto out_name = op_desc.Output("Out")[0]; + VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY"; + auto &out = detail::Ref(block->FindRecursiveOrCreateVar(out_name), + "Cannot found %s", out_name); + out.SetType(framework::VarDesc::LOD_TENSOR_ARRAY); + auto &x = + detail::Ref(block->FindVarRecursive(x_name), "Cannot found %s", x_name); + out.SetDataType(x.GetDataType()); } }; @@ -107,11 +113,11 @@ class ReadFromArrayOp : public ArrayOp { auto &x_array = x->Get(); auto *out = scope.FindVar(Output("Out")); PADDLE_ENFORCE(out != nullptr, "Out must be set"); - auto *out_tesnor = out->GetMutable(); + auto *out_tensor = out->GetMutable(); size_t offset = GetOffset(scope, dev_ctx); PADDLE_ENFORCE_LT(offset, x_array.size()); - out_tesnor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx); - out_tesnor->set_lod(x_array[offset].lod()); + out_tensor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx); + out_tensor->set_lod(x_array[offset].lod()); } }; diff --git a/paddle/operators/transpose_op.cu b/paddle/operators/transpose_op.cu.cc similarity index 100% rename 
from paddle/operators/transpose_op.cu rename to paddle/operators/transpose_op.cu.cc diff --git a/paddle/operators/transpose_op.h b/paddle/operators/transpose_op.h index aaa3f47ab5545accd4d1108e0ad6f5a3062186d0..e296032f4147f9f8338148f9e4fef100c7cf816f 100644 --- a/paddle/operators/transpose_op.h +++ b/paddle/operators/transpose_op.h @@ -14,27 +14,44 @@ #pragma once -#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" namespace paddle { namespace operators { -template -void EigenTranspose(const framework::ExecutionContext& context, - const framework::Tensor& in, framework::Tensor& out, - std::vector axis) { - Eigen::array permute; - for (int i = 0; i < Rank; i++) { - permute[i] = axis[i]; +template +inline void TransCompute(const int dim, const platform::DeviceContext& dev_ctx, + const framework::Tensor& in, framework::Tensor* out, + const std::vector& axis) { + switch (dim) { + case 1: + math::Transpose trans1; + trans1(dev_ctx, in, out, axis); + break; + case 2: + math::Transpose trans2; + trans2(dev_ctx, in, out, axis); + break; + case 3: + math::Transpose trans3; + trans3(dev_ctx, in, out, axis); + break; + case 4: + math::Transpose trans4; + trans4(dev_ctx, in, out, axis); + break; + case 5: + math::Transpose trans5; + trans5(dev_ctx, in, out, axis); + break; + case 6: + math::Transpose trans6; + trans6(dev_ctx, in, out, axis); + break; + default: + PADDLE_THROW("Tensors with rank at most 6 are supported"); } - auto in_dim = in.dims(); - auto out_dim = out.dims(); - - auto eigen_in = framework::EigenTensor::From(in); - auto eigen_out = framework::EigenTensor::From(out); - auto& dev = context.GetEigenDevice(); - eigen_out.device(dev) = eigen_in.shuffle(permute); } template @@ -47,28 +64,8 @@ class TransposeKernel : public framework::OpKernel { std::vector axis = context.Attr>("axis"); int ndims = axis.size(); - switch (ndims) { - case 1: - EigenTranspose(context, *x, *out, axis); - 
break; - case 2: - EigenTranspose(context, *x, *out, axis); - break; - case 3: - EigenTranspose(context, *x, *out, axis); - break; - case 4: - EigenTranspose(context, *x, *out, axis); - break; - case 5: - EigenTranspose(context, *x, *out, axis); - break; - case 6: - EigenTranspose(context, *x, *out, axis); - break; - default: - PADDLE_THROW("Tensors with rank at most 6 are supported"); - } + auto& dev_ctx = context.device_context(); + TransCompute(ndims, dev_ctx, *x, out, axis); } }; @@ -80,47 +77,19 @@ class TransposeGradKernel : public framework::OpKernel { context.Input(framework::GradVarName("Out")); auto* x_grad = context.Output(framework::GradVarName("X")); - if (x_grad) { - x_grad->mutable_data(context.GetPlace()); - - std::vector axis = context.Attr>("axis"); - std::vector reversed_axis(axis); + if (!x_grad) return; - for (size_t i = 0; i < axis.size(); i++) { - reversed_axis[axis[i]] = i; - } - - int ndims = axis.size(); + x_grad->mutable_data(context.GetPlace()); + std::vector axis = context.Attr>("axis"); + std::vector reversed_axis(axis); - switch (ndims) { - case 1: - EigenTranspose(context, *out_grad, *x_grad, - reversed_axis); - break; - case 2: - EigenTranspose(context, *out_grad, *x_grad, - reversed_axis); - break; - case 3: - EigenTranspose(context, *out_grad, *x_grad, - reversed_axis); - break; - case 4: - EigenTranspose(context, *out_grad, *x_grad, - reversed_axis); - break; - case 5: - EigenTranspose(context, *out_grad, *x_grad, - reversed_axis); - break; - case 6: - EigenTranspose(context, *out_grad, *x_grad, - reversed_axis); - break; - default: - PADDLE_THROW("Tensors with rank at most 6 are supported"); - } + for (size_t i = 0; i < axis.size(); i++) { + reversed_axis[axis[i]] = i; } + + int ndims = axis.size(); + auto& dev_ctx = context.device_context(); + TransCompute(ndims, dev_ctx, *out_grad, x_grad, reversed_axis); } }; diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index 
4ca6c8507a48507fd29a9c9acae2bdf36ed936ee..dcc59f5ff2ae3a8ca999d72a20cfd5c759987d89 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -14,8 +14,10 @@ #include #include "paddle/framework/executor.h" +#include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" +#include "paddle/operators/detail/safe_ref.h" namespace paddle { namespace operators { @@ -26,8 +28,9 @@ using LoDTensor = framework::LoDTensor; constexpr char kStepBlock[] = "step_block"; constexpr char kCondition[] = "Condition"; constexpr char kStepScopes[] = "StepScopes"; -constexpr char kParamGrads[] = "X@Grad"; constexpr char kParameters[] = "X"; +constexpr char kParamGrads[] = "X@GRAD"; +constexpr char kOutputs[] = "Out"; class WhileOp : public framework::OperatorBase { public: @@ -71,9 +74,9 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker { kCondition, "(Bool) An scalar. When it's False, the While Op will be terminated.") .AsDuplicable(); - AddOutput("Out", + AddOutput(kOutputs, "A set of variables, which will be assigned with values " - "generated by perators inside the block of While Op.") + "generated by the operators inside the block of While Op.") .AsDuplicable(); AddOutput(kStepScopes, "(StepScopeVar) A vector of local scope, which size equals the " @@ -104,17 +107,64 @@ class WhileGradOp : public framework::OperatorBase { auto *step_scopes = scope.FindVar(Input(kStepScopes))->GetMutable(); + auto outside_og_names = Inputs(framework::GradVarName(kOutputs)); + auto inside_og_names = + Attr>("original_output_grad"); + + PADDLE_ENFORCE_EQ(outside_og_names.size(), inside_og_names.size()); + for (auto cur_scope_iter = step_scopes->rbegin(); cur_scope_iter != step_scopes->rend(); ++cur_scope_iter) { + VLOG(3) << "Start backward at time_step " + << cur_scope_iter - step_scopes->rbegin(); + framework::Scope &cur_scope = **cur_scope_iter; + // Link OG from outside to inside + for (size_t i = 
0; i < outside_og_names.size(); ++i) { + auto outside_og_name = outside_og_names[i]; + auto inside_og_name = inside_og_names[i]; + VLOG(10) << "Linking outside " << outside_og_name << " --> inside " + << inside_og_name; + auto &og_outside = detail::Ref(scope.FindVar(outside_og_name)); + auto &og_inside = detail::Ref(cur_scope.Var(inside_og_name)); + if (og_outside.Type().hash_code() == + typeid(framework::LoDTensor).hash_code()) { + auto &outside_tensor = og_outside.Get(); + auto &inside_tensor = + detail::Ref(og_inside.GetMutable()); + inside_tensor.set_lod(outside_tensor.lod()); + inside_tensor.ShareDataWith(outside_tensor); + } else if (og_outside.Type().hash_code() == + typeid(framework::LoDTensorArray).hash_code()) { + auto &outside_array = og_outside.Get(); + auto &inside_array = + detail::Ref(og_inside.GetMutable()); + VLOG(10) << outside_og_name << " size = " << outside_array.size(); + inside_array.resize(outside_array.size()); + + for (size_t j = 0; j < inside_array.size(); ++j) { + VLOG(10) << j << " " << outside_array[j].numel(); + if (outside_array[j].numel() != 0) { + inside_array[j].set_lod(outside_array[j].lod()); + inside_array[j].ShareDataWith(outside_array[j]); + } else { + PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0); + } + } + } + } + executor.Run(*program, *cur_scope_iter, block->ID(), false); auto &pg_names = Outputs(kParamGrads); auto &p_names = Inputs(kParameters); PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size()); - for (size_t prog_id = 0; prog_id < pg_names.size(); ++prog_id) { - auto inside_grad_name = framework::GradVarName(p_names[prog_id]); + for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) { + if (pg_names[param_id] == framework::kEmptyVarName) { + continue; // iterator doesn't have gradient + } + auto inside_grad_name = framework::GradVarName(p_names[param_id]); - // // TODO(tonyyang-savil: Not sure we need the following + // // TODO(tonyyang-svail): Not sure we need the following // // If does not compute 
gradient of that variable inside rnn, // just // // continue @@ -126,7 +176,7 @@ class WhileGradOp : public framework::OperatorBase { // zero gradient variable in step 0 if (cur_scope_iter == step_scopes->rbegin()) { auto *var = (*cur_scope_iter)->FindVar(inside_grad_name); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL(var, "Can not find var %s", inside_grad_name); if (var->IsType()) { auto &inside_tensor = var->Get(); framework::AttributeMap attrs; @@ -135,27 +185,18 @@ class WhileGradOp : public framework::OperatorBase { attrs["value"] = 0.0f; auto zero_op = framework::OpRegistry::CreateOp( - "fill_constant", {}, {{"Out", {pg_names[prog_id]}}}, attrs); + "fill_constant", {}, {{"Out", {pg_names[param_id]}}}, attrs); zero_op->Run(scope, dev_ctx); } } // sum gradient - auto *outside_var = scope.FindVar(pg_names[prog_id]); - PADDLE_ENFORCE_NOT_NULL(outside_var); - auto &outside_tensor = *outside_var->GetMutable(); - - std::string result_var_name; - auto *local_result_var = (*cur_scope_iter)->Var(&result_var_name); - auto &local_result_tensor = - *local_result_var->GetMutable(); - - local_result_tensor.ShareDataWith(outside_tensor); - + auto new_inside_name = cur_scope.Rename(inside_grad_name); auto sum_op = framework::OpRegistry::CreateOp( - "sum", {{"X", {result_var_name, inside_grad_name}}}, - {{"Out", {result_var_name}}}, {}); - sum_op->Run(**cur_scope_iter, dev_ctx); + "sum", {{"X", {pg_names[param_id], new_inside_name}}}, + {{"Out", {pg_names[param_id]}}}, {}); + sum_op->Run(cur_scope, dev_ctx); + cur_scope.Rename(new_inside_name, inside_grad_name); } } } @@ -169,29 +210,110 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { virtual std::unique_ptr Apply() const { auto *grad = new framework::OpDescBind(); grad->SetType("while_grad"); - for (auto &input_param : this->InputNames()) { - grad->SetInput(input_param, this->Input(input_param)); - grad->SetOutput(framework::GradVarName(input_param), - this->InputGrad(input_param)); + 
grad->SetInput(kParameters, Input(kParameters)); + grad->SetOutput( + framework::GradVarName(kParameters), + InputGrad(kParameters, /*do not drop empty gradient*/ false)); + grad->SetInput(kOutputs, Output(kOutputs)); + + // OG should be re-calculated by step blocks, since many outputs of while op + // do not need to calculate gradients. + std::unordered_set block_ins; + { + for (auto &p : Input(kParameters)) { + block_ins.insert(p); + } + for (auto &o : Output(kOutputs)) { + block_ins.insert(o); + } } + std::unordered_set extra_inputs; + for (size_t i = 0; i < grad_block_[0]->OpSize(); ++i) { + for (auto &input_name : grad_block_[0]->Op(i)->InputArgumentNames()) { + if (block_ins.find(input_name) != block_ins.end()) { + continue; + } + extra_inputs.insert(input_name); + } - for (auto &output_param : this->OutputNames()) { - grad->SetInput(output_param, this->Output(output_param)); - if (output_param != kStepScopes) { - grad->SetInput(framework::GradVarName(output_param), - this->OutputGrad(output_param)); + for (auto &output_name : grad_block_[0]->Op(i)->OutputArgumentNames()) { + block_ins.insert(output_name); } } + + std::vector extra_inputs_list; + extra_inputs_list.resize(extra_inputs.size()); + std::copy(extra_inputs.begin(), extra_inputs.end(), + extra_inputs_list.begin()); + grad->SetInput(framework::GradVarName(kOutputs), extra_inputs_list); + grad->SetInput(kStepScopes, Output(kStepScopes)); grad->SetAttrMap(this->Attrs()); grad->SetBlockAttr(kStepBlock, *grad_block_[0]); + // record the original output gradient names, since the gradient name of + // while operator could be renamed. 
+ grad->SetAttr("original_output_grad", extra_inputs_list); return std::unique_ptr(grad); } }; +class WhileGradOpVarTypeInference : public framework::VarTypeInference { + public: + void operator()(const framework::OpDescBind &op_desc, + framework::BlockDescBind *block) const override { + auto p_names = op_desc.Input(kParameters); + auto pg_names = op_desc.Output(framework::GradVarName(kParameters)); + + for (size_t i = 0; i < p_names.size(); ++i) { + auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i])); + auto *g_var = block->FindVarRecursive(pg_names[i]); + if (g_var != nullptr) { // Gradient could be @EMPTY@ + VLOG(5) << "Setting " << pg_names[i] << " following " << p_names[i] + << " type: " << p_var.GetType(); + g_var->SetType(p_var.GetType()); + g_var->SetDataType(p_var.GetDataType()); + } + } + } +}; + +class WhileGradOpShapeInference : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override { + ctx->HasInputs(kParameters); + ctx->HasOutputs(framework::GradVarName(kParameters)); + ctx->HasInputs(kOutputs); + ctx->HasInputs(framework::GradVarName(kOutputs)); + + auto p_names = ctx->Inputs(kParameters); + auto pg_names = ctx->Outputs(kParamGrads); + auto dims = ctx->GetInputsDim(kParameters); + auto var_types = ctx->GetInputsVarType(kParameters); + std::vector names_to_set; + std::vector dims_to_set; + for (size_t i = 0; i < p_names.size(); ++i) { + if (pg_names[i] == framework::kEmptyVarName) { + continue; + } + if (var_types[i] == framework::VarDesc::LOD_TENSOR) { + names_to_set.push_back(pg_names[i]); + dims_to_set.push_back(dims[i]); + } else if (var_types[i] == framework::VarDesc::LOD_TENSOR_ARRAY) { + // not sure how to set the dim of LOD_TENSOR_ARRAY + names_to_set.push_back(pg_names[i]); + dims_to_set.push_back(dims[i]); + } + } + ctx->SetDims(names_to_set, dims_to_set); + } +}; + } // namespace operators } // namespace paddle REGISTER_OPERATOR(while, paddle::operators::WhileOp, 
paddle::operators::WhileOpMaker, paddle::operators::WhileGradOpDescMaker); +REGISTER_OPERATOR(while_grad, paddle::operators::WhileGradOp, + paddle::operators::WhileGradOpShapeInference, + paddle::operators::WhileGradOpVarTypeInference); diff --git a/paddle/parameter/ParameterUpdateFunctions.cpp b/paddle/parameter/ParameterUpdateFunctions.cpp index 8b3be062b654a52e667626199be8c8bb4a2a96d7..1898598e49652a2829e57329bab6017304cec662 100644 --- a/paddle/parameter/ParameterUpdateFunctions.cpp +++ b/paddle/parameter/ParameterUpdateFunctions.cpp @@ -30,7 +30,7 @@ void sgdUpdateCpu(real learningRate, const real* grad, real* momentumVec) { decayRate *= learningRate; -#ifdef PADDLE_USE_MKLDNN +#ifdef PADDLE_USE_MKLML #pragma omp parallel for #endif for (size_t i = 0; i < size; ++i) { diff --git a/paddle/platform/cudnn_helper.h b/paddle/platform/cudnn_helper.h index ce3421a3cb840e4c1e872eea12dedc1150c85962..c5d8a6066ef3becb601344590f977a38c2af0a63 100644 --- a/paddle/platform/cudnn_helper.h +++ b/paddle/platform/cudnn_helper.h @@ -63,9 +63,10 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) { } \ } while (false) -enum class DataLayout { +enum class DataLayout { // Not use kNHWC, kNCHW, + kNCDHW, kNCHW_VECT_C, }; @@ -107,12 +108,15 @@ class CudnnDataType { } }; -inline cudnnTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) { +inline cudnnTensorFormat_t GetCudnnTensorFormat( + const DataLayout& order) { // Not use switch (order) { case DataLayout::kNHWC: return CUDNN_TENSOR_NHWC; case DataLayout::kNCHW: return CUDNN_TENSOR_NCHW; + case DataLayout::kNCDHW: + return CUDNN_TENSOR_NCHW; // TODO(chengduoZH) : add CUDNN_TENSOR_NCDHW default: PADDLE_THROW("Unknown cudnn equivalent for order"); } @@ -139,7 +143,7 @@ class ScopedTensorDescriptor { strides[i] = dims[i + 1] * strides[i + 1]; } // Update tensor descriptor dims setting if groups > 1 - // FIXME(typhoonzero): Assume using NCHW order + // FIXME(typhoonzero): Assume using NCHW or NCDHW order std::vector 
dims_with_group(dims.begin(), dims.end()); // copy if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; @@ -176,9 +180,10 @@ class ScopedFilterDescriptor { const cudnnDataType_t type, const std::vector& kernel, const int groups = 1) { - // filter layout: MCHW, where M is the number of + // filter layout: MCHW(MCDHW), where M is the number of // output image channels, C is the number of input image channels, - // H and W is height and width of filter. + // D is the depth of the filter, H is the height of the filter, and W is the + // width of the filter. std::vector kernel_with_group(kernel.begin(), kernel.end()); if (groups > 1) { // M /= groups @@ -219,13 +224,15 @@ class ScopedConvolutionDescriptor { PADDLE_ENFORCE_EQ(pads.size(), strides.size()); PADDLE_ENFORCE_EQ(pads.size(), dilations.size()); -#if CUDNN_VERSION < 6000 +#if !CUDNN_VERSION_MIN(6, 0, 0) // cudnn v5 does not support dilation conv, the argument is called upscale // instead of dilations and it is must be one. 
for (size_t i = 0; i < dilations.size(); ++i) { PADDLE_ENFORCE_EQ( dilations[i], 1, - "Dilations conv is not supported in this cuDNN version"); + "Dilations conv is not supported in this cuDNN version(%d.%d.%d).", + CUDNN_VERSION / 1000, CUDNN_VERSION % 1000 / 100, + CUDNN_VERSION % 100); } #endif diff --git a/paddle/platform/cudnn_helper_test.cc b/paddle/platform/cudnn_helper_test.cc index 6bd85ae1ca8b47b203e0321e9d9224d5cfd3a586..427359f69713b961c4730b697d3ccde5f7085838 100644 --- a/paddle/platform/cudnn_helper_test.cc +++ b/paddle/platform/cudnn_helper_test.cc @@ -38,6 +38,26 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { EXPECT_EQ(strides[2], 6); EXPECT_EQ(strides[1], 36); EXPECT_EQ(strides[0], 144); + + // test tensor5d: ScopedTensorDescriptor + ScopedTensorDescriptor tensor5d_desc; + std::vector shape_5d = {2, 4, 6, 6, 6}; + auto desc_5d = tensor5d_desc.descriptor(DataLayout::kNCDHW, shape_5d); + + std::vector dims_5d(5); + std::vector strides_5d(5); + paddle::platform::dynload::cudnnGetTensorNdDescriptor( + desc_5d, 5, &type, &nd, dims_5d.data(), strides_5d.data()); + + EXPECT_EQ(nd, 5); + for (size_t i = 0; i < dims_5d.size(); ++i) { + EXPECT_EQ(dims_5d[i], shape_5d[i]); + } + EXPECT_EQ(strides_5d[4], 1); + EXPECT_EQ(strides_5d[3], 6); + EXPECT_EQ(strides_5d[2], 36); + EXPECT_EQ(strides_5d[1], 216); + EXPECT_EQ(strides_5d[0], 864); } TEST(CudnnHelper, ScopedFilterDescriptor) { @@ -60,6 +80,20 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { for (size_t i = 0; i < shape.size(); ++i) { EXPECT_EQ(kernel[i], shape[i]); } + + ScopedFilterDescriptor filter_desc_4d; + std::vector shape_4d = {2, 3, 3, 3}; + auto desc_4d = filter_desc.descriptor(DataLayout::kNCDHW, shape_4d); + + std::vector kernel_4d(4); + paddle::platform::dynload::cudnnGetFilterNdDescriptor( + desc_4d, 4, &type, &format, &nd, kernel_4d.data()); + + EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format); + EXPECT_EQ(nd, 4); + for (size_t i = 0; i < shape_4d.size(); ++i) { + 
EXPECT_EQ(kernel_4d[i], shape_4d[i]); + } } TEST(CudnnHelper, ScopedConvolutionDescriptor) { diff --git a/paddle/platform/dynload/cublas.h b/paddle/platform/dynload/cublas.h index 6b64539b0a9a4d535a53447fbcc0e458f3ac9129..61a22d9db3e07cbe6fbca0e0b09fedcba232ff6c 100644 --- a/paddle/platform/dynload/cublas.h +++ b/paddle/platform/dynload/cublas.h @@ -62,6 +62,8 @@ extern void *cublas_dso_handle; DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) #define CUBLAS_BLAS_ROUTINE_EACH(__macro) \ + __macro(cublasSaxpy_v2); \ + __macro(cublasDaxpy_v2); \ __macro(cublasSgemv_v2); \ __macro(cublasDgemv_v2); \ __macro(cublasSgemm_v2); \ diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc index f3455a8733862c91eaece629b6684d446672336c..36b216d872138d49bfd5ab6e3499d15d49ebd0ca 100644 --- a/paddle/platform/gpu_info.cc +++ b/paddle/platform/gpu_info.cc @@ -109,5 +109,10 @@ void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device, cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream), "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeer"); } + +void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) { + PADDLE_ENFORCE(cudaMemsetAsync(dst, value, count, stream), + "cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync"); +} } // namespace platform } // namespace paddle diff --git a/paddle/platform/gpu_info.h b/paddle/platform/gpu_info.h index 37665b97d764fbcfe0964127d230b1d28d90b687..db961f3838af73855312d4cf6a80e2355306e08f 100644 --- a/paddle/platform/gpu_info.h +++ b/paddle/platform/gpu_info.h @@ -60,6 +60,9 @@ void GpuMemcpySync(void *dst, const void *src, size_t count, void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device, size_t count, cudaStream_t stream); +//! 
Set memory dst with value count size asynchronously +void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream); + } // namespace platform } // namespace paddle diff --git a/paddle/scripts/deb/postinst b/paddle/scripts/deb/postinst deleted file mode 100644 index 91620b1ee7569cd17927f44112dfa9279ddbdd32..0000000000000000000000000000000000000000 --- a/paddle/scripts/deb/postinst +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -echo "Post install paddle debian package." -echo "Install some python package used for paddle. You can run " -echo " pip install /usr/opt/paddle/share/wheels/*.whl to install them." -find /usr/ -name '*paddle*.whl' | xargs pip install diff --git a/paddle/scripts/docker/README.md b/paddle/scripts/docker/README.md index 76bc30e59b869d705b6188592b2983ed01114046..f3a6f1dba7588c6b29c1dcae26ec134c1a7f937d 100644 --- a/paddle/scripts/docker/README.md +++ b/paddle/scripts/docker/README.md @@ -2,178 +2,197 @@ ## Goals -We want the building procedure generates Docker images so that we can run PaddlePaddle applications on Kubernetes clusters. +We want to make the building procedures: -We want to build .deb packages so that enterprise users can run PaddlePaddle applications without Docker. +1. Static, can reproduce easily. +1. Generate python `whl` packages that can be widely use cross many distributions. +1. Build different binaries per release to satisfy different environments: + - Binaries for different CUDA and CUDNN versions, like CUDA 7.5, 8.0, 9.0 + - Binaries containing only capi + - Binaries for python with wide unicode support or not. +1. Build docker images with PaddlePaddle pre-installed, so that we can run +PaddlePaddle applications directly in docker or on Kubernetes clusters. -We want to minimize the size of generated Docker images and .deb packages so to reduce the download time. 
+To achieve this, we created a repo: https://github.com/PaddlePaddle/buildtools +which gives several docker images that are `manylinux1` compliant. Then we +can build PaddlePaddle using these images to generate corresponding `whl` +binaries. -We want to encapsulate building tools and dependencies in a *development* Docker image so to ease the tools installation for developers. +## Run The Build -Developers use various editors (emacs, vim, Eclipse, Jupyter Notebook), so the development Docker image contains only building tools, not editing tools, and developers are supposed to git clone source code into their development computers and map the code into the development container. +### Build Environments -We want the procedure and tools also work with testing, continuous integration, and releasing. +The pre-built build environment images are: +| Image | Tag | +| ----- | --- | +| paddlepaddle/paddle_manylinux_devel | cuda7.5_cudnn5 | +| paddlepaddle/paddle_manylinux_devel | cuda8.0_cudnn5 | +| paddlepaddle/paddle_manylinux_devel | cuda7.5_cudnn7 | +| paddlepaddle/paddle_manylinux_devel | cuda9.0_cudnn7 | -## Docker Images - -So we need two Docker images for each version of PaddlePaddle: - -1. `paddle:-dev` - - This a development image contains only the development tools and standardizes the building procedure. Users include: +### Start Build - - developers -- no longer need to install development tools on the host, and can build their current work on the host (development computer). - release engineers -- use this to build the official release from certain branch/tag on Github.com. - document writers / Website developers -- Our documents are in the source repo in the form of .md/.rst files and comments in source code. We need tools to extract the information, typeset, and generate Web pages. 
+Choose one docker image that suits your environment and run the following +command to start a build: - Of course, developers can install building tools on their development computers. But different versions of PaddlePaddle might require different set or version of building tools. Also, it makes collaborative debugging easier if all developers use a unified development environment. - - The development image should include the following tools: - - - gcc/clang - - nvcc - - Python - - sphinx - - woboq - - sshd +```bash +git clone https://github.com/PaddlePaddle/Paddle.git +cd Paddle +docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TESTING=OFF" -e "RUN_TEST=OFF" -e "PYTHON_ABI=cp27-cp27mu" paddlepaddle/paddle_manylinux_devel /paddle/paddle/scripts/docker/build.sh +``` - Many developers work on a remote computer with GPU; they could ssh into the computer and `docker exec` into the development container. However, running `sshd` in the container allows developers to ssh into the container directly. +After the build finishes, you can get the output `whl` package under +`build/python/dist`. -1. `paddle:` +This command mounts the source directory on the host into `/paddle` in the container, then runs the build script `/paddle/paddle/scripts/docker/build.sh` +in the container. When it writes to `/paddle/build` in the container, it writes to `$PWD/build` on the host indeed. - This is the production image, generated using the development image. This image might have multiple variants: +### Build Options - - GPU/AVX `paddle:-gpu` - - GPU/no-AVX `paddle:-gpu-noavx` - - no-GPU/AVX `paddle:` - - no-GPU/no-AVX `paddle:-noavx` +Users can specify the following Docker build arguments with either "ON" or "OFF" value: - We allow users to choose between GPU and no-GPU because the GPU version image is much larger than then the no-GPU version. 
+| Option | Default | Description | +| ------ | -------- | ----------- | +| `WITH_GPU` | OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. | +| `WITH_AVX` | OFF | Set to "ON" to enable AVX support. | +| `WITH_TESTING` | ON | Build unit test binaries. | +| `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. | +| `WITH_GOLANG` | ON | Build fault-tolerant parameter server written in go. | +| `WITH_SWIG_PY` | ON | Build with SWIG python API support. | +| `WITH_C_API` | OFF | Build capi libraries for inference. | +| `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. | +| `WITH_STYLE_CHECK` | ON | Check the code style when building. | +| `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu | +| `RUN_TEST` | OFF | Run unit test immediately after the build. | +| `WITH_DOC` | OFF | Build docs after building binaries. | +| `WOBOQ` | OFF | Generate WOBOQ code viewer under `build/woboq_out` | - We allow users the choice between AVX and no-AVX, because some cloud providers don't provide AVX-enabled VMs. +## Docker Images -## Development Environment +You can get the latest PaddlePaddle docker images by +`docker pull paddlepaddle/paddle:` or build one by yourself. -Here we describe how to use above two images. We start from considering our daily development environment. 
+### Official Docker Releases -Developers work on a computer, which is usually a laptop or desktop: +Official docker images at +[here](https://hub.docker.com/r/paddlepaddle/paddle/tags/), +you can choose either latest or images with a release tag like `0.10.0`, +Currently available tags are: - +| Tag | Description | +| ------ | --------------------- | +| latest | latest CPU only image | +| latest-gpu | latest binary with GPU support | +| 0.10.0 | release 0.10.0 CPU only binary image | +| 0.10.0-gpu | release 0.10.0 with GPU support | -or, they might rely on a more sophisticated box (like with GPUs): +### Build Your Own Image - +Build PaddlePaddle docker images are quite simple since PaddlePaddle can +be installed by just running `pip install`. A sample `Dockerfile` is: -A principle here is that source code lies on the development computer (host) so that editors like Eclipse can parse the source code to support auto-completion. +```dockerfile +FROM nvidia/cuda:7.5-cudnn5-runtime-centos6 +RUN yum install -y centos-release-SCL +RUN yum install -y python27 +# This whl package is generated by previous build steps. +ADD python/dist/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl / +RUN pip install /paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl && rm -f /*.whl +``` +Then build the image by running `docker build -t [REPO]/paddle:[TAG] .` under +the directory containing your own `Dockerfile`. -## Usages +- NOTE: note that you can choose different base images for your environment, you can find all the versions [here](https://hub.docker.com/r/nvidia/cuda/). -### Build the Development Docker Image +### Use Docker Images -The following commands check out the source code to the host and build the development image `paddle:dev`: +Suppose that you have written an application program `train.py` using +PaddlePaddle, we can test and run it using docker: ```bash -git clone https://github.com/PaddlePaddle/Paddle paddle -cd paddle -docker build -t paddle:dev . 
+docker run --rm -it -v $PWD:/work paddlepaddle/paddle /work/train.py ``` -The `docker build` command assumes that `Dockerfile` is in the root source tree. Note that in this design, this `Dockerfile` is this only one in our repo. - -Users can specify a Ubuntu mirror server for faster downloading: - -```bash -docker build -t paddle:dev --build-arg UBUNTU_MIRROR=mirror://mirrors.ubuntu.com/mirrors.txt . -``` +But this works only if all dependencies of `train.py` are in the production image. If this is not the case, we need to build a new Docker image from the production image and with more dependencies installed. -### Build PaddlePaddle from Source Code +### Run PaddlePaddle Book In Docker -Given the development image `paddle:dev`, the following command builds PaddlePaddle from the source tree on the development computer (host): +Our [book repo](https://github.com/paddlepaddle/book) also provides a docker +image to start a Jupyter notebook inside docker so that you can run this book +using docker: ```bash -docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TESTING=OFF" -e "RUN_TEST=OFF" paddle:dev +docker run -d -p 8888:8888 paddlepaddle/book ``` -This command mounts the source directory on the host into `/paddle` in the container, so the default entry point of `paddle:dev`, `build.sh`, could build the source code with possible local changes. When it writes to `/paddle/build` in the container, it writes to `$PWD/build` on the host indeed. - -`build.sh` builds the following: - -- PaddlePaddle binaries, -- `$PWD/build/paddle-.deb` for production installation, and -- `$PWD/build/Dockerfile`, which builds the production Docker image. +Please refer to https://github.com/paddlepaddle/book if you want to build this +docker image by yourself. -Users can specify the following Docker build arguments with either "ON" or "OFF" value: -- `WITH_GPU`: ***Required***. Generates NVIDIA CUDA GPU code and relies on CUDA libraries. -- `WITH_AVX`: ***Required***. 
Set to "OFF" prevents from generating AVX instructions. If you don't know what is AVX, you might want to set "ON". -- `WITH_TEST`: ***Optional, default OFF***. Build unit tests binaries. Once you've built the unit tests, you can run these test manually by the following command: - ```bash - docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" paddle:dev sh -c "cd /paddle/build; make coverall" - ``` -- `RUN_TEST`: ***Optional, default OFF***. Run unit tests after building. You can't run unit tests without building it. +### Run Distributed Applications -### Build the Production Docker Image +In our [API design doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md#distributed-training), we proposed an API that starts a distributed training job on a cluster. This API need to build a PaddlePaddle application into a Docker image as above and calls kubectl to run it on the cluster. This API might need to generate a Dockerfile look like above and call `docker build`. -The following command builds the production image: +Of course, we can manually build an application image and launch the job using the kubectl tool: ```bash -docker build -t paddle -f build/Dockerfile ./build +docker build -f some/Dockerfile -t myapp . +docker tag myapp me/myapp +docker push +kubectl ... ``` -This production image is minimal -- it includes binary `paddle`, the shared library `libpaddle.so`, and Python runtime. +## Docker Images for Developers -### Run PaddlePaddle Applications +We have a special docker image for developers: +`paddlepaddle/paddle:-dev`. This image is also generated from +https://github.com/PaddlePaddle/buildtools -Again the development happens on the host. Suppose that we have a simple application program in `a.py`, we can test and run it using the production image: +This a development image contains only the +development tools and standardizes the building procedure. 
Users include: -```bash -docker run --rm -it -v $PWD:/work paddle /work/a.py -``` +- developers -- no longer need to install development tools on the host, and can build their current work on the host (development computer). +- release engineers -- use this to build the official release from certain branch/tag on Github.com. +- document writers / Website developers -- Our documents are in the source repo in the form of .md/.rst files and comments in source code. We need tools to extract the information, typeset, and generate Web pages. -But this works only if all dependencies of `a.py` are in the production image. If this is not the case, we need to build a new Docker image from the production image and with more dependencies installs. +Of course, developers can install building tools on their development computers. But different versions of PaddlePaddle might require different set or version of building tools. Also, it makes collaborative debugging easier if all developers use a unified development environment. -### Build and Run PaddlePaddle Applications +The development image contains the following tools: -We need a Dockerfile in https://github.com/paddlepaddle/book that builds Docker image `paddlepaddle/book:`, basing on the PaddlePaddle production image: + - gcc/clang + - nvcc + - Python + - sphinx + - woboq + - sshd -``` -FROM paddlepaddle/paddle: -RUN pip install -U matplotlib jupyter ... -COPY . /book -EXPOSE 8080 -CMD ["jupyter"] -``` +Many developers work on a remote computer with GPU; they could ssh into the computer and `docker exec` into the development container. However, running `sshd` in the container allows developers to ssh into the container directly. -The book image is an example of PaddlePaddle application image. We can build it -```bash -git clone https://github.com/paddlepaddle/book -cd book -docker build -t book . -``` +### Development Workflow -### Build and Run Distributed Applications +Here we describe how the workflow goes on. 
We start from considering our daily development environment. -In our [API design doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md#distributed-training), we proposed an API that starts a distributed training job on a cluster. This API need to build a PaddlePaddle application into a Docker image as above and calls kubectl to run it on the cluster. This API might need to generate a Dockerfile look like above and call `docker build`. +Developers work on a computer, which is usually a laptop or desktop: -Of course, we can manually build an application image and launch the job using the kubectl tool: + -```bash -docker build -f some/Dockerfile -t myapp . -docker tag myapp me/myapp -docker push -kubectl ... -``` +or, they might rely on a more sophisticated box (like with GPUs): + + + +A principle here is that source code lies on the development computer (host) so that editors like Eclipse can parse the source code to support auto-completion. ### Reading source code with woboq codebrowser + For developers who are interested in the C++ source code, please use -e "WOBOQ=ON" to enable the building of C++ source code into HTML pages using [Woboq codebrowser](https://github.com/woboq/woboq_codebrowser). - The following command builds PaddlePaddle, generates HTML pages from C++ source code, and writes HTML pages into `$HOME/woboq_out` on the host: ```bash -docker run -v $PWD:/paddle -v $HOME/woboq_out:/woboq_out -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" -e "WOBOQ=ON" paddle:dev +docker run -v $PWD:/paddle -v $HOME/woboq_out:/woboq_out -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TESTING=ON" -e "WOBOQ=ON" paddlepaddle/paddle:latest-dev ``` - You can open the generated HTML files in your Web browser. 
Or, if you want to run a Nginx container to serve them for a wider audience, you can run: diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 256500c56a2e05f981825b6ddb2a843f3ba71a83..fda2a2f1b764106a7a108e8c56bc90ce3459e9b5 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -1,23 +1,6 @@ #!/bin/bash -set -xe - - function cmake_gen() { - # Set BASE_IMAGE according to env variables - if [[ ${WITH_GPU} == "ON" ]]; then - BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" - else - BASE_IMAGE="ubuntu:16.04" - fi - - DOCKERFILE_GPU_ENV="" - DOCKERFILE_CUDNN_DSO="" - if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then - DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" - DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so" - fi - mkdir -p /paddle/build cd /paddle/build @@ -26,14 +9,32 @@ function cmake_gen() { # delete previous built whl packages rm -rf /paddle/paddle/dist 2>/dev/null || true + # Support build for all python versions, currently + # including cp27-cp27m and cp27-cp27mu. 
+ PYTHON_FLAGS="" + if [ "$1" != "" ]; then + echo "using python abi: $1" + if [ "$1" == "cp27-cp27m" ]; then + export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:} + PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python + -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 + -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" + elif [ "$1" == "cp27-cp27mu" ]; then + export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} + PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python + -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 + -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" + fi + fi + cat <& config, } } + if (FLAGS_use_mkldnn) { + CHECK_EQ(FLAGS_trainer_count, 1) << "MKLDNN only need 1 trainer"; + } + if (testing) { LOG(INFO) << "trainer: in testing mode"; if (config_->getOptConfig().use_sparse_remote_updater() || diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt index f01ad4142d4fe7c7f7d7aac60d967ea114b93e56..2739878b7f2936ea2da689da0b4caa780516ccc1 100644 --- a/paddle/trainer/tests/CMakeLists.txt +++ b/paddle/trainer/tests/CMakeLists.txt @@ -11,7 +11,6 @@ add_unittest_without_exec(test_Trainer test_Trainer.cpp) add_test(NAME test_Trainer COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ - ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py && ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_Trainer WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) @@ -28,35 +27,7 @@ if(WITH_PYTHON) ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass WORKING_DIRECTORY 
${PADDLE_SOURCE_DIR}/paddle/) endif() -################ test_CompareTwoNets ###################### -add_unittest_without_exec(test_CompareTwoNets - test_CompareTwoNets.cpp) -add_test(NAME test_CompareTwoNets - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ - ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets - --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) -############### test_CompareTwoOpts ################### -add_unittest_without_exec(test_CompareTwoOpts - test_CompareTwoOpts.cpp) -add_test(NAME test_CompareTwoOpts - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ - ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts - --config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf - --num_passes=1 --need_high_accuracy=0 - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) - -################# test_CompareSparse ################## -add_unittest_without_exec(test_CompareSparse - test_CompareSparse.cpp) -if(NOT ON_TRAVIS) - add_test(NAME test_CompareSparse - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ - ./.set_port.sh -p port -n 6 - ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) -endif() ################# test_recurrent_machine_generation ############### add_unittest_without_exec(test_recurrent_machine_generation test_recurrent_machine_generation.cpp) diff --git a/paddle/trainer/tests/chunking.conf b/paddle/trainer/tests/chunking.conf deleted file mode 100644 index d88df919df8fee9209336ffa29d724dabe6af31b..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/chunking.conf +++ /dev/null @@ -1,125 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later. - -TrainData(ProtoData( - files = 'trainer/tests/train_files.txt', - usage_ratio = 1.0, -)) - -TestData(ProtoData( - files = 'trainer/tests/test_files.txt' -)) - -default_initial_std(1) -default_decay_rate(4e-4) -default_device(0) - -Inputs("features", "word", "pos", "chunk") - -Outputs("crf") - -Layer( - name = "features", - type = "data", - size = 4339, -) - -Layer( - name = "word", - type = "data", - size = 478, -) - -Layer( - name = "pos", - type = "data", - size = 45 -) - -Layer( - name = "chunk", - type = "data", - size = 23 -) - -Layer( - name = "output", - type = "mixed", - size = 23, - bias = False, - device = -1, - inputs = [ - FullMatrixProjection("features", parameter_name="feature_weights"), - # TableProjection("word"), - # TableProjection("pos"), - ], -) - -Layer( - name = "crf", - type = "crf", - size = 23, - device = -1, - inputs = [ - Input("output", parameter_name="crfw"), - "chunk" - ] -) - -Layer( - name = "crf_decoding", - type = "crf_decoding", - size = 23, - device = -1, - inputs = [ - Input("output", parameter_name="crfw"), - "chunk" - ] -) - -Evaluator( - name = "error", - type = "sum", - inputs = "crf_decoding", -) - -''' -# chuck evaluator cannot be used for GPU training -Evaluator( - name = "chunk_f1", - type = "chunk", - inputs = ["crf_decoding", "chunk"], - 
chunk_scheme = "IOB", - num_chunk_types = 11, -) -''' - -Settings( - algorithm = 'sgd', - batch_size = 100, - average_window = 0.5, - max_average_window = 2500, - learning_rate = 1e-1, - learning_rate_decay_a = 5e-7, - learning_rate_decay_b = 0.75, - l1weight = 0, - l2weight = 1, - c1 = 0.0001, - backoff = 0.5, - owlqn_steps = 100, - max_backoff = 5, -) diff --git a/paddle/trainer/tests/compare_sparse_data b/paddle/trainer/tests/compare_sparse_data deleted file mode 100644 index 18fc6541383d8e8e1687b8fe1abd57aece3d4cfc..0000000000000000000000000000000000000000 Binary files a/paddle/trainer/tests/compare_sparse_data and /dev/null differ diff --git a/paddle/trainer/tests/data_bin_part b/paddle/trainer/tests/data_bin_part deleted file mode 100644 index 66ede391b0cffe6bc9611d3616b7b626864f5c3e..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/data_bin_part +++ /dev/null @@ -1,214 +0,0 @@ -F -X -X -X -X -X -X -X -X -HC=TFTIַ;H=TFTIYW.8T˔I͚48TN8TE98TW8T&6ͅTTHC=TFTIַ;><.8˔I͚48+E98W8&68H=TFTIHC=TFTIַ;H=TFTI86HC=TFTIַ;W8T;8TJJ8T&$H=TFTIW8Ю+JJ8HC=TFTIַ;H=TFTI HC=TFTIַ;@?H=TFTI@HC=TFTIַ;H=TFTI868T8T&9C6HC=TFTIַ;BT&$88&Ӗ5H=TFTIBTHC=TFTIַ;H=TFTIVTHC=TFTIַ;8T8TͅTT8T&86;8T@N8T8T;9H=TFTI8888&86;8@N88HC=TFTIַ;H=TFTIMKHC=TFTIַ;ٟ@17ȣ8Gȣ8/>7;BAUQUT0A?H=TFTIٟ@17G/>7;BAUQUT0HC=TFTIַ;H=TFTIHC=TFTIַ;H=TFTIHC=TFTIַ;H=TFTI.8T˔I͚48TN8TE98TW8T&6ͅTTHC=TFTIַ;'JA-EJ@8T-Eބ248TYW.8˔I͚48+E98W8&68H=TFTIAM18Mބ248HC=TFTIַ;H=TFTIYW.8T˔I͚48TN8TE98TW8T&6ͅTTHC=TFTIַ;><.8˔I͚48+E98W8&68H=TFTIHC=TFTIַ;H=TFTI HC=TFTIַ;@KH=TFTI@KHC=TFTIַ;H=TFTI HC=TFTIַ;@?H=TFTI@HC=TFTIַ;H=TFTI#!14UƕT6.Q8T@Ԛ<14ƕT6.Q8@Ԛ<HC=TFTIַ;H=TFTIVTHC=TFTIַ;8T8TͅTT8T&86;8T@N8T8T;9H=TFTI8888&86;8@N88HC=TFTIַ;H=TFTIHC=TFTIַ;ܥ6H=TFTIܥ6HC=TFTIַ;H=TFTIHC=TFTIַ;H=TFTIHC=TFTIַ;H=TFTI;9HC=TFTIַ;Q;B !H=TFTIQBHC=TFTIַ;H=TFTIYW.8T˔I͚48TN8TE98TW8T&6ͅTTHC=TFTIַ;><.8˔I͚48+E98W8&68H=TFTIHC=TFTIַ;H=TFTI53HW8T;8T8THC=TFTIַ;#!HW8Ю+8H=TFTIHC=TFTIַ;H=TFTI HC=TFTIַ;@?H=TFTI@HC=TFTIַ;H=TFTI&$HC=TFTIַ;VGD; H=TFTIVGD;  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ 
Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OGG͡S<%&б ̣ Fۧ11ņAǧ1ņAņA<6ҥ3߫UVKTVU6>VMUF>M5%̋'wuG͡S<% ̣ Fۧ11ņAǧ1ņAņA<6UVKTV6>VMUF>ʶM%̋'  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG̣ '@@@  @@  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG&$O4=ӪN/>K/;8,T O4=ӪN/>K;,T  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG><,9O8.̣ TB0O!./WDSW53,9O8.TB0O!./WDSW  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG:=X̣ QUTG܂=X̣ QTG  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG)'= 0̣ M6ͅTO,@Ԛ<#!=ؐ0̣ M6ͅTO,@Ԛ<  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG/-= 0̣ M6ͅTO,DSDA)'=ؐ0̣ M6ͅTO,DSDA  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG  ̣ Ҧ)GG4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ204AQ.ŞGщQHAVTJD8DAP&$4AQ.щQHAVTD8A4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ&$R4Q>.ŞGGщQ6?@Ԛ<#!R4Q>.GщQ6?@Ԛ<4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ&$4Q.ŞGJIGщQDSDA#!4Q.JIGщQDSDA4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ&$.ŞGٟ@6G5IGщQA7B.ٟ@6G5IGщQ+4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ534Q>.ŞGDAP;0T?6T)! 4Q>.A;T6T)4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ534Q>.ŞGDAP;0T?6T)! 
4Q>.A;T6T)4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ><49KQ.ŞGRGD9HOKJA.ŞG=RJ/-4-Q.RGD9HKJA.RJ4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ534AIQ.ŞGщQHAVTJD8DAP)'4AIQ.щQHAVTD8A4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ/-4=R4Q>AE.ŞGC/W99 4R4Q>C/W9CPH5CPH5;9H91GRFP.ܤKHUA6)ʪ86H1GRFP.ܤKHUA6)ʪCPH5CPH5UPH>G@Ԛ<UPH>G@Ԛ<CPH5CPH5&$CPHA>GDSPԮK߀3#!CPHA>GDSPٮKCPH5CPH5AHACPG@Ԛ<AHACP@Ԛ<CPH5CPH5;9H91GRFP.ܤKHUA6)ʪ86H1GRFP.ܤKHUA6)ʪCPH5CPH5MKHFșK>7QKH.CQR>“JMB>WMLG,@Ԛ<MKHFșK>7QKH.CQR>“JMB>WMLG,@Ԛ<CPH5CPH5&$CPHA>GDSPԮK߀3#!CPHA>GDSPٮKCPH5CPH553AHMDP58Qٟ@H3/A@@@/-AHMDP8Qٟ@H3/A@@CPH5CPH5;9H91GRFP.ܤKHUA6)ʪ86H1GRFP.ܤKHUA6)ʪCPH5CPH5#!AHACPGDSDA AHACPDSDACPH5CPH5&$CPHA>GDSPԮK߀3#!CPHA>GDSPٮKCPH5CPH5YWI==R>H//GM>ϪJRK22U׵AHTUA6)ʪYWI==R>H//GM>ϪJRK22U׵AHTUA6)ʪCPH5CPH5;9H91GRFP.ܤKHUA6)ʪ86H1GRFP.ܤKHUA6)ʪCPH5CPH5 6PH>5HOAB 6PH>5HOABCPH5CPH5&$CPHA>GDSPԮK߀3#!CPHA>GDSPٮKCPH5CPH5HG22A@@@HG22A@@ B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O20 N߹-7BO1ַ;L߹-NA7OIַ;)' N߹-7BO1;߹-NA7I B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O,* N߹-BO߹-7O߹-ַ;OʈF<4)' N߹-BO߹-7߹-ַ;OʈF<4 B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O&$A N߹-BO>8ֽHٟ@@Ԛ<#!A N߹-BO>8ٟ@@Ԛ< B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O/- - N߹-C7FBOR1:?T)' - Nں-7BOR1:?T B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O ߹-7O߹-BT ߹-7߹-B B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O/- N߹-BO7FOO?L߹-OǧBT)' N߹-BO7OO?L߹-OT B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O><߹- NLB7FOQӮDDA40AT(",*߹- NLOQӮDDA0AT B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O,* ߹-7BOİU1>CBBUQ4,* ߹-7BOİU1>CBBUQ4 L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/>< - /@ʡH9H1RLA¶7/JDO8,T#!N91LN/JD,T L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/b`1RLDA¶7/ - J0EKB8//OEKю2E,/WT)ʪDB1LDN/J0KB8/OEю2E)ʪ L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/20 - 1RLA¶7/J0EO@K&$1LN/J0EO@K L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/>T7O=P; >7=P L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC 
-:48?:T L17A¶7J/ L17NJ//-DA¶7/1RLJʡHWWT%! DN/1LJʡHWWՄO L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/>< - N1RLA¶7CH231RLA¶7//&$N1LN޻/231LN/ L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ//- LGR1¶7/17>>G>GW=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/JHA¶7/C1RLH7/N=,::84SQH9T86N/C1L+N=,ў84SQH9T L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/DB - /@ʡH9H1RLA¶7/JDOEJ< NT΂:8/CT΂:KT΂:WJT΂:ì,UWJ&$ NTCT:Tژ< NT΂:8/CT΂:KT΂:WJT΂:ì,UWJ&$ NTCT:TژBDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ\ZRBDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ;9>R>%B>ڜ>A9TK91A#%@@@20>R>%B>ڜ>A9K91A#@@)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ#!#%9TKڜ>BEIUT#9Kڜ>BEIU)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ#!#%K9TD06O@Ԛ<#K9D06@Ԛ<)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ#%9TCۚK@Ԛ<#9CۚK@Ԛ<)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQGE6W#%>9T?#%6O/OO/U!'B8>ڜ>;96W#>9?#6O/O/U!'B8>ڜ>)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQYW#%9T>K-A96TWB:OSRQ9#%ѾCHTL6LTJH#9>KA96TWB:OSRQ9#%5L6LT,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S, ؓ =BܤKS/C8Tœ =BܤKS8T,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,DBGDG>W-3M8F=Bٟ@6S9ܤKȟN U686GDG>W38F=B5S9ܤKȟN U,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,>THH8@9FFSA@Ԛ<53ER=B67>HH8@9FFA@Ԛ<,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,PN84C81=BRVT6CAE/:6LUUNԛL@;6GDB8C81=BRVTCAE:6LUUNԛL@6G,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,JHH=B/-8>ܤKDA9=S˱U8QTָUJ)ʪDBH=B/8>ܤKDA9=S˱U8QTU)ʪ,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,GEABRBE9A6BϜ>8=B6ץRRDO6ө ۆ ;9ABRBE9A6BϜ>8=B6ץR6ө 
,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,;9RQSAEM8=B>ץR9)NU6!GJ53RQSAEC=B>ץR9)NU6!1,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,/-VJV18=BR6?#%@@@)'VJV18=BR6?#@@,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,D>EȊ56RT8JF=BKT:8J=BRFK,34DH@CӽDҾWK?>S@99ISDPDAzxD>Eˊ5RT8S=BАT:8J=BRF,34DH@CӽDҾWK?>S@9ISDPDA,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,V68BXʉ5=B>ܤK%&Ξ)ʉ5VTVEXGVXGV8G&Ξ)VEBVƔ>XVU8—P=ۚKC>JU̟KO4>LV68BX=B>ܤK%&Ξ)ʉ5VTVEXVXV8G&Ξ)VEBV۔>VU8=CJ.4>HD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXDCGR@NDCG@NHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WX#!6CGDʉ5>R#!6CGDʉ5>RHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WX86GR>RP>R699VADSDA20GR>RP>R69VADSDAHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WX#!DR߻W99@@@DR߻W99@@HD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXUV1;2X4UV1;2XHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXnl>A6߻W$6XT6/ҥ3)T:6X-6ME@EU%!)!MK>A6߻W$6‰XɺRҥ3?:6X-6E@E )PHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXA?6=C߻WED>3K֟MȬTT(#$!,*6=C߻WED>3K֟MȬTT HD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WX;966GȂ3ʉ5>R>BCT6;3D5366GȂ3ʉ5>R>BCT;3DHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXDC߻WR1@KDCW1@K,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966GE6/KOٟ@—P=>8E9RBHAVTJD8DAP536/Oٟ@=>8ERBHAVTD8A,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966&$CKOI9RB2SCI9COIRB2SC9,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966)'LPKO9RB6P6T 
LPORB6P6,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966PN6KO9RBEIT6>SK?KI—P=>KI90C9T><6ORBEIT6>SK?K=>K90CT,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966,*кBPKOK=9F9RHG8T#!кBPOK=9FRHG8,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966JHHKO>6/—P=9RH>DAP;0T?6T)!/-HO>6/=RH>A;T6T),*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966MKKO6/—P=KORDB6OKKO696KO6щQ@Ԛ<53O6/=ORDB6KO9O6щQ@Ԛ<,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966,*6/KO9RBDǬP/-C9AT0?9-8ٟ@6EE>PC9AT0?=C9AT0?=#!C9AT0?9-8@Ԛ<#!C9AT0?9-8@Ԛ<C9AT0?=C9AT0?=20ʻ?0?9<9=C9ATVB$/?BRÙKBTA?D>0?9<9=C9ATVB$/BEBC9AT0?=C9AT0?=#!C9AT0?9-8@Ԛ<#!C9AT0?9-8@Ԛ<C9AT0?=C9AT0?=0?6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>B7Uח>D6@Ԛ<B7Uח>D6@Ԛ<6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח> Uח>@K Uח>@K6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>B7Uח>ͦBOERB7Uח>ͦBOER6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>B7Uח>8;BٖTTB7Uח>8;BT6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>86AHFS=@=՞RU70ח>GDSPԮK߀320AHF=@=՞RU70ח>GDSPٮK6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח> B7Uח>DT("B7Uח>DT6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>6RTU7HˮDDA6TU7HˮDDA6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח> B7Uח> B7Uח>6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>20AHFS=@=՞RU70ח>GPB6,*AHF=@=՞RU70ח>GPB  ;GB;9ӱQL4ߩ75Q-<>;G  48@@@<ߩ7>48@@  F9Q?WɤKIԊX>F9Q?WɤK 3ϊXQK  ԊXQK,*3ϊX17Q7G/׆N8GF̛<ԊX13G/NGF 3ϊXQK  ԊXQK&$R3ϊX46߻WLQG8@Ԛ< RԊX46߻WLQG@Ԛ< 3ϊXQK  ԊXQK,*3ϊX17Q7G/׆N8GF̛<ԊX13G/NGF 3ϊXQK  ԊXQK><3ϊXR7Q7@475@:ȥB@AT/-ԊXR3@475@:ȥBA 3ϊXQK  ԊXQK,*3ϊX17Q7G/׆N8GF̛<ԊX13G/NGF 3ϊXQK  
ԊXQK"!F>"FN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F720DN/EL>7Aڶ>F7CDƹ;@Ԛ<,*DNȜML>7Aڶ>F7C4@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7)'HN/KN/ڶ>F7=A7B#!HN/KN/ڶ>F7=+N߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7 H/67—P=DG@KH/67=D@KN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7ܤKKA7B  ܤKK+N߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F77>1T֛7ٟ@9F6U>ʔ71/>ٟ@6LD7>/I/>=щQDDHIN./59Ԛ<ڶ>S-=DN@UW=-щQܭDHTDS=DSDA7>1֛7ٟ@9F6U>ʔ71/>5LD>/I/>=щQDDHIN./51S-=DN@UW=-щQܭDHTDS=DSDAN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7,*DN=8T=4ڶ>F7S@@@)'DN=8T=4ڶ>F7S@@N߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7 H/67Dƹ;DG@KH/674D@KN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7>S=>7ʗ74=>SB7ST86D1ƹ;T4>S=>7ʗ74>SB7STN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7V/67=DG@KV/67=D@K$5H149A$5H149Aec$/4UR5RH$>#=1,1>Bٟ@T9ALKٟ@6J=@Ԛ<\Z$/4U5RH$>#=1,1>@T9ALKٟ@6=@Ԛ<$5H149A$5H149A>EѾCT86VOTBA?$US/6T9A6APɺDEXET8VOTB$5H149A$5H149Aec$/4UR5RH$>#=1,1>Bٟ@T9ALKٟ@6J=@Ԛ<\Z$/4U5RH$>#=1,1>@T9ALKٟ@6=@Ԛ<$5H149A$5H149A53ER91@5H1Bٟ@49AE@@@/-ER91@5H1@49AE@@$5H149A$5H149A/-$U5/8=49Aٟ@5DSDA/-$U5/8=49Aٟ@5DSDA$5H149A$5H149A,*$9656549Q5؂=@Ԛ<,*$9656549Q5؂=@Ԛ<$5H149A$5H149Aec$/4UR5RH$>#=1,1>Bٟ@T9ALKٟ@6J=@Ԛ<\Z$/4U5RH$>#=1,1>@T9ALKٟ@6=@Ԛ<$5H149A$5H149ADB"Ξ)69$R549AIٟ@TN>CJ@@Ԛ<><"69$R549A@TN>CJ@@Ԛ<$5H149A$5H149A/-$U5/8=49Aٟ@5DSDA/-$U5/8=49Aٟ@5DSDA$5H149A$5H149A;9E4WN$RB5H4LDLIĪNCS@K;9E4WN$RB5H4LDLIĪNCS@K$5H149A$5H149Aec$/4UR5RH$>#=1,1>Bٟ@T9ALKٟ@6J=@Ԛ<\Z$/4U5RH$>#=1,1>@T9ALKٟ@6=@Ԛ<$5H149A$5H149A53@;5RH$ULT9A6DPDA/-@5RHULT9A6DPDA$5H149A$5H149A/-$U5/8=49Aٟ@5DSDA/-$U5/8=49Aٟ@5DSDA$5H149A$5H149A86DP>E5H"$ĪNL=496A7B/-P>E5H"$ĪNL=496+:/SʡH99SH :S9HDBSWJ9?9?:/SʡH99:/SʡH995ܛ?M)'WJ99:S9:S95ܛ?M:/SʡH99SH :S9H/-:/SʡH999?99?D6T:S9999D6:/SʡH99SH :S9H&$SV:/SʡH99S6TV:S96:/SʡH99SH :S9H#!S:/SʡH999?Έ;F:S99Έ;F:/SʡH99SH 
:S9HDBSWJ9?9?:/SʡH99:/SʡH995ܛ?M)'WJ99:S9:S95ܛ?M:/SʡH99SH :S9H&$SV:/SʡH999?<>KDH><>KJHRD>HHHHHH<>KDH><>Kwu7RDH><>K,07R2 -.TʆL@ϡS4,ܢEM,.O2J6MKR2 -.TʆL@ϡS4,E,.OJ6DH><>KDH><>KJHRD>HHHHHH<>KDH><>KMKRDH><>K,0IO9491یV0—P=—PH>.E6A?RH><>K,0IO94V0=—PH>.E6DH><>KDH><>KJHRD>HHHHHH<>KDH><>K#!RD>HH<>KDH><>KJHRD>HHHHHH<>KDH><>K/-DH>K=<,D6R=4,@Ԛ<&$D54,D6R=4,@Ԛ<DH><>KDH><>KJHRD>HHHHHH<>KDH><>K7RDH><>K2>7.ʆJ6ʆG1?—P=1?I2K7>>MGMߎM6>JRʆ.J6~.ʆJ6ʆG1?=1?IK7>MGMߎM6>JRʆ.J6DH><>KDH><>KJHRD>HHHHHH<>KDH><>K20RDH><>K2>J6/;IN9,*RH><>K2>J6/;N9DH><>KDH><>KJHRD>HHHHHH<>KDH><>K_]RDH><>K2>ʆ>I2́N4TȇN4TI(—Pބ2>N4ʆN4GERH><>K2>ʆ>I(N4ȇN4I(܉2>NʆNDH><>KDH><>KJHRD>HHHHHH<>KDH><>KGERDH><>K2>J>I2ˏR3˰(IB>—P3ˏR2;9RH><>K2>JIˏR3˰(IB>3ˏR2ʰDBNMG> BMG>JHɵO9FDSC4ʰDBN5>35-=9O2:@@@53ɵO9DSC4B5>I-=O2G@@ʰDBNMG> BMG> LNLBʰDBN@@@LNLB@@ʰDBNMG> BMG>JHɵO9FDSC4ʰDBN5>35-=9O2:@@@53ɵO9DSC4B5>I-=O2G@@ʰDBNMG> BMG>)'$";0Q8ҐJ9ҽ6WH)'$";0Q8ҐJ9ҽ6WHCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA 4AR=J DG@K4AR= D@KCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA/4ARQ=JB4/4ARQ=BCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA><İFE1;TVL8ARO8L0AWН?/Н?T,*İFBTVL8ARO8LAН?-CARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA20İFE1;TVL8ARO8L0AW&$İFBTVL8ARO8LACARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOAPNİFE1;TVL8ARO8L0AWWН?W?UUWTН?>;9İFBTVL8ARO8LAWW?UUW?CARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA,*İFE1;TVL8O3߫UТ@HT&$İFBTVL8O3߫UТ@HTCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA AR4J AR4JCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOAL/4ARQ>L/4ARQ>CARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA&$ŷ5/BAR4JX>BHH9;>B 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9CR9Xnj8@Ԛ<CR9nj8@Ԛ< 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R99XUTI9XNS;UOIַ;URIIIKIHBOF;F;N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9&$9X9C5I91ӛ?69; 9F5I91ӛ?69; 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R99XUC;- 9UC- 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 
9XR9  9R9\ZG9XWF5ԎB@JP11.3>72PNG9WF5BJP11.3>72 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R9869XB9ԎB@@OLWFR9B9N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9&$9X9X59QCͦ(!995ƋQC 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R9 9X@?9@ 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9YW9XB9ԎB@>54WFR9B9 IC70FŔ6ADMIַ;70DB9B9B>54WFR9B9 IC0FŔ61I7 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R9DB9XCK29R5>9XWA/1C2ODKOD539C2R5>9WA1C2ODKOD 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9&$9X>KTCΚIRН?>AT9>KCΚIR?A 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R920CCTC7VCEICַ;C;-CTCCCVĸIַ;C-C 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9DBHW:9XB9ԎB@=ʼnEDWFR9B99XCT86HW:9B9B=ʼnEDWFR9B99C 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R99X@T9XR0ܥ69@T9Rܥ6 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9G7;CT G7;C 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R9A?Hʜ2RA@RS9@>9X3>)כ$>;GB;9Hʜ2RA@RS9@>93>)כ$>;G 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 
9XR9  9R9><NR=9XC9S99׵AAKEAABC/;9NR=9C9S99׵AAKEAABC/=@KE= =@E=><@Q0H@KûAQH@KûAQ,HPHCB020@0H@ûAQH@ûAQ,HPHB0=@KE= =@E==@J@KI5@=@J@I5@=@KE= =@E=/-7ûAK3@3@K7KK3!#!7ûAK3@3@7K3=@KE= =@E=86=@KAKCK-3O?3377CT)'=@AKCK-.?.7C=@KE= =@E=/-K6S5@KE=4I,S@@@)'K6S5@E=4I,S@@=@KE= =@E= @K@?@@=@KE= =@E=)'C@ַ;C@GC@K=@AB&$C@ַ;C@GC@=@AB=@KE= =@E=DBIK@KQOַ;OE6V=ԋ J>JT7LJ653IK@QOַ;OE6V=JJ7LJ6ԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+,*ԃP;Q8ȘIK5ܢE4N>4OJAQ8K5>4OԃP;ܢE4JAˑ+ ԃP;1ܢE4NН?̛4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4б XQT)'AHQ8K5C>4б XQԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+MKԃP;HQ8ȘIK5NC>4ԃP;Q:33ȘIJ82THA>4AQ:33ȘIJ82HԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+/-ԃP;HL-TܢE4NC41TН?> AHL-TC41?ԃP;ܢE4JAˑ+Dֈ;0OFԃP;ܢE4JAˑ+,*ԃP;HQ8ȘIK5ܢE4NC>4 AHQ8K5C>4ԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+)'ԃP;E72TܢE4NŇ7̛4б 3QT)'AHQ8K5C>4б 3QԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NCT?TCܢE0&$AHQ8K5CT?T/ԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4XН?2J&$AHQ8K5C>4X2ԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4Н?̛4?ETԃP;ܢE4JAˑ+Dֈ;0OFԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4Н?̛4?UԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+#!Q1NÚQ8ȘIKTԃP;4Q1N8KTA4ԃP;ܢE4JAˑ+ ԃP;1ܢE4NН?̛C1A1J>=)'-AHC;>C1Aܹ1>=ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+~4NU.̤3@>ϥJ=T.-0ܢE4N5H01ԃP;R:?=N.̤3@>PTT>JFF8G3b`4NU.LϥJ=T.-05H01AR:=N.LPT>JFF8GԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+20ԃP;߽4Q8ȘIK5ܢE4N,4U/T&$A߽4Q8K5,4U/TԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+,*Q1ʡH9BXTܢE4NН?̛<7TQ19XT?7ԃP;ܢE4JAˑ+Dֈ;0OFԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4Н?>RT&$AHQ8K5C>4?RԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+20ԃP;߽4Q8ȘIK5ܢE4N,4XQT#!A߽4Q8K5,4XQԃP;ܢE4JAˑ+ ԃP;1ܢE4NН?̛4C-HН?̛<&##!)'AHQ8K5C>4*? 
ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+SQԃP;HQʡHɤUBUHMܢE4NCT۹/8HMT>JT8:G3>JT:GԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+ecԃP;߽4Q8ȘIK5ܢE4N,4ԃP;ܢE4N5NģCF4QO1MJEа.TН?>;9A߽4Q8K5,4AQO-Eа.T?ԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+/-ԃP;HUܢE4NCRKD?TيR̛<&$AHUCRKD?TيR̛<ԃP;ܢE4JAˑ+Dֈ;0OFԃP;ܢE4JAˑ+,*7ԃP;E72TܢE4NН?>AT7AE7T?AԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+86߹-JН?̛<ԃP;HQ8ȘIK5ܢE4NC>4&$-?AHQ8K5C>4ԃP;ܢE4JAˑ+ ԃP;1ܢE4NН?̛4б XQT)'AHQ8K5C>4б XQԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+#!ԃP;߽4UL6.TܢE4NA߽4UL6TԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+20ԃP;߽4U72TܢE4NԃP;߽4TН?T A߽4U7TA߽4Tܞ?ԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+;9ԃP;HQ8ȘIK5ܢE4NC>4 0̛4 0QTIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT /-TIOTބ2BJ768T7P4J#!TIOTބ2BќJ6874TIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT JHRTIOT4/ >BԚԚ U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT #!TIOTބ2B>TV>T#!TIOTބ2B>TV>TTIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT SQRP4D3TMɾSBTIOTL;U$ N,%!@Ԛ<;9R4D3TMBTIOTL;U N,@Ԛ<TIOT TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT 86R9TIOT> BK1١-JL;@@@/-R9TIOT> BK1١-8@@TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT trT>IOTմ2O̤@ROWBǞV<>MɾS3D UJDP>W>5ֈD,DL9ADSDAkiT>IOTմ2@ROWBȞV>M3D UJDP>W>5ֈD,DL9ADSDATIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT 53TIOT*B6J768T7P4J2)'TIOT*B6ќJ68742TIOT TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT ,*TIOT> ,:%!@Ԛ< TIOT> ,:@Ԛ<TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT A?T7IOT> 3D,R,SUUP4J@@@53T7IOT> 3D,R,SU4@@TIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT 86RT>IOTK>SF> P4J@@@)'RT>IOTKS> 4@@TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT /-TIOTB62LCP4J>T#!TIOTB62C4>TIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT MK9QDT7IOT>SFDU>F> ;/?BRÙKBT><9QDT7IOTSDU>F> ;/BEBTIOT 
TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT 20P4JTIOTSUXߢ?U,6XT&$4TIOTSUX?6XTTIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT 20TIOT47>4 3DFDSDA,*TIOT4> 3DFDSDATIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT 20TIOTB6J768T7P4BT)'TIOTB6ќJ6874BTTIOT TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT hfRT>IOT> UP4>4—P=AN,:L%!**P4>٬J=$@Ԛ<SQRT>IOT> U4>4=AN,:L**4>٬J=$@Ԛ<TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT DBRP4JTIOT>MKJIOTKK DPDA>MKJIOTKK DPDAA,G߇;G߇;%>MA,G߇;G߇;%>M\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<A,G߇;G߇;%>MA,G߇;G߇;%>M%A%AG  %AAA,G߇;G߇;%>MA,G߇;G߇;%>M%A%A%AAA,G߇;G߇;%>MA,G߇;G߇;%>M&'%IIA$ۏ"&'%IIAG&'%II :AGD3AT(%!AG}{&'%IIA&'%IIA&'%II :AD3ATVAA,G߇;G߇;%>MA,G߇;G߇;%>M\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<A,G߇;G߇;%>MA,G߇;G߇;%>M%A%A %AA A,G߇;G߇;%>MA,G߇;G߇;%>M%A%A%AAA,G߇;G߇;%>MA,G߇;G߇;%>M20%CV2%0J%2CWFTOWW)'%CV2%0%2WFTO9A,G߇;G߇;%>MA,G߇;G߇;%>M\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<A,G߇;G߇;%>MA,G߇;G߇;%>M20%BF%JW DG%AG@F:=#!%<%J D%A@:=A,G߇;G߇;%>MA,G߇;G߇;%>M%A%A%AAA,G߇;G߇;%>MA,G߇;G߇;%>MJHD9GM>AQٟ@DBU,G߇;G3MVٟ@6DPDA>AQٟ@DK,G߇;G3MV5DPDAA,G߇;G߇;%>MA,G߇;G߇;%>M\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<A,G߇;G߇;%>MA,G߇;G߇;%>M/-AG%;̽>MŹ(Źʿ@@@)'AG%;>Ź(Źʿ@@A,G߇;G߇;%>MA,G߇;G߇;%>M%A%A%AAA,G߇;G߇;%>MA,G߇;G߇;%>M20%DJW.>=V%JW G%A)'%DJW.>=V%J GA 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD86$& C2̙EϪJֈDT9J9@AB/- C2̙EϪJֈDTJ9@AB 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD)' 2EC$&E̛<0>WT 2ECE0>W 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD)'$& C2GE9ֈD@Ԛ<#! 
C2GE9ֈD@Ԛ< 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD;9Sޡ8$&>&2̙E ֈD>ܤK$'&9Q')'S>&2̙E ֈD>ܤKƋQ' 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD~6AB6T 2EۈXD:ۈX>ў7&B$&,&ίB>T7>KUVJJKUQTI1R/0Qec6AB6T 2EۈXD:ۈX>ў7&B,&ίB>T7KVQI1R/Q 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD$&2@ 8,T2@ ,T 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD_]$֗>AS 19EŹ4(>&24 EB߻WֈD1H%,9: >I\Z$֗>AS 19EŹ4(>&24 EB߻WֈD1H%,: >I 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈDA?$& ۈX2@QTWNEܾW,;PT,T86 ۈX2@QTWNEܾW,;ٱP,T 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD53ޥ0CE$&0> 2EֈDJ<=@,*ޥ0CE0> 2EֈDJ=@;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D;DHؕ7;EE@;Dؕ7;EE@;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>DXŷ5D/D/ Xŷ5DD;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D ;DHBU>UW6T;DΑB>U6;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D;DHDHDHT;DDDT;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D ;DH>  ;D>;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D BD/>  BD>;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D;DH=DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D;ӈ5UD>DHDH;5D>DDE1?0;E1?0;ַ;E1?,;@Ԛ<ַ;E1?,;@Ԛ<E1?0;E1?0;;9K6>HE1K/Q4DGKIAB86K6>HE1K/Q4GKIABE1?0;E1?0;GEDKOFHE1K/Q4DGKOJܤK>6DG@K20HE1K/Q4GܤK>6D@KE1?0;E1?0;#!DE1ߢ?08IDE1?1BT/>׆B/1/69IPTR;I@Ԛ<MKܤ5ַ;>E1?1BT/>׆B/1/69IPTR;I@Ԛ<E1?0;E1?0;GEDKOFHE1K/Q4DGKOJܤK>6DG@K20HE1K/Q4GܤK>6D@KE1?0;E1?0;A?A׆B?KUEI3R>7DE1?P;66@Ԛ<;9A׆B?KUEI3>7DE1?P;6@Ԛ<QE1?0;E1?0;1A?Iַ;  1AIE1?0;E1?0;ַ;E1?,;@Ԛ<ַ;E1?,;@Ԛ<E1?0;E1?0;53AUE1AIٟ@;N?985D@@@/-AUE1A@;N?985D@@G=ݰFBSF G=FF#!BN0ݰFBSF2Uа.TBNFF2*G=ݰFBSF G=FF)'AOݰFBFASF>LS2 AOFFAF>LSG=ݰFBSF G=FF#!BN0ݰFBSF2Uа.TBNFF2*G=ݰFBSF G=FF86ݰFBSFQBJ768T7QݰFBSFB&$FFQBќJ687QFFBG=ݰFBSF G=FF#!BN0ݰFBSF2Uа.TBNFF2*G=ݰFBSF G=FF AסET/ݰFBٟ@3@Ԛ<ATFٟ@3@Ԛ<G=ݰFBSF G=FF#!BN0ݰFBSF2Uа.TBNFF2*G=ݰFBSF G=FFSFUR7T FU7T11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ11F֎T V>б 11F֎T Vб 
11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ/-SAS11F֎T=>щQCE@@@,*SAS11F֎T=>щQCE@@11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ)'11F֎T=?N;78K11F֎T7K11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ=?N;C;MC;M11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJJH$U-£-E7-Ҳ0AʡH9DS&11F֎T7J6!A?$U-£-E7-Ҳ0AʡH9DS&11F֎T7611F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ;911F֎TBJHį-HUHڶ>2>AR@Ԛ<;911F֎TBJHį-HUHڶ>2>AR@Ԛ<11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ ӪN11F֎TE@@@ӪN11F֎TE@@11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ8611F֎T03VCJ768T711F֎T2011F֎T03VCќJ68711F֎T11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ11F֎T@?11F֎T@11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJSQDR07>I8Ҳ02AXڃN>11F֎TAKAٟ@HDPDAPNDR07>8Ҳ02AXڃN>11F֎TAKAٟ@HDPDA11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ5311F֎TW")$IK46)'11F֎TW")I411F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJJH7&:֎T11F֎TTTT=?N;T!537&:֎T11F֎TTTTTK11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ&$CE>11F֎T@0=@Ԛ<&$CE>11F֎T@0=@Ԛ<11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJPN11F֎T=?N;7=?N;GTTT - !.,11F֎T7GTTT+11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJki11F֎TKSħ;S C9>>4K.TRҲ0AGB@>=?N;)ʪ\Z11F֎TKSS Cޖ>>4K.TRҲ0AGB@>)ʪ11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ53&11F֎TRBOEVCE@@@,*&11F֎TRBOECE@@11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ,*7DT11F֎T6U=?N;7T11F֎T611F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ_]$U-£-E7-Ҳ0AʡH9DS&11F֎T$U-CɤUTҲ0AB!YW$U-£-E7-Ҳ0AʡH9DS&11F֎T$U-CɤUTҲ0AB86X,19CK/ - NW=HDEģCKX1KNW/DCVTX,19CʡH97/ - NW=HDEģCKGģC:7BWT53X1ʡH97NW/DCGģC7BW86X,19CK/ - NW=HDEģCKX1KNW/DCJHX,19CʡH97/ - 
NW=HDE8KDG@K/-X1ʡH97΂NW/D8KD@K86X,19CK/ - NW=HDEģCKX1KNW/DCGEX,19CʡH97/ - NWCHDEģCK΂:6T)'X1ʡH97NW޻/DC΂:686X,19CK/ - NW=HDEģCKX1KNW/DC\Z-AX,19CʡH97/ - NW=HDEģCK -:K48?:T><-AX1ʡH97NW/DC -:48?:T86X,19CK/ - NW=HDEģCKX1KNW/DC;9X,19CK/ - NW=HDEGI#!X1KNW/DGI86X,19CK/ - NW=HDEģCKX1KNW/DCb`X,19CʡH97/ - NW=HDEK?IU>DE?T΂:C̛<A?X1ʡH97NW/DE?IUD?΂:C̛<86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDEģCKX1KNW/DCGEX,19CʡH97/ - NW=HDE8K΂:4T/-X1ʡH97NW/D8K΂:4T86X,19CK/ - NW=HDEģCKX1KNW/DCVTX,19CʡH97/ - NW=HDEģCKGģC:7BWT53X1ʡH97NW/DCGģC7BW86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDE>KX1KNW/D>86X,19CK/ - NW=HDEģCKX1KNW/DCGEX,19CʡH97/ - NWCHDEģCK΂:6T)'X1ʡH97NW޻/DC΂:686X,19CK/ - NW=HDEģCKX1KNW/DCSQX,19CʡH97/ - NW=HDEOKDOGDO6G20X1ʡH97NW/DODGD6G86X,19CK/ - NW=HDEģCKX1KNW/DC;9X,19CK/ - NW=HDEGI#!X1KNW/DGI86X,19CK/ - NW=HDEģCKX1KNW/DCJHX,19CʡH97/ - /@CHWDEģCKùBNL,*X1ʡH97N޻/WDCùBNL86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDEģCKX1KNW/DCqoX,19CʡH97/ - NW=HDE>KL28AWT6O0U—PD7>6;PNX1ʡH97NW/D>LPAW6O0U—PD7>6;86X,19CK/ - NW=HDEģCKX1KNW/DCVTX,19CʡH97/ - NW=HDEģCKGģC:7BWT53X1ʡH97NW/DCGģC7BW86X,19CK/ - NW=HDEģCKX1KNW/DC>E6DSDA53-II6I6I66U>E6DSDANB-<66N-<66#!NB-<66ODSDAN-<66DSDANB-<66N-<66_]-I6DD9D66>=/,ֈ;N?KCL3;ނB/6/7TNؕ7؄/ESQ-I6D966>=/,ֈ;N?KCL3ނB/6/7TNڕ7ENB-<66N-<66><-I66OE60FǂSHAVTJD8DAP/--I66E6FǂSHAVTD8ANB-<66N-<6686-II6I6I66OU>E6DSDA53-II6I6I66U>E6DSDANB-<66N-<66#!NB-<6OC8A99N-<6OC8A9NB-<66N-<66_]-I6DD9D66>=/,ֈ;N?KCL3;ނB/6/7TNؕ7؄/ESQ-I6D966>=/,ֈ;N?KCL3ނB/6/7TNڕ7ENB-<66N-<66E6DSDA53-II6I6I66U>E6DSDANB-<66N-<66)'D-IHD6/E6-116)'D-IHD6/E6-116NB-<66N-<66_]-I6DD9D66>=/,ֈ;N?KCL3;ނB/6/7TNؕ7؄/ESQ-I6D966>=/,ֈ;N?KCL3ނB/6/7TNڕ7ENB-<66N-<66 кB-<ԋ/C66JƱCTкB-<ԋ/C66JϱCNB-<66N-<6686-II6I6I66OU>E6DSDA53-II6I6I66U>E6DSDANB-<66N-<66,*NB-=/,ֈ;N?KCL3;ނB/6/7TNؕ7؄/ESQ-I6D966>=/,ֈ;N?KCL3ނB/6/7TNڕ7ENB-<66N-<66865-Н?T  R>ܞ? 
İU7/ İU7/204UİU7/5.W@ßNWF/ÐWW/-4UİU7/5.W@ßNW/ÐWW İU7/ İU7//-UİU7/.W@ßN1T7̛<,*UİU7/.W@ßN17̛< İU7/ İU7/  -NUİU7/.@K  -NUİU7/.@K İU7/ İU7/534İU7/5:S9İU:4K"!,*4İU7/5:S9İU:4K" İU7/ İU7/86T14UİU7/5.:S9İUAWAT20T14UİU7/5.:S9İUAA İU7/ İU7/;94UİU7/5.W@ßNWF?9GHН?T204UİU7/5.W@ßNW?9G/ İU7/ İU7/204UİU7/5.W@ßNWF/ÐWW/-4UİU7/5.W@ßNW/ÐWW İU7/ İU7/GEUİU7/.W@ßNWF/ɴ9Н?Tɴ9ʡH9?/T;9UİU7/.W@ßNW/ɴ9ܞ?ɴ99/T İU7/ İU7/  -NUİU7/.@K  -NUİU7/.@K İU7/ İU7/#!4UİU7/5.W@ßN#!4UİU7/5.W@ßN İU7/ İU7/86T14UİU7/5.:S9İUAWAT20T14UİU7/5.:S9İUAA İU7/ İU7/864UİU7/5.W@ßNWF/̝5̛FˎWBDIKT)ʪ/-KFEڶ>FˎWBDIK)ʪį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J&$Sį-K>JNTCTT#!Sį-K>JϞNCTTį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J кB6Sį-KIKT:KкB6Sį-KIK:Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J#!;Kʗ,/Sտ7PC@;B ;Kʗ,/Sտ7PC;Bį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J203BBDK6S9A@S@060T203BBDK6S9A@S@060Tį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J/-;Kʗ,/—PL>CBFRKAKB,*;Kʗ,/—PL>CBFRKAKį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J)';Kʗ,/SKD͙7IRN͙7T&$;Kʗ,/SKDIRN͙7Tį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J#!SKб J768T7U>SKб ќJ687U>R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;,BR/>47,BR/>47R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;;9ѹ67,BƸ=DJ7.K/B9A=B@@@&$չ6,BƸ=DJ*/BA@@R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;;9R/B,B.P԰'0VAUѹ6FG,*R/B,B.P0VA"Uݹ6GR/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;zxR/,BCMR/@BBR-P2KONJ768T7;2/ޟEŮß1QİL R/Ξ),BWβI3I@K/->ß1QİL R/Ξ),BWβI3I@R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;\Z(<7N6B=G;3>7K  #!<K  R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;)'N6@4,BHAR/D@Ԛ<&$N@4,BHAR/D@Ԛ<R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;/-R/,B@Hٟ@ʜ2IAN6@@@)'R/,B@Hٟ@ʜ2IAN@@R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;,*/>,BJ>,BJ>,BAB,*/>,BJ>,BJ>,BABB78;U B8;UB;U>C@KB;U>C@KB78;U B8;U,*ʡH9=7B;U>CEJCEJC7CC78N@>;GB B;>8N@>;GB78;U B8;U,*B7;>8N@Ɓ-67Ɓ-6HT#!B;>8N@ȁ-7ȁ-HTB78;U B8;U7B;U>C8,T7B;U>C,TB78;U B8;UB;ULC8,TB;ULC,TB78;U 
B8;UB;U>C@KB;U>C@KB78;U B8;U)'7B;U>CBU8JCBU8JC7CC7;UN8C.VI<7; B>;UN8C.I7FU/J.ʭB/ FJ.ϭBMKDVD:JTʭB/>ڶ>9ԚGJE@Ԛ<A?DVD:JTϭB>9ԚGJE@Ԛ<FU/J.ʭB/ FJ.ϭBJ.ʭB/@? J.ϭB@FU/J.ʭB/ FJ.ϭB#!J.ʭB/L FUO@KJ.ϭBL FO@KFU/J.ʭB/ FJ.ϭB)'J.ʭB/L F;F?8,T J.ϭBL F;F,TFU/J.ʭB/ FJ.ϭBMKDVD:JTʭB/>ڶ>9ԚGJE@Ԛ<A?DVD:JTϭB>9ԚGJE@Ԛ<FU/J.ʭB/ FJ.ϭB,*J.ʭB/L FUO'GNOC&$J.ϭBL FO'GNOCFU/J.ʭB/ FJ.ϭB#!J.ʭB/L FUO@KJ.ϭBL FO@KFU/J.ʭB/ FJ.ϭB,*J.ʭB/L FUOLBڶ>9ԚGJE@Ԛ<A?DVD:JTϭB>9ԚGJE@Ԛ<FU/J.ʭB/ FJ.ϭBJ.ʭB/>LJ.ϭB>LFU/J.ʭB/ FJ.ϭB#!J.ʭB/L FUO@KJ.ϭBL FO@KFU/J.ʭB/ FJ.ϭB53J.ʭB/8NJ.ʭB/G>98F>T,*J.ϭB8NJ.ϭBG>98F>FU/J.ʭB/ FJ.ϭBMKDVD:JTʭB/>ڶ>9ԚGJE@Ԛ<A?DVD:JTϭB>9ԚGJE@Ԛ<FU/J.ʭB/ FJ.ϭB/-J.ʭB/8IC¨03?;9<>TJ.ϭB8IϨ0-<>FU/J.ʭB/ FJ.ϭB#!J.ʭB/L FUO@KJ.ϭBL FO@KFU/J.ʭB/ FJ.ϭBJ.ʭB/;J6J.ϭB;J6G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=/-?;8WB=&;WɾS2SCI9)'?;8WB=&;W2SC9G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=/-H޽B;8AE0WB=щQUP.T,*H޽B;8AE0WB=щQUP.G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=JHWBRPI9=50׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=20PG,DNG806WB=C=S7,*PG,DNG85WB=CS7G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=GEW=D,?R;G0G8DN@WG7ӽDIECӽDI>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=20޽BR0WB>=M>I?;8щQ@Ԛ<20޽BR0WB>=M>I?;8щQ@Ԛ<G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=)';80WB=D>щQDSDA&$;80WB=ӗ>щQDSDAG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=JHԓ459D0ԓ4B=SRJ>E;86ST!!";9ԓ45D0ԓ4B=SRJ>E;86STXG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=#!;8>E6QWB=@N 
;>E6QWB=@NG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=R8G8>=>PR8G8>=>PG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=VTԓ459D0ԓ4B=O׽RG6ST!!"DBԓ45D0ԓ4B=O׽RG6STXG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=DBG׫;@2>H8GK0G8WB=F?HG,H,DBG׫;@2>H8GK0G8WB=F?HG,H,G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=\Z7WCȻ22HG/CNK08W=ߌ,3=GGև9>TYW7WCȻ22HG/CNK08W=ߌ,3=GGև9>G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=H$,GG88W-BGHHH$,GG88W-BGHHQH$,GG88W-BGHHDETLBL=,KH$,GG88W-BGHH$,GG88W-BGHQH$,GG88W-BGHDETLBL,KG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=86G,DNG806WB=C=Pֈ;̛׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=DBTCRJG<8QG8O60G6U<8Gڶ>S=86CJG<8QG8O60G6<8GS=G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=DBS9I/CD<8JGԓ4GWB-RN= -KF7DBS9I/CD<8JGԓ4GWB-RN= -KF7 ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ520 Ͳ4ʉ5/%DHGAAOC4ˉ5%DHAAOC ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5  ʉ5ޚTDG@K5D@K ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5&$ ۚKʉ5RG̛<"&ۚK݉5G̛<" ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5ʉ5 8,T ʉ5,T ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5207 ʉ5ޚT4L/ȈXʉ5B-AB#!H6=>ʉ5B-AB ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5;9 ۚK4ʉ5G8OE>έ;LSDʡH9;,*ۚK4ʉ5GOE>٭;SDʡH9; ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5#!@ >ʉ5DSDA@>ʉ5DSDA ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5&$$6 6ʉ5@Ԛ<$66ʉ5@Ԛ<,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6 @K-; 
@K-;,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A653.HB@M64A6OI0щQUP.T/-.HB@M64A6I0щQUP.,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6><.HB@M64A6OHAVTJD8DAP20.HB@M64A6HAVTD8A,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6)'.49B3I6OFUPUT#!.49B3I6FUPU,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6A?ڤ55D>.1B@D4A= @6OG;P20ܤ5D>.1B@4A= @6G;P,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6GE.JS=HB@DH4ADAP;0T?6T)!,*.SHB@H4AA;T6T),*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A620.BKM4AHAVTJD8DAP)'.BKM4AHAVTD8A,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6/-.HB@D4A=6OGUP9T#!.HB@4ASGUP9 @GMT  @GM>  BIɤU1.@GMTC3G9/-VN>BIɤU1.@GMC3G9 @GMT  @GMDB@G.MTA/B@G.MTQ8ޚTNGKTOT,*@G6A/@G6Q8+KTO @GMT  @GM;9@G.MT,;MT73;E=57TIַ;)'@G6,;M7;E57TI @GMT  @GM)'@G@MT/-56P9?ַ;#!@G@M/-56P9? @GMT  @GM@G.MTG@=@GMT.@MTC3G3G9ܞNTTOC3G98Iַ;@G.MTG@=@GMT.@MTki@G6G=@GM.@MC3G3G9NTC3G9I@G6G=@GM.@M @GMT  @GM86@G.4@ϚL4MT;M4߹-WHԓ6Iַ;&$@G.@4M6߹-WHԓ6I @GMT  @GM&$@G.MT߹-5TOOIַ;@G6߹-5TOI @GMT  @GM.@MT.MT@MTܞND>.MTE=.MT=.MTIϪJ1.M@G.@MTDC3G98Iַ;\Z.@M6@MN>6E=6=6IϪJ1.M@G.@MDC3G9I @GMT  @GM&$.M@GMTJ-U@ؙDT#!.M@GMJ-U@ؙDT @GMT  @GM)''=.@GMTIB.<.M@GM6.@M@MEM=6C3G9I @GMT  @GM/-@G=@G.MT=.MTIG@ @G=@G6=6IG @GMT  @GM>  BIɤU1.@GMTC3G9/-VN>BIɤU1.@GMC3G9 @GMT  @GMDBMU@G@MT@MTMTMU,HP5ѳBʈFP?53M@G@M@MMM,HP5ѳBʈFP? 
@GMT  @GM;9@G.MT,;MT73;E=57TIַ;)'@G6,;M7;E57TI @GMT  @GM53@G.MTַ;@G.MTD,BPַ;Υ6&$@G6ַ;@G6D,Pַ;Υ6cI6;0ڳQ  +0ڳQ I6;ٟ@9ٟ@0A@Ԛ<+90A@Ԛ<KI6;0ڳQ  +0ڳQI6;-N  +-NI6;0ڳQ  +0ڳQ20I6;0ʭBќ:-WI6;I6>S2&$+0ʭBќ:-WI6I6>SoI6;0ڳQ  +0ڳQ&$UII6;-N1D@@@UI+-N1ځD@I6;0ڳQ  +0ڳQSQI6;096WI-:PUPޜFTI—PRMTI6ޜF6JH+096WI-:PUPޜFTIRMTI6ޜF6I6;0ڳQ  +0ڳQA?Q2?EC=E@.=9QCB9QCͦ(!)'Q2?EC=@ƋQCBƋQCiI6;0ڳQ  +0ڳQ .IWI6;8TAB.IW+8TABI6;0ڳQ  +0ڳQ86I6;6U=9=>C<ʡH6IHC<ʡH6IHTI6>6;DPDA86>I6>6;DPDA<I6>6;DPDA86>I6>6;DPDA,*ä=FBNLI6>6;DPDA86>I6>6;DPDA3PϪJBE҄JJ9R>9ֈDCSW9ٟ@192D>9ED>9@S6;,DP>=/UP.T.M@D>3PϪJBEԄJ9R>DCSW@192D>BD>9@S6;,DP>=/UP.I6>6;DPDA86>I6>6;DPDAI6>6;DPDA86>I6>6;DPDA6E>6E>I6>6;DPDA86>I6>6;DPDAI6>6;DPDA86>I6>6;DPDASE>C=,B/7Ȼ;T=.LGENA=C,B/7Ȼ;T=LȥW> 1ڶ>SGȥW> 1SG,*A> Q5=Qڶ>SȥW@@@&$A> Q5=QSȥW@@ȥW> 1ڶ>SGȥW> 1SG53>W5CȥWG8E<=?N;†M8T)'>W5CȥWG8E<†M8ȥW> 1ڶ>SGȥW> 1SGDB> @GWC;9Q66BW4 ȥW@@@><> @GWC;9Q6BW4 ȥW@@ȥW> 1ڶ>SGȥW> 1SGhf;>>WȥW,:K>;=?N;7=?N;GTTT - !FD;>>WȥW,:K>;7GTTT+ȥW> 1ڶ>SGȥW> 1SG,*A> Q5=Qڶ>SȥW@@@&$A> Q5=QSȥW@@ȥW> 1ڶ>SGȥW> 1SGDB>W5CWȥWG8E<=?N;TTTG8̛<86>W5CWȥWG8E<TTTG8ȥW> 1ڶ>SGȥW> 1SGDB> @GWC;9Q66BW4 ȥW@@@><> @GWC;9Q6BW4 ȥW@@ȥW> 1ڶ>SGȥW> 1SG20ȥW>W2G/I֣.ŞG9/;7;20ȥW>W2G/I֣.ŞG9/;7;ȥW> 1ڶ>SGȥW> 1SG,*A> Q5=Qڶ>SȥW@@@&$A> Q5=QSȥW@@ȥW> 1ڶ>SGȥW> 1SG20> >QR@8S֗T7ȥW@@@/-> >QR@8S֗T7ȥW@@ȥW> 1ڶ>SGȥW> 1SGDB> @GWC;9Q66BW4 ȥW@@@><> @GWC;9Q6BW4 ȥW@@ȥW> 1ڶ>SGȥW> 1SG20> ȥWS8D0;T=?N;)'> ȥWS8D0;TȥW> 1ڶ>SGȥW> 1SG,*A> Q5=Qڶ>SȥW@@@&$A> Q5=QSȥW@@ȥW> 1ڶ>SGȥW> 1SG#!ȥWȥWKȥW,:ĝ ȥWȥWKȥW,:؝ȥW> 1ڶ>SGȥW> 1SGDB> @GWC;9Q66BW4 ȥW@@@><> @GWC;9Q6BW4 ȥW@@ȥW> 1ڶ>SGȥW> 1SG_]N9UL=>˾3ȥW> G/NIǡ6TTT=?N;T!DBNU=>˾3ȥW> G/NIǡ6TTTTK  ?J=  ?J=)'VHDJ>4=5D3Ȼ;>T VD>4=5D3Ȼ;>  ?J=  ?J=DJ>?=DJ>?=}  ?J=  ?J=?J=Uа.T ?J=*  ?J=  ?J= J?,= J?,=  ?J=  ?J=;9?EJ=׍Q7E70 NʡH -H064T53?EJ=׍Q,0 NʡH -H064T  ?J=  ?J=DJ>?=GĊA>TDJ>?=GĊA>  ?J=  ?J=/-D9DDG?>J>,NDSDA#!9G?>J>=DSDA  ?J=  ?J=?EJ׍QDG@K?EJ׍QD@K  ?J=  ?J= D/F;  DF;  ?J=  ?J=,*DJ>?=E?NKLF9@K)'DJ>?=E?NKLF9@  ?J=  ?J=?=EJ=׍QPB6?=EJ=׍QPB  ?J=  
?J=;?1KEJ>=׍QCPDCK9K>ٟ@9@9W>4R/ҾWB1.O>NB9KJK>N9͝,ڪ3.WȻBDEA¶7ģC:Q;?1KEJ>=׍QCPDC9>ٟ@9@9W>4R/ҾWB1.O>NB8J>N9Ν,.WȻBDENģC:Q  ?J=  ?J=20?>?J>,N166==@Ԛ<)'?>?J>=16=@Ԛ<  ?J=  ?J=DJ>?=@KDJ>?=@K  ?J=  ?J=>?=4FSCܞN/OJ-0E/-DJ>?=4FSNOJ7E  ?J=  ?J=?J=4Н?A3AT?J=4AA  ?J=  ?J=)'VHDJ>4=5D3Ȼ;>T VD>4=5D3Ȼ;>  ?J=  ?J= ?EJ=׍QFK AB ?EJ=׍QFK AB  ?J=  ?J=?J=Uа.T ?J=*  ?J=  ?J=)'J>?=ʡH۩RV-T.6.T&$J>?=ʡH۩RV-T.6.  ?J=  ?J=;9?EJ=׍Q7E70 NʡH -H064T53?EJ=׍Q,0 NʡH -H064T  ?J=  ?J=20UWX=6?KJJ=3WН?>AT,*UWX=6?KJJ=3W?A  ?J=  ?J=/-D9DDG?>J>,NDSDA#!9G?>J>=DSDA  ?J=  ?J=864?߸3ѝ6B5-0IJ?߸3==I̛=F>>@>T#!DJ7>=F>>@>IFET> IFT>_]IFE71UC56K7WE>VWA75SJS24.@7Uև9>TVTIF71UC6K7WE>VWA75SJS24.@7Uև9>IFET> IFT>&$1FEWK.WKC:ET1FWKWKC:EIFET> IFT>,*IFED6AS1F՟?>>@Ԛ<#!IFD6Aū1?>>@Ԛ<IFET> IFT>;9IFEAW̋?6FF1UK>626::@20IFA̋?6.1UK>626::@IFET> IFT>_]IFE71UC56K7WE>VWA75SJS24.@7Uև9>TVTIF71UC6K7WE>VWA75SJS24.@7Uև9>IFET> IFT>&$IKMFE->CϨHQRTIKMF-CΨQRTIFET> IFT>,*IFED6AS1F՟?>>@Ԛ<#!IFD6Aū1?>>@Ԛ<IFET> IFT>20IFED6AS1F՟?>>DSDA)'IFD6Aū1?>>DSDAIFET> IFT>_]IFE71UC56K7WE>VWA75SJS24.@7Uև9>TVTIF71UC6K7WE>VWA75SJS24.@7Uև9>IFET> IFT>53FE>>M*ɬI*I*55TH>M*ɬI*I*5THTIFET> IFT>,*IFED6AS1F՟?>>@Ԛ<#!IFD6Aū1?>>@Ԛ<IFET> IFT>53HFE>>@IU>J-F>TLP20HF>>@IU>J-F>TLPIFET> IFT>_]IFE71UC56K7WE>VWA75SJS24.@7Uև9>TVTIF71UC6K7WE>VWA75SJS24.@7Uև9>IFET> IFT>20I—P=E>>FEDH>QIB,ܔN)'I=E>>FDH>QIBG DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;:O;4P@Ԛ<:;4P@Ԛ< DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO; -:O;WL/?T -:;W. DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;O:4;DG@KO:;D@K DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;:OD>;@K:D>;@K DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO; DO;2  D;2 DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO; :O;2,LDG@K:;2,D@K DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO; :O;2  :;2 DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;:O;28,T:;2,T DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;&$ :OƔ>;21ET!! 
:Ɣ>;21ET DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;:O;28,T:;2,T DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO; P:O8;:I̺@:TP:8;:@ DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;#!:O;J:O4974T:;J:474T DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;DO;2:TD;2:T DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;:O;2DG@K:;2D@K DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;:O;27Cͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?&$/IMTS;ͺ?ٟ@6A7BITS;ͺ?5+ (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?&$(TS;>6/IM@@@(TS;>6I@@ (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?86/IMTR;>>VBͺ?C7=V-AB)'ITR;>>Bͺ?C7VAB (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?(TS64ͺ?(TS64ͺ? (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ? /IMT;ͺ?DSDAIT;ͺ?DSDA (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?A?/IMPD;Fͺ?M7K/1I-I-@Ԛ<53IPD;Fͺ?MK/I-I-@Ԛ< (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ? /IMF̽>S6>NBIF̽>S6>NB (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?53;ͺ?9T./I/J@/TA/IMT,*;ͺ?9T.I/J@/TAIT (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?86/IM̺ٟ@6ʔ7;Vͺ?2(/IMI@)'I̺5ʔ7;Vͺ?2(II@G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8&$UJG>SIBEU3H8UG>SIB8H8G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8zx7HܞNDG>SEU7HܞNDG>SEUQ7HܞNDG>SEUDET߹-8Lԓ6Iַ;C=.b`7HNG>S87HNG>S8Q7HNG>S8DET߹-8Lԓ6IC=G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8ki/K@G>SEUSTSUQ=WBSEUSIBEU߹-=EMSIַ;BU1TPN/KG>S8SŘSEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8DB-ܞNDG>! 
)SEUQ-Q;ۓRTCG0/--NG>S8Q-Q;ۓRCG0G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S886RNUG>SEUIBSEU) :/B#!NG>S8IBS8:/G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8/-ܞNDG>SIBEU;SIBEU&$NG>SIB8;SIB8G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8SQ-ܞNDG>SEUQD2VFȣ84XIUҔB<֗TI7Iַ;ŒATJH-NG>S8QD2VFȣ84XIUҔB<֗TI7IŒATG>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8 P PG>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S820DGIBEUSEUV;EUBEU#!DGIB8S8V8B8 DBCDIٟ@964>DBR@54>,*BDCD94>,6ODPDA&$BDR94>,6DPDA DBCDIٟ@964>DBR@54>#!BCDO94>6O@Ԛ<BRO94>6@Ԛ< DBCDIٟ@964>DBR@54>DBDBD>CD.NA>%>R6Iٟ@97DSDA53B>R.NA>%>R6@4DSDA DBCDIٟ@964>DBR@54>20DCD97UDE4Oٟ@6AA7B DR5UDMOٟ@6+ DBCDIٟ@964>DBR@54>,*BDCD94>,6ODPDA&$BDR94>,6DPDA DBCDIٟ@964>DBR@54>PNDBCDIٟ@964E>йSDK9ٟ@9SM>BU-щQ@Ԛ<>޹S @9SM>BU-щQ@Ԛ< DBCDIٟ@964>DBR@54>DBDBD>CD.NA>%>R6Iٟ@97DSDA53B>R.NA>%>R6@4DSDA DBCDIٟ@964>DBR@54>PNOD6>D=7ADBDCD=9>DIٟ@OD2O@@@>D=7ABR9>D@OD2O@@ DBCDIٟ@964>DBR@54>,*BDCD94>,6ODPDA&$BDR94>,6DPDA DBCDIٟ@964>DBR@54>b`DBDCDCٟ@9ɤKE7>RɤK/ϪJ>H=Q996ɤKA>A910TDPNBR@9ɤKE7>RɤK/ϪJ>H=Q95ɤKA>A10TD DBCDIٟ@964>DBR@54>DBDBD>CD.NA>%>R6Iٟ@97DSDA53B>R.NA>%>R6@4DSDA DBCDIٟ@964>DBR@54>_]DBCDN59OH348BD4R4O@4WOŮPO4/TDOTDBDBRN5O38BD4MO@4WX޵+TOT DBCDIٟ@964>DBR@54>,*BDCD94>,6ODPDA&$BDR94>,6DPDA DBCDIٟ@964>DBR@54>#!BCD94>A6O@Ԛ<BR94>A6@Ԛ< DBCDIٟ@964>DBR@54>DBDBD>CD.NA>%>R6Iٟ@97DSDA53B>R.NA>%>R6@4DSDA DBCDIٟ@964>DBR@54> UCD94>A6?,UR94>A6?, BU06˩5FE91PBU06FE1PA?BTS6˩5؇9?˩5OMR9I1FUFFPJ86BS6؇9?˩5OMR9I1FUFPJ BU06˩5FE91PBU06FE1P/-B؇96˩5ֲR1FQ?ٟ@SPG3&$B؇96ֲR1FQ?ٟ@SG BU06˩5FE91PBU06FE1P20UC>B06˩5NR31SFщQ@Ԛ</-UC>B06NR31SFщQ@Ԛ< BU06˩5FE91PBU06FE1P#!BOFR6˩5֛7>3PJBOFR673PJ BU06˩5FE91PBU06FE1PA?BTS6˩5؇9?˩5OMR9I1FUFFPJ86BS6؇9?˩5OMR9I1FUFPJ BU06˩5FE91PBU06FE1PkiBTS6˩50QN?9H9RIJIН?TXLI/I/I/B=6I6B=-0YWBS60QN?9H9RIJIܞ?ɜXI/II/B=6I6B=0 BU06˩5FE91PBU06FE1P20UC>B06˩5NR31SFщQ@Ԛ</-UC>B06NR31SFщQ@Ԛ< BU06˩5FE91PBU06FE1PDB05OȨKFD9IVBTELȨKF9IV:TН?>/-05OӨKDIVBELӨKIV:? 
BU06˩5FE91PBU06FE1PA?BTS6˩5؇9?˩5OMR9I1FUFFPJ86BS6؇9?˩5OMR9I1FUFPJ BU06˩5FE91PBU06FE1P20BTS6˩50BT6˩51T7H;T#!BS60B617H;T BU06˩5FE91PBU06FE1P20UC>B06˩5NR31SFщQ@Ԛ</-UC>B06NR31SFщQ@Ԛ< BU06˩5FE91PBU06FE1P BT66˩50QGН?>B660QG? BU06˩5FE91PBU06FE1PA?BTS6˩5؇9?˩5OMR9I1FUFFPJ86BS6؇9?˩5OMR9I1FUFPJ BU06˩5FE91PBU06FE1PBR6˩51?FBTBR61?BT BU06˩5FE91PBU06FE1P20UC>B06˩5NR31SFщQ@Ԛ</-UC>B06NR31SFщQ@Ԛ< BU06˩5FE91PBU06FE1P BT؇96˩5M5RFFB؇96M5RFFCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T.TGT6>?>P.TG6>?>PCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T@N>PC @N>PCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T;9G߹-.TGTޚT>9BKR9KϋIL‡KAB20G߹-.TGޚT>BR9KϋIL‡KABCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T)'.TGT6>7KM?U>T .TG6>7KM?,CE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T20.T9Kʉ55>A>BK=U;Н?T)'.T9Kʉ55>A>BU;ܞ?CE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T/-.T9Kʉ55>A>BK=3RT&$.T9Kʉ55>A>B3ҔRCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6TDBO<>TRIO.TRIOVTIOB<ȬTIQ>86O<>TRO.TROVTIOB<ЬTQCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6TMK.TGT9.DS>9>AK@—PB@ A6O:@@@><.TG9.DS>IAK@B@ A6:@@CE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T.TGT6>@Ԛ<.TG6>@Ԛ<̾-,AJT0ޡ8;-AJT0AJT?@KAJT?@K̾-,AJT0ޡ8;-AJT0;9̾-,4FE4AJT54T?1WQ̛<7T/-;-4E4AJT54T?WQ̛<7̾-,AJT0ޡ8;-AJT0\Z̾--,̾-,6.ΩWH8443AJT4Q۹/85?1WK40ޡ8>1MK̾--,;-6ΩWH8443AJT4Q۹/85?WåK0>1̾-,AJT0ޡ8;-AJT0PN̾--H,̾-,6.ΩW4L5/B4W5H/OTANя7>1GE̾--H,;-6ΩW4L5/B4W5H/OTAN>1̾-,AJT0ޡ8;-AJT0,*7H984AJT54>0ޡ87̾-,AJT0ޡ8;-AJT0 ̾-/XT ̾-/XT̾-,AJT0ޡ8;-AJT0  ̾-CT  ̾-CT̾-,AJT0ޡ8;-AJT0,*H84AJT540ޡ8>1@K)'H84AJT540>1@K̾-,AJT0ޡ8;-AJT0AJT?@KAJT?@K̾-,AJT0ޡ8;-AJT0\ZH,̾-,XΩW84ALT540ޡ8>11DD>7U ̾-X̾-X-TMKH,;-X84ALT540>11D>7U 
̾-X̾-X-̾-,AJT0ޡ8;-AJT0\Z̾--,̾-,6.ΩWH8443AJT4Q۹/85?1WK40ޡ8>1MK̾--,;-6ΩWH8443AJT4Q۹/85?WåK0>1̾-,AJT0ޡ8;-AJT0JH8AJTOC6̾-,84L5/TS:-1QBU/;868AJTOC6;-84L5/S:-ڠ#/̾-,AJT0ޡ8;-AJT0,*7H984AJT54>0ޡ87̾-,AJT0ޡ8;-AJT0/-̾-//?84AJT5T;U/T&$̾-//84AJT5T;*̾-,AJT0ޡ8;-AJT0  ̾-CT  ̾-CT̾-,AJT0ޡ8;-AJT0;9̾-,EAJTTDɍPMA:7.U/T/-;-EAJTTDӍPA:7.*̾-,AJT0ޡ8;-AJT0AJT?@KAJT?@K̾-,AJT0ޡ8;-AJT0ILIL̾-,AJT0ޡ8;-AJT0\Z̾--,̾-,6.ΩWH8443AJT4Q۹/85?1WK40ޡ8>1MK̾--,;-6ΩWH8443AJT4Q۹/85?WåK0>1̾-,AJT0ޡ8;-AJT0&$̾-4AT95/?V/?T ̾-4AT95/@?T)'FBUQDND6S?F: FBUQND6S?,*7F:BP1ND?F:@Ԛ<#!7:BP1ND?@Ԛ<)'FBUQDND6S?F: FBUQND6S?R6!8,TR6,T)'FBUQDND6S?F: FBUQND6S?207F:BP1ND?F:6S@Ԛ<)'7:BP1ND?6S@Ԛ<)'FBUQDND6S?F: FBUQND6S?531K>QP?F:Bб 4D=3-AB,*1K>QP?Bб 4D=-AB)'FBUQDND6S?F: FBUQND6S?,*7F:BP1ND?F:@Ԛ<#!7:BP1ND?@Ԛ<)'FBUQDND6S?F: FBUQND6S?;94F:̔6BUPV715CS?F:@Ԛ<204:̔6BUPV715CS?@Ԛ<)'FBUQDND6S?F: FBUQND6S?207F:BP1ND?F:6S@Ԛ<)'7:BP1ND?6S@Ԛ<)'FBUQDND6S?F: FBUQND6S?&$̔6ַ;IBUVԋ/CS?F:1IBUVԋ/CS?)'FBUQDND6S?F: FBUQND6S?,*7F:BP1ND?F:@Ԛ<#!7:BP1ND?@Ԛ<)'FBUQDND6S?F: FBUQND6S?GEF:̔6BU>ȣ89071KK6S?F:DSDA><:̔6BU>ȣ89071KK6S?DSDA)'FBUQDND6S?F: FBUQND6S?207F:BP1ND?F:6S@Ԛ<)'7:BP1ND?6S@Ԛ<)'FBUQDND6S?F: FBUQND6S? DA7O=—PRߑ4PTDA7=Rߑ4PT)'FBUQDND6S?F: FBUQND6S?,*7F:BP1ND?F:@Ԛ<#!7:BP1ND?@Ԛ<)'FBUQDND6S?F: FBUQND6S? 
?F:6S>JK2@Ԛ<JHU2QNDHF/@SKDND SC>K2@Ԛ<UNDHF/UDHF/#!0-0:Nٟ@HFVFT0-:ٟ@HFFTUNDHF/UDHF/GEM:5UND8F/?PS6 1B>UDF?PS6UNDHF/UDHF/PNU2QN5DHF/Bٟ@SKDND SC>K2@Ԛ<JHU2QNDHF/@SKDND SC>K2@Ԛ<UNDHF/UDHF//-ӟ;N@R>8FS/"ҥ3!@;6&$ӟ;NR>8FS"ҥ3!@6UNDHF/UDHF/GEM:5K2@Ԛ<JHU2QNDHF/@SKDND SC>K2@Ԛ<UNDHF/UDHF/&$ FS5/ FS/UNDHF/UDHF/GEM:5K2@Ԛ<JHU2QNDHF/@SKDND SC>K2@Ԛ<UNDHF/UDHF/>/@K8FENܜ>@K  ,ݠ.A,A_]O70CT,ݠ.7>DGܤKP04TVAV07>?Q;GEO߫B>GK04TVAV07>?Q;  ,ݠ.A,A86,ݠ.>O/19O616ABTGA7B/-,>O/19O616ABTG+  ,ݠ.A,A,ݠ.ݠ.O ,ݠ.O  ,ݠ.A,AO,ݠ.B:DG@KO,BD@K  ,ݠ.A,A_]O70CT,ݠ.7>DGܤKP04TVAV07>?Q;GEO߫B>GK04TVAV07>?Q;  ,ݠ.A,A CN,ݠ.QADPDACN,QADPDA  ,ݠ.A,A,ݠ.ݠ.O ,ݠ.O  ,ݠ.A,A)'Iַ;DN0CT,ݠ.AщQ@Ԛ<Iַ;DNAщQ@Ԛ<  ,ݠ.A,A_]O70CT,ݠ.7>DGܤKP04TVAV07>?Q;GEO߫B>GK04TVAV07>?Q;  ,ݠ.A,A,*>T,ݠ.9ABAA4˛5DA4>,9ABA˛5DAn  ,ݠ.A,A,ݠ.ݠ.O ,ݠ.O  ,ݠ.A,A NT,ݠ.Nĵ*  ,ݠ.A,A_]O70CT,ݠ.7>DGܤKP04TVAV07>?Q;GEO߫B>GK04TVAV07>?Q;  ,ݠ.A,APNX>T9;;>X>QA7AO7RN;X7:U>E8DBX>;>X>QA7AO7N;X7:U>E8  ,ݠ.A,A,ݠ.ݠ.O ,ݠ.O  ,ݠ.A,AA?O߹-5,ݠ.߹-,ݠ.:߹-HİUMANC)O8,T53O߹-5,߹-,:߹-HMANC)O,TFIֈD:0DFI:0DIֈD:0@? I:0@FIֈD:0DFI:0D IֈDN0D:DG@KIN0DD@KFIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0DIֈDGC?DIGC?DFIֈD:0DFI:0DIֈD:0@? I:0@FIֈD:0DFI:0DD:IֈD14  DI1FIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0DIֈD:0@? I:0@FIֈD:0DFI:0D><يRIֈD:0DيR4IֈD:0BIيR4TC,>)'يRI:0DRI:0BIRCFIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0D;9IֈD>0EFR4:0>ğCѭDӮD:ٟ@H@Ԛ</-I>0EFM:0>ɟCܮDٟ@H@Ԛ<FIֈD:0DFI:0DIֈD:0@? 
I:0@FIֈD:0DFI:0D,*IֈD:0ߢ?DT7N79UAT#!I:0ߢ?D7N79UAFIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0D><0IֈD:0D04IֈD:0BI04TC,>/-0I:0D04I:0BI04CC@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>;9E87CC@N.H˱U=FCסE@@@2087CC@N.H˱U=FC@@C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>GE= ->C@N7U0>ٟ@6MVIW>EDSDA><= ->C@NU0>ٟ@6V=>EDSDAC@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>A?C@N= -F>EMӛ?ߤ8>4FC@N@@@;9C@N= -F>EMӛ?ߤ8>4C@N@@C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>DB= ->CסEC@NDE0**ԑ49A*/@@@;9= ->CC@ND0**ԑ49A*/@@C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>b`B˩55=>O*7C@N7C@ĕ6TFR/HFH4ĕ6TPNB˩55=>O*C@ĕ6TFR/HF4ĕ6TC@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>_]6ɵO=>C@Nð.A ->>ٟ@9ٟ@DDܢESܤKA@CסESܤKA@Ԛ<SQ6ɵO=>C@Nð.A ->>9DܢESܤKA@CSܤKA@Ԛ<C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>/-7C@N7 -=FSÐW7#! -=FSÐW7C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>><= -F>C@NPEMӛ?M>>Fٟ@6@Ԛ<;9= -F>C@NPEMӛ?M>>F5@Ԛ<C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>)'C@N= -F(D>M@Ԛ<)'C@N= -F(D>M@Ԛ<HS/ON4/:HSON4/VT7H>S/OB4ʡH9ɰ5ȥ7/:į?I EL/ EHS/ON4/:HSON4/JHùBLW¶7/J7H>/B/WȥOB4784/:ĹBN/J84/HS/ON4/:HSON4/,*HS/ON4/:ҁX?L-T#!HSON4/ҁX?L-HS/ON4/:HSON4/&$HW>S/94/:AƭIHW>S94/AHS/ON4/:HSON4/)'7H/B/>ON47/:/HS/ON4/:HSON4/;9H>S/>4NO/://9¶7JùBL/#!>4NO//NJĹB/HS/ON4/:HSON4/;97H>S/>OB47BR:0ABBR:0ABHS/ON4/:HSON4/20/:7H>S/OB47/://HS/ON4/:HSON4/VT7H>S/OB4ʡH9ɰ5ȥ7/:į?I EL/ EHS/ON4/:HSON4/&$H>S/OB4">OB4>HS/ON4/:HSON4/,*HS/ON4/:ҁX?L-T#!HSON4/ҁX?L-HS/ON4/:HSON4/PN7HS/47/: ȥǶ,W¶7/>;GB20/ ȥǶ,N/>;GHS/ON4/:HSON4/)'7H/B/>ON47/:/HS/ON4/:HSON4/kiùBL9¶7/J7HS/9ȥ4NO7/:9¶7//:66ȈX4&20ĹBN/J/N//66ȈX4HS/ON4/:HSON4/;97H>S/>OB47BR:0ABBR:0ABHS/ON4/:HSON4/><ʡHU٨I7HS/47/::,AF> ʡHU٨I/:,>HS/ON4/:HSON4/VT7H>S/OB4ʡH9ɰ5ȥ7/:į?I EL/ EHS/ON4/:HSON4/HN1,;TLH1,;TLX:86˩5 X:86&$X:86˩5>X:86˩57 X:86>X:867X:86˩5 X:8686X:86˩54X:864VDT(!)'X:864X:86VDTX:86˩5 X:86&$X:86˩5>X:86˩57 X:86>X:867X:86˩5 X:86)'E8:X66˩5H38@@@#!E8:X66H38@@X:86˩5 X:86&$X:86˩5>X:86˩57 
X:86>X:867X:86˩5 X:8620X:86ӻBOX:86˩5Q464T#!X:80X:86Q6TX:86˩5 X:86&$X:86˩5>X:86˩57 X:86>X:867X:86˩5 X:86GEX:86ӻBOX:86ӻBOX:86H6T$!20X:80X:86OX:866TX:86˩5 X:86&$X:86˩5>X:86˩57 X:86>X:867X:86˩5 X:86#!5X:8>6˩56R@Ԛ< 5X:8>66R@Ԛ<߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D> ߹-U.8 ߹-U.8߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>/-U.>DP?14:щQȻ;T=.L)'U.>D?14:щQȻ;T=L߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D>)'кBU.6:DP߇;Ȼ;T=.L#!кBU.6:D߇;Ȼ;T=L߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D> U.8߹-U..ʺBPT U.8߹-U..ʺBPT߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D> ߹-U.:/0EFT6 ߹-U.:/0EFT6߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D>&$U.VP1B,, 5&$U.VP1B,, 5߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D>߹-V;T64߹-V;T6߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>MK߹-U.6>P5,A߹-U.˭V6,3T߹-˭V6܈IU?90GE߹-U.6>P59߹-U.˭V6,3T߹-˭V6߈I?90߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D>MK N =.H= F0BU.$D: N =MPMPJH N =U= F0BU.$D: N =MPMP߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D>/-߹-U.DI429-DIV=RJ#!߹-U.D429-+RJ߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>53߹-U./0Bб DD7=EUT۹/UD,*߹-U./0Bб DD7,U/D߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D>;9߹-U.0NUOބ2E   =ĪC'AB&$߹-U.0NUOǷ. 
=AB߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D>SQ:Aб =>U.=9V>D>9ԚU.V>D>1OISÄN989FT6߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>)'U.>DP6:,Ȼ;T=.L#!U.>D6:,Ȼ;T=L߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D> ߹-1U.9TDSDA ߹-1U.9TDSDA߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D>GE3Ԛ9VC=6RMK -PT61TP6531U.D9VC6RK -PT61TڀP߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>;9߹-U.:SM?B;BɵOMSB#**.T53߹-U.:SM?B;BֵOSB#**.  U8SUS/-1۠N -FɹKU=S5ۓR:ϡSFAT#!ޠN -FU=S5ۓR:ݡSA  U8SUS&$FMGMM>.3ˠS87TFMGMM>.87  U8SUSA?N,ˏR0#>ˌD3U=SNۥN&7><N,ˏR0#>ьDU=SNۥN&7  U8SUS)'VX?AM—PS>SM8GJ#!VX?AM—PS>SٶM1  U8SUS/-1۠N -FɹKU=S5ۓR:ϡSFAT#!ޠN -FU=S5ۓR:ݡSA  U8SUS/-U=Sб .65J?O4ʄ/&87&$U=Sб .6JO4ʄ/&8  U8SUSA?N,ˏR0#>ˌD3U=SNۥN&7><N,ˏR0#>ьDU=SNۥN&7  U8SUSVT70:7KU6A8>C¾98T—P7—PX>¾987;>C¾98—P7—PX>¾98;  U8SUS/-1۠N -FɹKU=S5ۓR:ϡSFAT#!ޠN -FU=S5ۓR:ݡSA  U8SUSA?U=Sб 7̛<87E7C77̛<(!53U=Sб 7̛<87E7C77̛<  U8SUSA?N,ˏR0#>ˌD3U=SNۥN&7><N,ˏR0#>ьDU=SNۥN&7  U8SUS86W7IU8>SESٟ@M߫U@U'@@@20W7IU>SESٟ@M߫U@U'@@  U8SUS/-1۠N -FɹKU=S5ۓR:ϡSFAT#!ޠN -FU=S5ۓR:ݡSA  U8SUS>< -4J6 NLF;8T786 -4J6 NLF87 N/,QEO. N/+O.53 N/,QI/E4OL/.DQET)' N/Q@E4OL/.T N/,QEO. N/+O.;9/,<7F NCN:QEI/4O5.L,*/<7F NN:+@4O5L N/,QEO. N/+O.53 N/,QI/E4OL/.DQET)' N/Q@E4OL/.T N/,QEO. N/+O.b`/QET N/QEVK/QEL9O/Լ=ET/QE/4/VQE1WJH/+T N/+V/+L9O/=T/+//V71W N/,QEO. N/+O.53 N/,QI/E4OL/.DQET)' N/Q@E4OL/.T N/,QEO. 
N/+O.&$ NCN/QEL)5E6>  )@?)@)ٟ@6E6>)5E6>)'UC9S;ٟ@>6E6>@Ԛ<)'UC9S;ٟ@>6E6>@Ԛ<)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>&$VX,)EBٟ@&EϜVQTVX)E@&EϜVQ)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>DBDԚ<(!ٟ@6ES>)%"6"&)'D5ES>)%"6")ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>#!;ښL)E6??OKT;ښL)E6?AT)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>&$8V1)ٟ@>6E6>@Ԛ<#!81)ٟ@>6E6>@Ԛ<)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>53K:S;ٟ@Sٟ@>6E66>GA7B/-K:S;ٟ@Sٟ@>6E66>G+)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>\Z$B)KFE6>RDI6PGH>R5K9>66;NDSPԮK߀3VT$B)KFE6>RDI6PGH>RK9>66;NDSPٮK)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>/-R—Pٟ@)%ٟ@6E6>DPDA&$R—Pٟ@)5E6>DPDA)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>GE!Rٟ@6E6>ٟ@щQKB)B$&9U>щQ@Ԛ<;9!R5E6>ٟ@щQKB)BU>щQ@Ԛ< ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$hf$>I?9TWO$8$>I?9TWO$8Q$>I?9TWO$8,9PMK$>ɞ9WO$8$>ɞ9WO$8Q$>ɞ9WO$8,9P ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$)'֥>$8?9TW8QH.T#!֥>$8ɞ9W8QH.T ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$)'U"҈$4T޲F?9TU"4T޲Fɞ9 ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$trL:V1T>B;W8׫B!UH?I?9T$8CWO?98W8ɳQWQBHO_]L:V1T>;W8׫B!UH?Iɞ9$8CWOǞ9W8ɳQWвQH ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$;9µ$?9Tµ$?9T@M@>KT@/Bɞ9ɞ9@ܱM>K@/ ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$86<4T޲Fɞ9"A/4T޲Fɞ9"Q8+KTO ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$&$?9TQ0"lj:?9TɳQQɞ9Q0"lj:ɞ9ɳQQ ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$ec?9T88I?9T$8WO888O?98QD2CI0C98>ŒATSQɞ988Iɞ9$8WO888O?98QD2C0C9>ŒAT ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$20?9T$8ܞND֥>W8ݶ;UW89T#!ɞ9$8N֥>W΀8U89T ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$,*"҈$4T޲F?9Tlj:""4T޲Fɞ9lj:" ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$53"҈$4T޲F?9TQD2DT#!"4T޲Fɞ9QD2DT ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$A?µ$?9TA=ULEQ?DZ.߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աOMK/N/4ʅ>߰4>N.OX,FJO:9/N/4@@@,*N4NOX,FO:9N@@/N/40O;  
N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO>߰4>N.Xҥ3߫UBWOFJUQJ&$N4NXUBWOFUQJ/N/40O;  N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO6Mӛ?6Mӛ?O;O/N/47>6744B9HS1HŞ1Kį?Dߋ5 Gބ24PK ۥNɿCR S2ބ2B@Bބ2ͩ- ۥN BDBM/N7߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO86/N/4ʅ>߰4>N.XWBOFJUQJ#!N4NXWBOFUQJ/N/40O;  N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աOSQ4/N/45F>JFJIݩ5ORܠ94/N/45F>JFJ,*4N54FIݩ5OM4N54F/N/40O;  N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO 00,B4.Iַ;@?0,B4.I@/N/40O;  N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO&$9:9;2—PX>9:;#!9:9;—PX>9:;,6BJ>P7BJ>P><76NJF3P;7N@N;JT;JQ;J/-7NJF3P;7NN;T;Q;,6BJ>P7BJ>P)'76BJPT;<̖@@TML&$7BJPT;<̖@@TML,6BJ>P7BJ>P/-176NJǭ;J2=>PQ@@@)'17NJǭ;J2=>PQ@@,6BJ>P7BJ>P,*CF76BJԿ7;˨OO/JIַ;&$CF7BJԿ7;˨OO/JI,6BJ>P7BJ>PUFJB76͎?/UFJB7͎?/,6BJ>P7BJ>P—PHIL2COJ—PHIL2COJسSB6BJDʿ7E>P—PHIL2COJ—PHIL2COJQ0N>>KJNBIL2COJBIL2COJ۳S6BJϿ7E>PBIL2COJBIL2COJQ0N׎>KɏJ,6BJ>P7BJ>P&$76BJ>PP/MGQT 7BJ>PPMGQT,6BJ>P7BJ>P,*JRJCJD6PV.6;JT)'JRJCJD6PV.6;T,6BJ>P7BJ>P 
6BJDʿ7E>P@Ԛ<6BJϿ7E>P@Ԛ<,6BJ>P7BJ>P)'ARJBJD6PщQU;7P&$ARJBJD6PщQU;7,6BJ>P7BJ>P/-76BJF6F,QVMG.D6,*7BJF6F,QVMG.D6,6BJ>P7BJ>P53BܥNFCS7B76BR6HJ>AP/-BܥNFCS7B7B7HJ>AP,6BJ>P7BJ>P><76NJF3P;7N@N;JT;JQ;J/-7NJF3P;7NN;T;Q;,6BJ>P7BJ>PYWJǭ;N,6>PJǭ;DƂGщQJǭ;D@щQ,6>G3.ٟ@DƂGщQ@Ԛ<SQJǭ;N7>PJǭ;DƂGщQJǭ;D@щQ7>G3.ٟ@DƂGщQ@Ԛ<,6BJ>P7BJ>P/-176NJǭ;J2=>PQ@@@)'17NJǭ;J2=>PQ@@,6BJ>P7BJ>P/-FJō/NJD0PL36>;GB&$JNJD0PL36>;G,6BJ>P7BJ>PUFJB76͎?/UFJB7͎?/,6BJ>P7BJ>P#!6ǭ;>Q6NJ>P;7 6>Q6NJ>P;7,6BJ>P7BJ>P&$76BJ>PP/MGQT 7BJ>PPMGQT,6BJ>P7BJ>PCFJB6ǭ;@Ԛ<CFJB6@Ԛ<,6BJ>P7BJ>P 6BJDʿ7E>P@Ԛ<6BJϿ7E>P@Ԛ<,6BJ>P7BJ>P 6BJD6E>P@Ԛ< 6BJD6E>P@Ԛ<,6BJ>P7BJ>P/-76BJF6F,QVMG.D6,*7BJF6F,QVMG.D6,6BJ>P7BJ>P 76BJ>PHڶ>@Ԛ<7BJ>PHڶ>@Ԛ<,6BJ>P7BJ>P><76NJF3P;7N@N;JT;JQ;J/-7NJF3P;7NN;T;Q;,6BJ>P7BJ>P Lǭ;BϨHJ>PA7BLBϨHJ>P+,6BJ>P7BJ>P/-176NJǭ;J2=>PQ@@@)'17NJǭ;J2=>PQ@@,6BJ>P7BJ>P#!Lǭ;BϨHJ>PDG@KLBϨHJ>PD@K,6BJ>P7BJ>PUFJB76͎?/UFJB7͎?/,6BJ>P7BJ>P ;-M= ;-M=,6BJ>P7BJ>P&$76BJ>PP/MGQT 7BJ>PPMGQT,6BJ>P7BJ>P ;-M= ;-M=,6BJ>P7BJ>P 6BJDʿ7E>P@Ԛ<6BJϿ7E>P@Ԛ<,6BJ>P7BJ>P/-ϨHJō/BJ>PϨHJō/BJڶ>F=/-ϨHJō/BJ>PϨHJō/BJڶ>F=,6BJ>P7BJ>P/-76BJF6F,QVMG.D6,*7BJF6F,QVMG.D6,6BJ>P7BJ>P2076BCJ>P/G=Q>BD>ÐW,*7BCJ>PG=Q>BD>ÐW,6BJ>P7BJ>P><76NJF3P;7N@N;JT;JQ;J/-7NJF3P;7NN;T;Q;,6BJ>P7BJ>P Lǭ;BϨHJ>P:ÐW4LBϨHJ>P:ÐW44T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<> <6>7T<@9:T<6>7T?9:4T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<> 
M4TCT7@<@Ԛ<M4CT7@@Ԛ<4T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>539TB@>TK7<:7@<ǭ;?AB,*9TB>TK7<:7@ՄNAB4T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>A?>T<@>/26SCSET<@>-/7B6;9>T?>/26SCSET?>-/7B64T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>/-T@47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>,*4T<@HAVTJD8DAP4?HAVTD8A4T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>4T47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>;94TRF7@<5@2D0O6P6T,*4RI@5@20O6P64T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>,*C>8T<7@<1>DPDA&$C>8<7@1>DPDA6NBUC6O  @U66NBV1UC6O@V1U66NBUC6O  @U6866NBUC-9ԚETBET&$@6OCN3>ETBET,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9/-DHLKD‡?OAO6:,A7B)'DHLKD‡?OAO6:,+,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O986H:!DƇ>O-8WHOWK-4=RJ53H:!DƇ>O-8WHOWK-4RJ,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O986NA9=H5D‡?OJٟ@6:G2@@@,*N9H5D‡?OJ5:G2@@,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9SQDǭ;DQDUH:DO>&DB7DOOJDIPAFE>6MKDǭ;DQDUH:DO>&DB7DOOJDPAF>6,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9&$H=Dć?O=9=ϷAH@H=Dć?O9A@,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9A?,O-HDBٟ@;?=1PK@‡?O=9=@Ԛ<53,O-HD@?=1PK@‡?O9@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9JHH=WK=:B:D‡?O:D1=@9=D9D5@Ԛ<>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9&$H=Dć?O=9=DSDA H=Dć?O9DSDA,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< 
VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>120H,82,ֈ;04VC7G/T>1)'H,82ڈ;4VCî7/T>1)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1,*V@,1V2,7C7G.V@ M,1V27Cî7.M)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>153H82,7C7G/T>1?TJQ>,*H827Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1/-W?A;OV2,7C7GA.T#!W?ҞMOV27Cî7A.)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>12,>B-45J2>B-5)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1hfH8 -N2,ԓ4DC7G77BK;9/T>1KL/U5 -5>2,WFVTH8 -N24Cî77BK;9/T>1KL/U5 -5>2W)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>186H,7H82,RNVC7G/T7>1 H,>1)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>12C2C)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1V2,7C7G¶;V27Cî7¶;BRADK BRADDBRADKC5—P=—Pބ2RADKC58:-9ET86RADC5=܉2RADC58:-9ETBRADK BRAD><@GD5ՂPRA5HDKϲLK2!QH9T;9@GD5ՂPRA5HDϲLK2!QH9TBRADK BRADJHH 5ՂP2CDKLARAK3DKMK5DKև9>TA?H 5ՂP2RKLARAK3DMK5DKև9>BRADK BRAD,*F7CPL߫WA=RADKS7)'F7CPL߫WA=RADS7BRADK BRADPNՂPLE;ߏGKCRADKCBAMKCK?KCCPD7LRABADK><,9;DR؇9U8ȴS>CPD7LRABADBRADK BRADDBRADKC5—P=—Pބ2RADKC58:-9ET86RADC5=܉2RADC58:-9ETBRADK BRAD\Z:DKCLCBCL5?LFL>HDKCRAK?MKߏGKCBùFPN:DCLCBC5?LL>HDCRAK?MKߏGKCBùFBRADK BRADJHH 5ՂP2CDKLARAK3DKMK5DKև9>TA?H 5ՂP2RKLARAK3DMK5DKև9>BRADK BRADqo -2CDKՂPLARAK3DKMKߏGK HӒC,NDK5=TUߋ5,,=>:J_] -2RKՂPLARAK3DMKߏGK HӒC,D5=TUߋ5,=>: 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI;ѤI;;KFABѤI;;KFAB 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI; ѤI;S>>ٟ@6;@Ԛ<ѤI;S>>5;@Ԛ< 6ѤI; 6ѤI; ѤI;@?  
ѤI;@ 6ѤI; 6ѤI;JH3Ԛ<ѤI;>6;6SF;.TTD6;6SF;.TDTMSѤI22E7>>2OD@TDBѤI;A @69>TMSѤI22E7>>2OD@T 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI;><ѤI;B2ѤI;2ѤI;0ѤI;SNѤI;NOFT><ѤI;B2ѤI;2ѤI;0ѤI;SNѤI;NOFT 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI;53ѤI;>>;U0>;D6PGDSDA53ѤI;>>;U0>;D6PGDSDA 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI;ѤI;M@KѤI;M@K 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI; ѤI;8ٟ@>6CA7BѤI;8ٟ@>6C+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*9EN39>ڹ3T21M1T)'9EN39>ڹ3T2M1T&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*:B7>B31CTCCԃP-C#!:B7>B31CCƠB7Dڹ32:TCG#!5D>B7Dڹ32:CG531TН?>/3>ND3>2HTC.:)'1?/3ND3>2HC.:&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG/-Ԋ/BNP92K1W>2Ԋ/Lؒ.=#!Ԋ/N9K1W>2Lؒ.=&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*V>NDͯ?ڹ3F1ȇN;9FGB)'V>NDͯ?ڹ3F1ׇN9FGB&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGDBNW>ڹ321%K9E?AFF?DJEʡH9?/86NW>ڹ32%K9E?AFFDJE9/&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG20;@7:TCUDTڹ3>NщQA7B#!;7:CUDڹ3>NщQ+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG)'AFF?9E1ڹ321KW(#!AFF9E1ڹ32KW(&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG53EEO35B5Iٟ@7A:5Gς16T&$E>3B5@7A:5G+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG86>>8RVGBڹ3;2F5>HK7<653>>8RVGBڹ3;2F5>H7<6&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGA?5BJH:ɚK73GHAVTJD8DAP865BJH:ɚK73GHAVTD8A&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGDBDNADV93>R9B>:D:TCS-@@@86DADV93>R9B>:D:CS@@&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGJB7>J3/:J7>J3/:&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG20AF?9C1NWڹ321K:&87)'AF9C1NWڹ32K:&8&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*9EN39>ڹ3T21M1T)'9EN39>ڹ3T2M1T&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGSQ9DBB3ҾW19659D:QTC2ʶU>3.ٟ@6ǽ=G@Ԛ<A?DBB3ҾW1965ՔDQC2ʶU>3ٟ@6G@Ԛ<&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG531TН?>/3>ND3>2HTC.:)'1?/3ND3>2HC.:&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGDB53>RD>B7HLTD>B7:LGDSDADB53>RD>B7HLTD>B7:LGDSDA&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*V>NDͯ?ڹ3F1ȇN;9FGB)'V>NDͯ?ڹ3F1ׇN9FGB&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*5DBڹ3G><97>?LS:)'5DBڹ3G><97>FS:&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG20;@7:TCUDTڹ3>NщQA7B#!;7:CUDڹ3>NщQ+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGkiRV>NDڹ32į?1T9Fܫ7MN6K9D,K69.1R3RFBOBTec/>NDڹ32į?1T9Fܫ7MN6K9D,K69.1R3RFBOB&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG53EEO35B5Iٟ@7A:5Gς16T&$E>3B5@7A:5G+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGSQ63P7S4DT9I871Dڹ32:TCRٍBKЅJCG>6DB63P7SCT871Dڹ32:CRٍBKЅJC>6&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:C
GA?5BJH:ɚK73GHAVTJD8DAP865BJH:ɚK73GHAVTD8A&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG&$>>8RVGBڹ3;2F5&$>>8RVGBڹ3;2F5يR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR2886يR2A8>يR2A8>8J-IN=JT)'يR2ŞيR2Ş8J-I=JTيR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28_]>7JЁH?ʡHWOUA7J1HN=FFHيR28>G@K\Z>7JЁH?ʡHWOUA7J1HN=FFHيR28G@KيR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28zx(" -UA7J1H -N= FFHيR28>DA7JUA7DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28><يRJTيR8T يRDН?>QTيR453OD6J=FHيR28D?QيR4يR28يR2886يR2A8>يR2A8>8J-WN8T,*يR2ŞيR2Ş8J-WN8TيR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28VT -UA7J1H= FFHيR28>107 A@H۰M3AMK -UA7J1H= FFHيR28107 @H3AيR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28;9يR28>9KA8D6P>JщQN.6@Ԛ<53يR289KA8DP>JщQN.6@Ԛ<يR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28b` UA7J1H N=б FFHيR28>DA7JUA7D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD/UPG,NKQM/UPG,KQMUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD /JPC98?UPT/JP98?UPTUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕDA?//P//PO—P=-//PC?KP//Pĩ8>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD>9S9Ԛ9S1/9RK@Ԛ<UP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD2059P/ַ;/P/PA/P?PF7,*59P/ַ;/P/P/P?PFUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕDPNUP?İUHP.F-S51SSAPK85G6)ʪJHUP?İUH1F-S51SSAPK8G6)ʪUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD UPʡH98CCH/TUP9CCH/TUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕDPʰD/Fַ; PʰD/1 ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8WGE>ܷT18W>/26SCSEܷT18W>-/7B6A?>U8W>/26SCSEU8W>-/7B6 ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8WܷT1W>/>/CSܷT1W>/USܷT1W>/ܷT1W>/>8M6@66>ќ:0F6267(%!"~UW>/>/CSUW>/USUW>/UW>/>8M6@66>ќ:0F6267 ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8WGE>ܷT18W>/26SCSEܷT18W>-/7B6A?>U8W>/26SCSEU8W>-/7B6 ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8WA?6DQ66NیVOH2ܷT18W/Q66;6=;96Q66NیVOH2U8W/Q66;6= ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8W8ܷT1OW=;8UOW=; ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8W ܷT1WFM>Л6;@KUWF>Л6;@ ܷT18W  U8WܷT18W@? 
U8W@ ܷT18W  U8W20A89QEܷT1G4WE>FWAB)'A8ƋQEUG4WE>FAB:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=XMK:?:?LIMW#DE=XWΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X WR:?9Iʉ5X@Ԛ<WR:?Iʉ5X@Ԛ<:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=XVT9WI>:?щQV46V6#6#%6# 6$ 6#88GE9WI>:?щQV6V6#6#6#66#88:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X9Wʉ5X@N9Wʉ5X@N:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X>ܤKV#%ѾCHTL6LT53WR:?IG>ܤKV#%5L6LT:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X;9:?б 9WڶU5PRT53:?9WڶU5PRT:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X53EWN6=A9S:?9I>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X86:?Gʉ5BW/UX7AE3D#&&$:?GBW/UXAED,?RFD,?RF;9,?RFQUBDAP;0T?6T)!&$,?RFQUA;T6T)D,?RFD,?RF/-?,FR>,62ɀ?EBP22>)'8FR>,62ɀ?EBP22D,?RFD,?RFMKD=D3Dٟ@FR?,1@?>19Kٟ@9ٟ@-4,@Ԛ<>19K9-4,@Ԛ<D,?RFD,?RF,*RF>BϨH,@?,6DPDA&$RF>B؋8@86DPDAD,?RFD,?RF>EщQ@Ԛ<53D,.F?H.JV9S6>EщQ@Ԛ<D,?RFD,?RF,*RF,?HAVTJD8DAP R8?HAVTD8AD,?RFD,?RFGED3DRIF,2?.@PیVDHAVDAPT>T0N6Q20S9Xֈ?NXIWN,ڶ>T0N6Q9CXֈ?NXIַ;9Xֈ?NXI,*9CXֈ?NXIַ;M/TۓR7K 9Xֈ?NXIMTۓR79CXֈ?NXIַ;9Xֈ?NXI)'9CXֈ?NXIַ;B<ނBB@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BOPNDD7OC-SO֊2>SDɵO689HAVTJD8DAPA?DD7C-SO>SDɵO689HAVTD8A 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BOA?78BBر/D2ѺKٟ@6TCMUB@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BO 57:CDO֊2ѺK@Ԛ<57:CDOѺK@Ԛ< 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BODBкB9N7:CO֊2>ٟ@6߇;1G3F7;Q67;QT><кB9N7:CO>5߇;1G3F7;Q67;QT 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BODB7OB62>R@2A57;QԚ<7;QT7N3>M><7OB62>R@2A57;QԚ<7;QT7N> 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BO)'VDD7BC92>/67T)'VDD7BC92>/67T 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  
7BO&$V7JR1:2R<@@@#!V7JR1:2R<@@28>1S6Mſ2>15M;9KS6MԚ<28DAP;0T?6T)!#!K5MԚ<ſ2A;T6T)28>1S6Mſ2>15M;928=S0M28GN06W,646T20ſ2=S0Mſ2GN06W,64628>1S6Mſ2>15M><28ٟ@6Q>D1.S6M>=6P6T20ſ25Q>D1.5M>=6P628>1S6Mſ2>15M/-28>M281S6MۓR9T,K&$ƿ2>Mƿ215MۓR9T,K28>1S6Mſ2>15M><281S6M>1H3PیVDHAVDAPT20ſ215M>1H3PیVDHAVAT28>1S6Mſ2>15M53A28=χ71S6MN1SщQχ7=RJ,*Aſ2=χ715MN1SщQχ7RJ28>1S6Mſ2>15M;9KS6MԚ<28DAP;0T?6T)!#!K5MԚ<ſ2A;T6T)28>1S6Mſ2>15MS6MES28@Ԛ<5MESſ2@Ԛ<28>1S6Mſ2>15M><28ٟ@6Q>D1.S6M>=6P6T20ſ25Q>D1.5M>=6P628>1S6Mſ2>15MYWDNԚ<281S6M1H3Vٟ@281DAP;0T?6T)!86Nſ215M1H3Vٟ@ſ21A;T6T)28>1S6Mſ2>15M><281S6M>1H3PیVDHAVDAPT20ſ215M>1H3PیVDHAVAT28>1S6Mſ2>15M53MVٟ@28DAP;0T?6T)!Mٟ@ſ2A;T6T)28>1S6Mſ2>15M;9KS6MԚ<28DAP;0T?6T)!#!K5MԚ<ſ2A;T6T)28>1S6Mſ2>15M#!A281S6MDSDAAƿ215MDSDA —PJ>RJЍ—PJ>RJ,*M: D>J6߻WDSDA,*M: D>J6߻WDSDA —PJ>RJЍ—PJ>RJA?VNN,̥6:D9SJ6OQNέ;LSDʡH9;86VN,̥6:D9SJ6QN٭;SDʡH9; —PJ>RJЍ—PJ>RJ/-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ</-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ< —PJ>RJЍ—PJ>RJ53DȂ3@>Q—PJ—PJ>RCRA7B/-DȂ3@>Q—PJ—PJ>RCR+ —PJ>RJЍ—PJ>RJqo=>QH,<5Wį?;>—PJB—PۃJD9SIF>J6RN7>809DSPԮK߀3hf=>QH,5Wį?;>—PJB—PۃJD9SIF>J6R7>809DSPٮK —PJ>RJЍ—PJ>RJ;95$,U, 6D>:5JЂJDJA7B/-5$,,Ѝ6D>:5JЂJDJ+ —PJ>RJЍ—PJ>RJPN P;>LCD9J9KBDL=Ė16ǽ=EX>PGEЍP;>LCD9J9KBDL=Ė16E>P —PJ>RJЍ—PJ>RJG7TQ-G7TQ- —PJ>RJЍ—PJ>RJ)'Q—PJR69:ADSDA#!Q—PJR9ADSDA —PJ>RJЍ—PJ>RJDBA2ûR9?A>;BTûR9?A>5653TA2ûR9?A>;BûR9?A>56 —PJ>RJЍ—PJ>RJ,*M: D>J6߻WDSDA,*M: D>J6߻WDSDA —PJ>RJЍ—PJ>RJ><6JD9SJ6ȻW̑-9ٟ@—PJ>RJ@Ԛ<866JD9SJ6ȻW̑- @—PJ>RJ@Ԛ< —PJ>RJЍ—PJ>RJ/-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ</-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ< —PJ>RJЍ—PJ>RJMK66DJQ—PL>JRJJQ—PL@BJ9Uڤ55@Ԛ<GE66DJQ—PL>JRJJQ—PL@BJUܤ5@Ԛ< —PJ>RJЍ—PJ>RJqo=>QH,<5Wį?;>—PJB—PۃJD9SIF>J6RN7>809DSPԮK߀3hf=>QH,5Wį?;>—PJB—PۃJD9SIF>J6R7>809DSPٮK —PJ>RJЍ—PJ>RJ4B4յGWGXF4B4WX —PJ>RJЍ—PJ>RJPN P;>LCD9J9KBDL=Ė16ǽ=EX>PGEЍP;>LCD9J9KBDL=Ė16E>P —PJ>RJЍ—PJ>RJ,* R:D>ڝJRK2DG@K&$ЍR:D>ڝJRK2D@K —PJ>RJЍ—PJ>RJ)'Q—PJR69:ADSDA#!Q—PJR9ADSDA —PJ>RJЍ—PJ>RJ;95$,U, 6D>:5JЂJ9WWC/-5$,,Ѝ6D>:5JЂJ9āRA —PJ>RJЍ—PJ>RJ,*M: D>J6߻WDSDA,*M: D>J6߻WDSDA —PJ>RJЍ—PJ>RJ AF8,TЍAF,T —PJ>RJЍ—PJ>RJ/-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ</-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ< —PJ>RJЍ—PJ>RJ  4B  4B 
—PJ>RJЍ—PJ>RJqo=>QH,<5Wį?;>—PJB—PۃJD9SIF>J6RN7>809DSPԮK߀3hf=>QH,5Wį?;>—PJB—PۃJD9SIF>J6R7>809DSPٮK —PJ>RJЍ—PJ>RJ/-UCUTʡH>/X>>A2  UUʡH>/X>2Ѝ —PJ>RJЍ—PJ>RJPN P;>LCD9J9KBDL=Ė16ǽ=EX>PGEЍP;>LCD9J9KBDL=Ė16E>P —PJ>RJЍ—PJ>RJ AFPCRJЍ—PJ>RJ)'Q—PJR69:ADSDA#!Q—PJR9ADSDA —PJ>RJЍ—PJ>RJ&$UR:D>JB/T&$UR:D>JB/TPC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯB 2<;> 2<;>PC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯB)'V2PKC4EȯB-;J6&$V2PKC4EȯB-;ϜJPC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯB20T3=C;D9>:CO-֛7:CO-2P:ȯBK6NKDSDA,*>2P:ȯBK6NKDSDAPC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯB#!PNȯB>9H-BV6#!PNȯB>9H-BV6PC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯBJHDPRȯBIH,56:LIB,I;9V;KXܤK$GEDPRȯBIH,56:LIB,I;V;KXܤK$ X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9,*ȏBҲU>Rɸ˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9GE B߹-;:XܷT6˩5J˩54 B߹-;:XܷT6˩5/7;9 B-:XܷT6J˩54 B-:XܷT6/7 X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ96T' ߹-X6˩56T ߹-X6 X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9A?Uٟ@5߹-:XD˩5I:XB9D˩5ƛK6@@@;9Uٟ@5߹-:XD˩5I:XBD˩5ƛK6@@ X˩5R9: X˩5ֲ9GEݩ5TC B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9GE BI;:XܷT6˩5J˩54 BI;:XܷT6˩5/7A? BI;:XܷT6J˩54 BI;:XܷT6/7 X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ9;95˱U̾-C3CIQ:> :X>6˩5,;865˱U̾-C3CIQ:> :X>6,; X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9\Z BH 6ӻBO ߹-:XܷTBH ߹-XܷT;W; N= FJ˩54YW BH 6O ߹-:XܷTBH ߹-XܷT;W; N= FJ˩54 X˩5R9: X˩5ֲ9GEݩ5TC B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9)' >6˩55WR9:DSDA  >65Wֲ9DSDA X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ9A?б = F߹-=X B˩5HFST:TʡH?CگD/86б = F߹-=X BHFSTT9CگD/ X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9nlRA߹-:X> B6˩51D0;Hٟ@R9:KBB>5IBEKRFTD>6@Ԛ<_]RA߹-:X> B610Hٟ@ֲ9KBB>5IBEKRFTD>6@Ԛ< X˩5R9: X˩5ֲ9GEݩ5TC B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9GE KB6NEI:X5R9:˩5UIR>:DSDAA? 
KB6NEI:X5ֲ9˩5UIR>:DSDA X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ9DB߹-:XܷT6H߹-:XܷT6˩5Q' ѲB6ӻBO453߹-:XܷT6߹-:XܷT6Q ѲB04 X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9/-0:X6˩50:X6˩5>464T&$0:X60:X6>46T X˩5R9: X˩5ֲ9GEݩ5TC B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9A?б = F߹-=X B˩5HFST:TʡH?CگD/86б = F߹-=X BHFSTT9CگD/ X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ9\ZDRA9į?߹-=X>6˩5H0-DE06EщQI.6щQ22DSDAYWDRA9į?߹-=X>6H0-DE06EщQI.6щQ22DSDA NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK#! N;WHEK9ݠ.ET N;WHE9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK&$ N;WHEKK9ݠ.ET N;WHE9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK)' N9ݠ.;WKE 9ݠ.ET  N9;WK 9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK#! N;WKEK9ݠ.ET N;WKK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK#! NF;WEK9ݠ.ET  NF;WEK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK)' NF;W2T9K9ݠ.ET&$ NF;W2T9K9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK#! NF;WHK9ݠ.ET  NF;WHK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK)' N9ݠ.;WHE 9ݠ.ET#! 
N9;WHE 9ET:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I/-:T678:T67Iַ;—P=8-)':T678:T67I=8-:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I T18:CT67Iַ;T18:T67I:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67Iܥ60T67ȣ8Iַ;ܥ60T67I:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I,*:0EUPU,I:T67Iַ;#!:08P,I:T67I:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I0T67ȣ8Iַ;0T67I:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I:CT67Iַ;@?:T67I@:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I/-:CT67Iַ;:CT67Iַ;;#!:T67I:T67I;:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I,*0EU4J8:CT67Iַ;ܥ6 084J:T67Iܥ6:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I)':CT67Iַ;M/TۓR7K:T67IMTۓR7:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I)':CT67Iַ;M/TۓR7K:T67IMTۓR7:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I P P4X>E1; 4X>B E1;>XHMʭBWTB>XHMʭBW4X>E1; 4X>B:D>75.T:D>75.T4X>E1; 4X>B/--XE1;7߹-WD7ՕNծH-XBՕNծH4X>E1; 4X>BMK%X6Xޡ8XSX8XNX.XCXCXFX2X4XCA?%X6Xޡ8XX8XNXXXXFX2X4XC4X>E1; 4X>B E1;/64DG@KB/6D@K4X>E1; 4X>BSQİFE1;/64X۹/>OX۹/>TʭBS>OʭBS>TU>6K53İFB/6X>OX>B>OB>U>64X>E1; 4X>BkiE1;MIB>ю2/4AT23WS;XIю2Xю2>ю2UATXIX)PNBMI>ю2/4F23WS;X22ю2UFXI)4X>E1; 4X>B/4?BOBT/4?BOB4X>E1; 4X>B E1;>XHMʭBWTB>XHMʭBW4X>E1; 4X>B)'/43>L??HF? 
FT#!/43>L?HF FT4X>E1; 4X>B/--XE1;7߹-WD7ՕNծH-XBՕNծH4X>E1; 4X>BGE˛59/=T4>X?ޡ8RV4>E1;6T44K2,*-4>/4>B6T5K24X>E1; 4X>B E1;/64DG@KB/6D@K4X>E1; 4X>BVTDG:/4X>3?X?FBTF?ޡ8H?.:FʭB.4?F6>3?XFBTFޡ8H.FF64X>E1; 4X>BkiE1;MIB>ю2/4AT23WS;XIю2Xю2>ю2UATXIX)PNBMI>ю2/4F23WS;X22ю2UFXI)4X>E1; 4X>BVTE1;>C6PKH,-X?71E70NʡHH064TDBB>C6PKH,-X0NʡHH064T4X>E1; 4X>B E1;>XHMʭBWTB>XHMʭBW4X>E1; 4X>B#!U/4X>3B?8,T U/4X>3B?,T4X>E1; 4X>B/--XE1;7߹-WD7ՕNծH-XBՕNծH4X>E1; 4X>B/--446M; ->>@W>W>)'-56M; ->>@>W>4X>E1; 4X>B E1;/64DG@KB/6D@K4X>E1; 4X>B53?41K>F7>>D<(6հL3T53?41K>F7>>D<(6հL3T4X>E1; 4X>BkiE1;MIB>ю2/4AT23WS;XIю2Xю2>ю2UATXIX)PNBMI>ю2/4F23WS;X22ю2UFXI)4X>E1; 4X>B_]E1;1-X?P@4BS?H-M>ԁ:FT,;J8L0(!MKB1-X?P@4BS?H-M>ԁ:FT,;J8L0X4X>E1; 4X>B E1;>XHMʭBWTB>XHMʭBW4X>E1; 4X>B)'/43?>LHJX/ENB#!/43?>LHJXEN4X>E1; 4X>B/--XE1;7߹-WD7ՕNծH-XBՕNծH4X>E1; 4X>B3H24CM΄/ǟ9=Tޡ8?$ڻ($!3H24CMτ/-8$ڻ($!4X>E1; 4X>B E1;/64DG@KB/6D@K4X>E1; 4X>B,*/43?>LHJX/E1ʞ:-)'/43?>LHJXE1ʞ:-4X>E1; 4X>BkiE1;MIB>ю2/4AT23WS;XIю2Xю2>ю2UATXIX)PNBMI>ю2/4F23WS;X22ю2UFXI)4X>E1; 4X>B,*HE1;>/4H?LB/4HLBBQT2>FWA>BL>F)'4>BWAQT2>JF@Ԛ< 4>BWAL>S@Ԛ<WA>BQT2>FWA>BL>F869GOBQT2>PVP.5AJ>P)'9GOBL>PP.J>PWA>BQT2>FWA>BL>F20R0W6>BйSQT2>FDSDA,*R0W6>BйSL>FDSDAWA>BQT2>FWA>BL>FA?COW>M>BWAQT296O8GDSDA;9COW>M>BWAL96O8GDSDAWA>BQT2>FWA>BL>FMKR9GMWWAI>BN==9=A>MNS9=A7B86RךGWWAI>BN==>MNS9=+WA>BQT2>FWA>BL>F209G>BQT2>VJ768T7=&$9G>BL>VќJ687=WA>BQT2>FWA>BL>F;9D9DI>BWRQT2>SQU>V@Ԛ<&$9I>BWRLS>V@Ԛ<WA>BQT2>FWA>BL>F/- -FWLSJ$2AB/- -FWLSJ$2ABWA>BQT2>FWA>BL>F86DS8G>BWAQT2>M@?@@@/-DS8G>BWAL>M@?@@WA>BQT2>FWA>BL>F)'NFHFOFOVVA4@K&$NFHFOFOVVA4@WA>BQT2>FWA>BL>F&$W>V>BWAQT2@Ԛ< W>V>BWAL@Ԛ<WA>BQT2>FWA>BL>F86>BQT2>΂PF;/U N5LUٶ,*>BL>΂PF;/U N5LUWA>BQT2>FWA>BL>F)'4>BWAQT2>JF@Ԛ< 
4>BWAL>S@Ԛ<WA>BQT2>FWA>BL>F#!>BWBQT2>F@Ԛ<>BWBL>F@Ԛ<WA>BQT2>FWA>BL>F20R0W6>BйSQT2>FDSDA,*R0W6>BйSL>FDSDAWA>BQT2>FWA>BL>F#!W2E>DQT2ϩNFBWE>DLϩNFWA>BQT2>FWA>BL>FMKR9GMWWAI>BN==9=A>MNS9=A7B86RךGWWAI>BN==>MNS9=+WA>BQT2>FWA>BL>F8F5R.UES28F=.UESWA>BQT2>FWA>BL>F;9D9DI>BWRQT2>SQU>V@Ԛ<&$9I>BWRLS>V@Ԛ<WA>BQT2>FWA>BL>F)'J>R8"FK%FJ>RF%FWA>BQT2>FWA>BL>F86DS8G>BWAQT2>M@?@@@/-DS8G>BWAL>M@?@@WA>BQT2>FWA>BL>F53D96MEK>BQT2>VD@@@&$D96EK>BL>V@@WA>BQT2>FWA>BL>F&$W>V>BWAQT2@Ԛ< W>V>BWAL@Ԛ<WA>BQT2>FWA>BL>F ֖F>PMމ6J6J7+Mމ6J6J7WA>BQT2>FWA>BL>F)'4>BWAQT2>JF@Ԛ< 4>BWAL>S@Ԛ<WA>BQT2>FWA>BL>F/-UMӛ?1?7F,7MRQ#!UMӛ?1?MRQWA>BQT2>FWA>BL>F20R0W6>BйSQT2>FDSDA,*R0W6>BйSL>FDSDAWA>BQT2>FWA>BL>F/-D96M>BWAQT2DSDA&$D96>BWALDSDAIDT0I ID0IDBDT30IDT30I4DT30IDZ.>4I?86D30ID30I4D30IDZ.>4I2IDT0I ID0I,*IDT01,ICDT0IDT#!IDT01ID0IDTIDT0I ID0I&$RIDTN0I0I4@Ԛ<RID00I4@Ԛ<IDT0I ID0I/-0I9Q6S=KI8KI:@Ԛ</-0I9Q6S=KI8KI:@Ԛ<IDT0I ID0I0I5I?0I5I2IDT0I ID0IA?DT3I58DT38I5DT3X58I?53D3I58D38I5D3X58I2IDT0I ID0I0IIַ;4DG@K0II4D@KIDT0I ID0I&$IDTPDN0I0I@Ԛ<IDPD00I@Ԛ<IDT0I ID0I#!I0IػKI0I4I0I#!I0IػKI0I4I0IIDT0I ID0I/-0IDTFַ;8- 8T !0D18- 8IDT0I ID0I0I4@Ԛ<0I4@Ԛ<IDT0I ID0I20D0ID0I4D0IDZ.>4I?/-D0ID0I4D0IDZ.>4I2IDT0I ID0I0IDZ.>4@Ԛ<0IDZ.>4@Ԛ<IDT0I ID0I20IDT,;01,ICDT0IDT)'IDT,;01ID0IDTIDT0I ID0IDBDT30IDT30I4DT30IDZ.>4I?86D30ID30I4D30IDZ.>4I2IDT0I ID0IDB,TܷT0I,TܷT0I4,TܷT0IDZ.>4I?86,ܷT0I,ܷT0I4,ܷT0IDZ.>4I2IDT0I ID0I&$RIDTN0I0I4@Ԛ<RID00I4@Ԛ<IDT0I ID0IMKDT3N0IDT3N0I4DT3N0IDZ.>4I?/-D30D304D30DZ.>4I2IDT0I ID0I0I5I?0I5I2IDT0I ID0I0IػK4@K0IػK4@KIDT0I ID0I0IIַ;4DG@K0II4D@KIDT0I ID0I0IDG@K0ID@KIDT0I ID0I#!I0IػKI0I4I0I#!I0IػKI0I4I0IIDT0I ID0I53DT3IDT3Iַ;DT3OII?#!D3ID3ID3OI2L7ٟ@8 Lٟ@8 L7@?L@L7ٟ@8 Lٟ@8 L7B6  LB6L7ٟ@8 Lٟ@8,*6—P,L7ٟ@8H7@K7@Ԛ<#!6ٟ@8H7@K7@Ԛ<L7ٟ@8 Lٟ@8 L7@K  L@KL7ٟ@8 Lٟ@8 L7@?L@L7ٟ@8 Lٟ@8 L76?  
L6?L7ٟ@8 Lٟ@8,*6—P,L7ٟ@8H7@K7@Ԛ<#!6ٟ@8H7@K7@Ԛ<L7ٟ@8 Lٟ@8,*6L78>ٟ@HF@F76>P)'6L8>ٟ@HF@F76>PL7ٟ@8 Lٟ@8 L7@?L@L7ٟ@8 Lٟ@820A7L7Hٟ@8EP;:PO@@@,*A7LHٟ@8EP;:PO@@L7ٟ@8 Lٟ@8,*6—P,L7ٟ@8H7@K7@Ԛ<#!6ٟ@8H7@K7@Ԛ<L7ٟ@8 Lٟ@8—P,L7?60 ?60L7ٟ@8 Lٟ@8 L7@?L@L7ٟ@8 Lٟ@8/-L7ٟ@8AR>:6>NDSDA,*Lٟ@8AR>:6>NDSDAL7ٟ@8 Lٟ@8,*6—P,L7ٟ@8H7@K7@Ԛ<#!6ٟ@8H7@K7@Ԛ<L7ٟ@8 Lٟ@8wuL7DF6L7B7L76<6—P,L7 -Gٟ@867@75L78>ٟ@;FJ>N1S_]LDF6LB7L6Ǥ< -Gٟ@867@75L8>ٟ@;FJ>N1S¨0A=Tɾ=S0=Tɾ=S20¨0A=Tɾ=SN.W0AT("0=T̗<.0AT¨0A=Tɾ=S0=Tɾ=S,*¨0A=Tɾ=CPI/C/9?T#!0=PI/C/9?T¨0A=Tɾ=S0=Tɾ=S¨0ʽ=>=Tɾ=R@Ԛ<0>=Tɾ=R@Ԛ<¨0A=Tɾ=S0=Tɾ=S86¨0ʽ==Tɾ=C6=Tɾ=C6AANTAT#!0=T6=T6AATA¨0A=Tɾ=S0=Tɾ=S20¨0ʽ=Dٟ@ޢ7C7C=Tɾ=CѲ/DT("0D>=Ѳ/DT¨0A=Tɾ=S0=Tɾ=S/-=Tɾ=>¨0ʽ=ʇXQޢ0ʇXQޢΉX˛5¨0A/TD¨0A/A4J53-ʇXDQ=>ΉX˛50/TD0/AJ¨0A=Tɾ=S0=Tɾ=S¨0ʽ=>=Tɾ=R@Ԛ<0>=Tɾ=R@Ԛ<¨0A=Tɾ=S0=Tɾ=S)'¨0A=Tɾ=W9L/͒A4T0=Tɾ=W/͒A4¨0A=Tɾ=S0=Tɾ=S20¨0ʽ=Dٟ@ޢ=Tɾ=CѲ/DT("0D>=Ѳ/DT¨0A=Tɾ=S0=Tɾ=S,*¨0A=Tɾ=6=T3OTDA4&$0=Tɾ=6=T3OTDA¨0A=Tɾ=S0=Tɾ=S53¨0A=Tɾ=CDA4AATUʡH9A/,*0=T˾=DAAATUʡH9A/¨0A=Tɾ=S0=Tɾ=S ¨0A=Tɾ=68,T0=Tɾ=6,T¨0A=Tɾ=S0=Tɾ=S20¨0A=Tɾ=SN.W0AT("0=T̗<.0AT¨0A=Tɾ=S0=Tɾ=SDBS48¨0ʽ=P=Tɾ=Cϛ)ϛ)))QTɾ=C98KT/-ФO8-=ϛ)ϛ)))Q98KT¨0A=Tɾ=S0=Tɾ=S¨0ʽ=>=Tɾ=R@Ԛ<0>=Tɾ=R@Ԛ<¨0A=Tɾ=S0=Tɾ=S#!E=¨0ʽ==Tɾ=.8?̛<=0=Tɾ=.?¨0A=Tɾ=S0=Tɾ=S20¨0ʽ=Dٟ@ޢ=Tɾ=CѲ/DT("0D>=Ѳ/DT¨0A=Tɾ=S0=Tɾ=S ¨0AD>=Tɾ=C@K0D>=@K¨0A=Tɾ=S0=Tɾ=S53¨0A=Tɾ=CDA4AATUʡH9A/,*0=T˾=DAAATUʡH9A/¨0A=Tɾ=S0=Tɾ=S=Tɾ=C6¨0AT=T60T  S1/W/߹-CʡH97Qן9ں-ʡH97Qן9  S1/W/)'//Æ.J:NLJS1/GB //Æ.J:NLW/G  S1/W/S1/B;AATW/BAAT  S1/W/ S1D?  WD?  
S1/W/߹-CʡH97Qן9ں-ʡH97Qן9  S1/W/53S1/E70C/77S1/AB#!W/E7W/ABw  S1/W/S1/B;AATW/BAAT  S1/W/ABAB  S1/W/߹-CʡH97Qן9ں-ʡH97Qן9  S1/W/209J/?ſQ5ߕJCMCRURН?QT)'9J/?ſQ5ߕJCMCRQ  S1/W/S1/B;AATW/BAAT  S1/W/>9@VWF?Wַ;;E-S1Н?>AT20B>9@VWF?Wַ;;E-W?A  S1/W/߹-CʡH97Qן9ں-ʡH97Qן9  S1/W/S1/IA6W/IA6EG?>-EG?>-/-G?R142TN5=7@P:J#!G?142T5=@PJEG?>-EG?>-)'G?>-PL΅/Bڶ>SJ@Ԛ< G?>-΅/BSJ@Ԛ<EG?>-EG?>- G?T4 G?T4EG?>-EG?>- GW-TG*EG?>-EG?>-E,G?/-"D:EG?/-"DEG?>-EG?>-20G?>-G64?9ʉ5;˫N¶;PNT,*G?>-G4?9ʉ5;ΫNPNTEG?>-EG?>-86G?>-22΅/8B?¶7ģCCщQDPDA,*G?>-΅/8BNCщQDPDAEG?>-EG?>-20NE,G?>-?¶7ʡHWB:ģCO#!NEG?>-NW:CEG?>-EG?>-&$G?>-8G?>-4-2#!G?>-G?>-4-2EG?>-EG?>- G?>-/.BʭBѡ8¶;G?>-/BʭBѡ8¶;>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>,*K=9:ׄ9?DϪJP>؞C@@@ =:ׄ9?DϪJPρ>@@>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>>؞C19Tׄ9?@Ԛ<ρ>19Tׄ9?@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>Ư8Hׄ9?>؞C@@@Ư8Hׄ9?ρ>@@>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1> ׄ9?=7 ׄ9?=7>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>&$>؞Cׄ9?6R1TDPDA ρ>ׄ9?61TDPDA>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>DB=>19Tׄ9?ׄ9B9>>Ư8I>؞Cб †M86><=>19Tׄ9?ׄ9B9>>Ư8Iρ>б †M8>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>#!U—P۴2>MN,BMСGTUP>MNBMСGT>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>hf1 TSׄ9?AJ9JOT,Q SF>T9P,1R>؞Cб :6)ʪ_]1 TSׄ9?A˱9OT,Q SF>T9P,1Rρ>б :6)ʪ>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>A?:91STׄ9?9M,.T>BϪJ9>؞C@@@;9:91STׄ9?9M,.T>BϪJ9ρ>@@#!&6D>49@P>2#!&6D>49@P>26942A7B694+#!&6D>49@P>2#!&6D>49@P>2#!C14>@D2>@Ԛ<#!C14>@D2>@Ԛ<#!&6D>49@P>2#!&6D>49@P>2;9&FD6D249@D2>1XJVV53&FD6D249@D2>1JV#!&6D>49@P>2#!&6D>49@P>2SQ&L492IщQ—P=&1X4BD71XG:&T6GEީ L492IщQ=&14BD71G:&T6#!&6D>49@P>2#!&6D>49@P>26942A7B694+#!&6D>49@P>2#!&6D>49@P>2 
Cڜ>42K.B@KCڜ>4K.@K#!&6D>49@P>2#!&6D>49@P>2;9&FD6D249@D2>1XJVV53&FD6D249@D2>1JV#!&6D>49@P>2#!&6D>49@P>2;961&6P>429Q1@&@@@2061&6P>4ƋQ1@&@@#!&6D>49@P>2#!&6D>49@P>26942A7B694+#!&6D>49@P>2#!&6D>49@P>2#! 6E424ڜ>2AЍ6E44ڜ>2A#!&6D>49@P>2#!&6D>49@P>2;9&FD6D249@D2>1XJVV53&FD6D249@D2>1JV#!&6D>49@P>2#!&6D>49@P>26>42EX@N6>4E@N#!&6D>49@P>2#!&6D>49@P>26942A7B694+#!&6D>49@P>2#!&6D>49@P>2>4ڜ>F5@Ԛ<>4ڜ>F5@Ԛ<#!&6D>49@P>2#!&6D>49@P>2;9&FD6D249@D2>1XJVV53&FD6D249@D2>1JV#!&6D>49@P>2#!&6D>49@P>2;9CRW6?۱URT:R&6D>62486CRW6?۱URT:R&6D>D4UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8MKUE;6֊2>W6,ϨH@FL6,B,TE;>A7BDBǠ2;6֊2>W6,ϨH@FL6,B,TE;>+UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@853DUE>W@P21HSV9;W@Ԛ<,*DǠ2>W@71HSV9=@Ԛ<UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@886UE2V=L296T=ȟN2DS>؞CԚ<20Ǡ22V=L296T=ȟN2DSρ>Ԛ<UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8JH>؞CXAN;WSV626DUE=WL6,6@Ԛ<A?ρ>XAN=SV626DǠ2=WL6,6@Ԛ<UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8PN9;2UEDSV16=GB<6>؞Cб :6)ʪDB9;2Ǡ2DSV16=G<ρ>б :6)ʪUEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8DBD7>UE;ASVϨH,ϨHW;62>T6@Ԛ<>Ǡ2;ASV؋8ϨHW;62>T6@Ԛ<UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8;9>UE;FWOT7,>A8SVDPDA20>Ǡ2;FW37,>ASVDPDAUEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@886UE616=V6>6L=>؞C@@@/-Ǡ2616=V6>6L=ρ>@@UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8)'>؞CUE;V626DSDA#!ρ>Ǡ2;V626DSDA# UDT #UDT@@@# UDT #UDTJHR6># HL6M9ٟ@UVUӁGDܤK8<# @@@;9R6>#HL6M@UVUӁGA8<#@@# UDT #UDT@@@# UDT #UDT UUD,A#%@@@UUD,A#@@# UDT #UDT@@@# UDT #UDT20UN.T5ƛK,6I16#%@@@,*UN.T5ƛK,6I16#@@# UDT #UDT@@@# UDT #UDTDB9Ԛ<6ϪJ># >Q@D9DFҾWSܤK# @@@,*16>#>Q@9FҾWS#@@# UDT #UDT@@@# UDT #UDT20#%>UӁGD9D.7>#%@@@#!#>UӁG9.7>#@@# UDT #UDT@@@# UDT #UDTMKR, 9S=ɵOʡH9B>UUD=UL9TM# @@@;9R, SɵO9>UUD=UL9TM#@@# UDT #UDT@@@# UDT #UDT&$DПC,UӁGDܤK# 
@@@DПC,UӁGA#@@# UDT #UDT@@@# UDT #UDT&$E>F# UDK0@@@ E>F#UDK0@@# UDT #UDT@@@# UDT #UDT,*# UDK-щQRQ# @@@#!#UDK-щQRQ#@@T;JC;XH-T;C;XH-DB;JIٟ@FXH-EDܤKV3ET)ʪ86;@FحXH-EDV3ET)ʪT;JC;XH-T;C;XH-YW9T:B7ٟ@)X-;J%)ѾCTO7%T87FD0A?9T:B7ٟ@X-;)ѾCTO7%T8F0T;JC;XH-T;C;XH-86;J85SXH-8E6O@Ԛ<&$;85SXH-8E6@Ԛ<T;JC;XH-T;C;XH-&$5M;J.B7H1R@Ԛ<#!5M;.B7H1R@Ԛ<T;JC;XH-T;C;XH-><;J85SXH-8E6ODSDA,*;85SXH-8E6DSDAT;JC;XH-T;C;XH-PN;JA5DN8R8EBS;76XH-NFK,DPDAA?;A5DNRNBS;5XH-NF,DPDAT;JC;XH-T;C;XH-DB;JIٟ@FXH-EDܤKV3ET)ʪ86;@FحXH-EDV3ET)ʪT;JC;XH-T;C;XH-_]5M;JDCٟ@F26K:X-RB9S8@D69>ҾWD,DPDAPN5M;D@F26K:X-RB9S8@6ߖ>D,DPDAT;JC;XH-T;C;XH-86;J85SXH-8E6O@Ԛ<&$;85SXH-8E6@Ԛ<T;JC;XH-T;C;XH-;J١-ܤKS/@N;١-ܤKS@NT;JC;XH-T;C;XH-><;J85SXH-8E6ODSDA,*;85SXH-8E6DSDAT;JC;XH-T;C;XH-866C; X-NWHT;J)ʪ/-6C;X-NWH;)ʪ/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7GE5DR93A7.8RAƛK2TH?T!HA†M86A?5DR93A78RAƛK2TH?T!HA†M8/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7865RAб D93A7.8RA!@@@205RAб 
D93A78RA!@@/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7><ʡH9BR93AV7.RAϪJHA@@@209R93AV7RAϪJHA@@/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7;9!HA5DR9L9BR7.RϪJ,@Ԛ<86!HA5DR9L9BR7RϪJ,@Ԛ</-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7ki5D93AJR7.BRFD3Bٟ@75Dٟ@7>HAKADP!HA†M86ec5D93AJR7BRFD3Bٟ@75Dٟ@7>HAKADP!HA†M8/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7b`5DR9L9DR7.3>3RQKUDA-D3D!HA†M86\Z5DR9L9DR73>3RQKUDA-D3D!HA†M8/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7/-5D9L9D7.RƭI!@@@)'5D9L9D7RƭI!@@/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7hf95L9DRG7.3AWDEWKѾCHT7HA7:6)ʪVT95L9DRG73AWDEWK5:6)ʪ/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7><ϪJAHARA9D93ARADϪJ7.K5;9ϪJAHARA9D93ARADϪJ7K5.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<DBW<-7R:.6O/1EPٟ@9ٟ@MBʔ77>P/-W<7R:1EP9MBݔ7>P.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<R-R-.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<539.T&$<-N<%%O܊70>.6O<-  .O<20.6O37;0G .6.6T&$<-N<%%O܊70>.6O<-  .O<.6O8I6T.O8I6T.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<.6OTK6.OTK6.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<20 .6O/EED.6O/EE"W#! 
ED.6O/E"W.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<DBW<-7R:.6O/1EPٟ@9ٟ@MBDSDA20W<7R:1EP9MBDSDA.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<>4M54Н?A3AT UD1ձM4M54AA  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1,*UD1CT%8>9S1ME;)'UD1CT%8>9S1M;  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1 UD1U3ʡHWRDU UD1U3ʡHWRDU  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1 UD1M>4M5G3UD1ձM4M5G  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1/-U8JD1UHAʡH RGM=T,*U8JD1UHAʡH RGM=  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD120UD1FBLL¶7JѾC4W,M4;#!UD1BNJCW14;  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1&$UD1ʡHR:DGAʈO>6#!UD1ʡHR:DGA>6  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1)'UȂ3.1PD>J١-- AB&$UȂ3.1P>J١-- AB  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1SQ>KU9D1M.OGUʡH9>9U199>U69IT@Ԛ<DB>KU9D1M.ǼOU>9U19>U6IT@Ԛ<  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1#!UD19ҧK1B—PϪJ>D UD19ҧK1BJ>D  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1#!UD19ҧK1B—PϪJ>D UD19ҧK1BJ>D  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1)'UȂ31MC—PQ>DԃPEAB U͂3M—PQ>DUAB  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1)'UD1—PRޚ6HU49QÐWB&$UD1Rޚ6HU49QÐWB  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1&$UD1W>β7UщQDG@K UD1W>ƴ7щQD@K  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1UD1MʡHRHUUD1MʡHRHU  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1/-UD1@1GM3̛<:9T!#!UD1@1GM3:TW  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1UD1>NVNFUD1>NVNF  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1,*UD1MʡHWRHUJ6J7,*UD1MʡHWRHUJ6J7  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1JHUBMBUD19ҧK1B—PϪJ>DS1UBDBN@Ԛ<>Dū1UDBN@Ԛ<  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD120AʋMQU,D1U>4,3T5=T&$AQU,D1U>435=  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1534UD1M/5S7H47 N H)'4UD1M5S N H  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1)'UD1U/VӲU>/=WQT UD1*ӲU>/=WQ  UD1  
UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1DBUD1UʡHWR6U>G=SU/T()!/-UD1UʡHWR6U>GSUT  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1#!UD1M>U.61D3̛<2/ќ6HQT&$UD1KU>1D3/HQ  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1DBUD1ۓR4H5B—PϪJ>D3KT((!)'UD1ۓR4H5BJ>D3KEԼOR@C/8EC/8#!HԼOR@C/8>ٟ@@Ԛ<HC/8>ٟ@@Ԛ<EԼOR@C/8EC/8qoHԼOR@C/D8>ٟ@8 P@NLΊ;J@>@BΊ;RP@NLΊ;J@>DOָ:?ThfHC/D8>ٟ@8 P@NLΊ;J@>@BΊ;RP@NLΊ;J@>Dָ:?TEԼOR@C/8EC/8ԼO@K@K OK@KEԼOR@C/8EC/8nlHԼOR@C/8>ٟ@8PHۇLBDCɕH5ǟVGRPHGLBD=ږH5DOָ:?TecHC/8>ٟ@8PHۇLBDCɕH5ǟVGRPHGLBD=ږH5Dָ:?TEԼOR@C/8EC/8><ԼOR@E>1>THIԓ4C/8>ٟ@A7B,*E>1>TC/8>ٟ@+EԼOR@C/8EC/8}{M:İU;ԼOR@?R5BD5@E7K՞RWKD5C/8>ٟ@>:T(!K;86_]:?R5BD5@EG՞RʼGD5C/8>ٟ@>:TK;86EԼOR@C/8EC/8#!HԼOR@C/8>ٟ@@Ԛ<HC/8>ٟ@@Ԛ<EԼOR@C/8EC/8#!ß<:Dć?ԼO@C/8>ٟ@ ß<:Dć?OC/8>ٟ@EԼOR@C/8EC/8ԼO@K@K OK@KEԼOR@C/8EC/8,*HԼOR@NܒM̺2C/D8>ٟ@#!HNMC/D8>ٟ@EԼOR@C/8EC/8><ԼOR@E>1>THIԓ4C/8>ٟ@A7B,*E>1>TC/8>ٟ@+EԼOR@C/8EC/853HԼOR@C/D8>ٟ@RE@>DW/-HC/D8>ٟ@RE@>DWEԼOR@C/8EC/8#!HԼOR@C/8>ٟ@@Ԛ<HC/8>ٟ@@Ԛ<EԼOR@C/8EC/8&$EԼO@C/8>ٟ@DPDA#!EOC/8>ٟ@DPDAEԼOR@C/8EC/8ԼO@K@K OK@KEԼOR@C/8EC/8HԼOR@C/8>ٟ@D>AIH!D>HIH:@>Hٟ@/ў7:@՞R.ٟ@/ў79OEEXqoHC/8>ٟ@D>AIH!D>HIH:>Hٟ@/ў7:>ٟ@/ў79EEXEԼOR@C/8EC/8><ԼOR@E>1>THIԓ4C/8>ٟ@A7B,*E>1>TC/8>ٟ@+EԼOR@C/8EC/8;9HԼO@C/8>ٟ@H2992653HOC/8>ٟ@H2926EԼOR@C/8EC/8#!HԼOR@C/8>ٟ@@Ԛ<HC/8>ٟ@@Ԛ<EԼOR@C/8EC/820H?RSHIԓ4>ԼO@‹7C/8>ٟ@)'H?RS>O‹7C/8>ٟ@ \ No newline at end of file diff --git a/paddle/trainer/tests/gen_proto_data.py b/paddle/trainer/tests/gen_proto_data.py deleted file mode 100644 index 8cc6d44673b9f992c28ae95cc06db5ea5aca0642..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/gen_proto_data.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cStringIO import StringIO - -import paddle.proto.DataFormat_pb2 as DataFormat -from google.protobuf.internal.encoder import _EncodeVarint - -import logging -import pprint - -logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', ) -logger = logging.getLogger('paddle') -logger.setLevel(logging.INFO) - -OOV_POLICY_IGNORE = 0 -OOV_POLICY_USE = 1 -OOV_POLICY_ERROR = 2 - -num_original_columns = 3 - -# Feature combination patterns. -# [[-1,0], [0,0]] means previous token at column 0 and current token at -# column 0 are combined as one feature. -patterns = [ - [[-2, 0]], - [[-1, 0]], - [[0, 0]], - [[1, 0]], - [[2, 0]], - [[-1, 0], [0, 0]], - [[0, 0], [1, 0]], - [[-2, 1]], - [[-1, 1]], - [[0, 1]], - [[1, 1]], - [[2, 1]], - [[-2, 1], [-1, 1]], - [[-1, 1], [0, 1]], - [[0, 1], [1, 1]], - [[1, 1], [2, 1]], - [[-2, 1], [-1, 1], [0, 1]], - [[-1, 1], [0, 1], [1, 1]], - [[0, 1], [1, 1], [2, 1]], -] - - -def make_features(sequence): - length = len(sequence) - num_features = len(sequence[0]) - - def get_features(pos): - if pos < 0: - return ['#B%s' % -pos] * num_features - if pos >= length: - return ['#E%s' % (pos - length + 1)] * num_features - return sequence[pos] - - for i in xrange(length): - for pattern in patterns: - fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern]) - sequence[i].append(fname) - - -''' -Source file format: -Each line is for one timestep. The features are separated by space. -An empty line indicates end of a sequence. - -cutoff: a list of numbers. 
If count of a feature is smaller than this, - it will be ignored. -if oov_policy[i] is OOV_POLICY_USE, id 0 is reserved for OOV features of -i-th column. - -return a list of dict for each column -''' - - -def create_dictionaries(filename, cutoff, oov_policy): - def add_to_dict(sequence, dicts): - num_features = len(dicts) - for features in sequence: - l = len(features) - assert l == num_features, "Wrong number of features " + line - for i in xrange(l): - if features[i] in dicts[i]: - dicts[i][features[i]] += 1 - else: - dicts[i][features[i]] = 1 - - num_features = len(cutoff) - dicts = [] - for i in xrange(num_features): - dicts.append(dict()) - - f = open(filename, 'rb') - - sequence = [] - - for line in f: - line = line.strip() - if not line: - make_features(sequence) - add_to_dict(sequence, dicts) - sequence = [] - continue - features = line.split(' ') - sequence.append(features) - - for i in xrange(num_features): - dct = dicts[i] - n = 1 if oov_policy[i] == OOV_POLICY_USE else 0 - todo = [] - for k, v in dct.iteritems(): - if v < cutoff[i]: - todo.append(k) - else: - dct[k] = n - n += 1 - - if oov_policy[i] == OOV_POLICY_USE: - # placeholder so that len(dct) will be the number of features - # including OOV - dct['#OOV#'] = 0 - - logger.info('column %d dict size=%d, ignored %d' % (i, n, len(todo))) - for k in todo: - del dct[k] - - f.close() - return dicts - - -def encode_varint(v): - out = StringIO() - _EncodeVarint(out.write, v) - return out.getvalue() - - -def write_proto(file, message): - s = message.SerializeToString() - packed_len = encode_varint(len(s)) - file.write(packed_len + s) - - -''' -if oov_policy[i] == OOV_POLICY_USE, features in i-th column which are not -existed in dicts[i] will be assigned to id 0. -if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist -in dicts[i]. 
-''' - - -def gen_proto_file(input_file, dicts, oov_policy, output_file): - def write_sequence(out, sequence): - num_features = len(dicts) - is_beginning = True - for features in sequence: - assert len(features) == num_features, \ - "Wrong number of features: " + line - sample = DataFormat.DataSample() - for i in xrange(num_original_columns): - id = dicts[i].get(features[i], -1) - if id != -1: - sample.id_slots.append(id) - elif oov_policy[i] == OOV_POLICY_IGNORE: - sample.id_slots.append(0xffffffff) - elif oov_policy[i] == OOV_POLICY_ERROR: - logger.fatal("Unknown token: %s" % features[i]) - else: - sample.id_slots.append(0) - - if patterns: - dim = 0 - vec = sample.vector_slots.add() - for i in xrange(num_original_columns, num_features): - id = dicts[i].get(features[i], -1) - if id != -1: - vec.ids.append(dim + id) - elif oov_policy[i] == OOV_POLICY_IGNORE: - pass - elif oov_policy[i] == OOV_POLICY_ERROR: - logger.fatal("Unknown token: %s" % features[i]) - else: - vec.ids.append(dim + 0) - - dim += len(dicts[i]) - - sample.is_beginning = is_beginning - is_beginning = False - write_proto(out, sample) - - num_features = len(dicts) - f = open(input_file, 'rb') - out = open(output_file, 'wb') - - header = DataFormat.DataHeader() - if patterns: - slot_def = header.slot_defs.add() - slot_def.type = DataFormat.SlotDef.VECTOR_SPARSE_NON_VALUE - slot_def.dim = sum( - [len(dicts[i]) for i in xrange(num_original_columns, len(dicts))]) - logger.info("feature_dim=%s" % slot_def.dim) - - for i in xrange(num_original_columns): - slot_def = header.slot_defs.add() - slot_def.type = DataFormat.SlotDef.INDEX - slot_def.dim = len(dicts[i]) - - write_proto(out, header) - - num_sequences = 0 - sequence = [] - for line in f: - line = line.strip() - if not line: - make_features(sequence) - write_sequence(out, sequence) - sequence = [] - num_sequences += 1 - continue - features = line.split(' ') - sequence.append(features) - - f.close() - out.close() - - logger.info("num_sequences=%s" % 
num_sequences) - - -dict2 = { - 'B-ADJP': 0, - 'I-ADJP': 1, - 'B-ADVP': 2, - 'I-ADVP': 3, - 'B-CONJP': 4, - 'I-CONJP': 5, - 'B-INTJ': 6, - 'I-INTJ': 7, - 'B-LST': 8, - 'I-LST': 9, - 'B-NP': 10, - 'I-NP': 11, - 'B-PP': 12, - 'I-PP': 13, - 'B-PRT': 14, - 'I-PRT': 15, - 'B-SBAR': 16, - 'I-SBAR': 17, - 'B-UCP': 18, - 'I-UCP': 19, - 'B-VP': 20, - 'I-VP': 21, - 'O': 22 -} - -if __name__ == '__main__': - cutoff = [3, 1, 0] - cutoff += [3] * len(patterns) - oov_policy = [OOV_POLICY_IGNORE, OOV_POLICY_ERROR, OOV_POLICY_ERROR] - oov_policy += [OOV_POLICY_IGNORE] * len(patterns) - dicts = create_dictionaries('trainer/tests/train.txt', cutoff, oov_policy) - dicts[2] = dict2 - gen_proto_file('trainer/tests/train.txt', dicts, oov_policy, - 'trainer/tests/train_proto.bin') - gen_proto_file('trainer/tests/test.txt', dicts, oov_policy, - 'trainer/tests/test_proto.bin') diff --git a/paddle/trainer/tests/mnist.list b/paddle/trainer/tests/mnist.list deleted file mode 100644 index 703e87753d5a4f507aad11a6d875cea44787667b..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/mnist.list +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/mnist_bin_part diff --git a/paddle/trainer/tests/mnist_bin_part b/paddle/trainer/tests/mnist_bin_part deleted file mode 100644 index 08b93a0ebb5698bdafbc36c3c757918a50bab621..0000000000000000000000000000000000000000 Binary files a/paddle/trainer/tests/mnist_bin_part and /dev/null differ diff --git a/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data b/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data deleted file mode 100644 index f189b21e86a50d70d317b5e43aa2d6e05af5e774..0000000000000000000000000000000000000000 Binary files a/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data and /dev/null differ diff --git a/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.protolist 
b/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.protolist deleted file mode 100644 index 6b406dff0ba91b5f310d7eafa111c0d21d6542c3..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.protolist +++ /dev/null @@ -1 +0,0 @@ -./trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data diff --git a/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf b/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf deleted file mode 100644 index 92f32a18c0068ab4672034a270aa8c52f2716d59..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf +++ /dev/null @@ -1,154 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later. 
- -# Note: when making change to this file, please make sure -# sample_trainer_config_rnn.conf is changed accordingly so that the uniitest -# for comparing these two nets can pass (test_CompareTwoNets) - -default_initial_std(0.1) -default_device(0) - -word_dim = 999 -l1 = 0 -l2 = 0 - -model_type("nn") - -sparse_update = get_config_arg("sparse_update", bool, False) - -TrainData(ProtoData( - type = "proto_sequence", - files = ('trainer/tests/train_sparse.list'), - )) - -Settings( - algorithm='sgd', - batch_size=100, - learning_rate=0.0001, - learning_rate_decay_a=4e-08, - learning_rate_decay_b=0.0, - learning_rate_schedule='poly', -) - - -wordvec_dim = 32 -layer2_dim = 16 -layer3_dim = 16 -hidden_dim = 32 - -slot_names = ["qb", "qw", "tb", "tw"] - -def ltr_network(network_name, - word_dim=word_dim, - wordvec_dim=wordvec_dim, - layer2_dim=layer2_dim, - layer3_dim=layer3_dim, - hidden_dim=hidden_dim, - slot_names=slot_names, - l1=l1, - l2=l2): - - slotnum = len(slot_names) - for i in xrange(slotnum): - Inputs(slot_names[i] + network_name) - for i in xrange(slotnum): - Layer( - name = slot_names[i] + network_name, - type = "data", - size = word_dim, - device = -1, - ) - Layer( - name = slot_names[i] + "_embedding_" + network_name, - type = "mixed", - size = wordvec_dim, - bias = False, - device = -1, - inputs = TableProjection(slot_names[i] + network_name, - parameter_name = "embedding.w0", - decay_rate_l1=l1, - sparse_remote_update = True, - sparse_update = sparse_update, - ), - ) - Layer( - name = slot_names[i] + "_rnn1_" + network_name, - type = "recurrent", - active_type = "tanh", - bias = Bias(initial_std = 0, - parameter_name = "rnn1.bias"), - inputs = Input(slot_names[i] + "_embedding_" + network_name, - parameter_name = "rnn1.w0") - ) - Layer( - name = slot_names[i] + "_rnnlast_" + network_name, - type = "seqlastins", - inputs = [ - slot_names[i] + "_rnn1_" + network_name, - ], - ) - - Layer( - name = "layer2_" + network_name, - type = "fc", - active_type = 
"tanh", - size = layer2_dim, - bias = Bias(parameter_name = "layer2.bias"), - inputs = [Input(slot_name + "_rnnlast_" + network_name, - parameter_name = "_layer2_" + slot_name + ".w", - decay_rate = l2, - initial_smart = True) for slot_name in slot_names] - ) - Layer( - name = "layer3_" + network_name, - type = "fc", - active_type = "tanh", - size = layer3_dim, - bias = Bias(parameter_name = "layer3.bias"), - inputs = [ - Input("layer2_" + network_name, - parameter_name = "_layer3.w", - decay_rate = l2, - initial_smart = True), - ] - ) - Layer( - name = "output_" + network_name, - type = "fc", - size = 1, - bias = False, - inputs = [ - Input("layer3_" + network_name, - parameter_name = "_layerO.w"), - ], - ) - - -ltr_network("left") -ltr_network("right") -Inputs("label") -Layer( - name = "label", - type = "data", - size = 1, - ) -Outputs("cost", "qb_rnnlast_left") -Layer( - name = "cost", - type = "rank-cost", - inputs = ["output_left", "output_right", "label"], - ) diff --git a/paddle/trainer/tests/sample_trainer_config_opt_a.conf b/paddle/trainer/tests/sample_trainer_config_opt_a.conf deleted file mode 100644 index b1744db8d604c88ec47e7104f79b38bb9d0e4442..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/sample_trainer_config_opt_a.conf +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -################################### Data Configuration ################################### -TrainData(ProtoData(files = "trainer/tests/mnist.list")) -################################### Algorithm Configuration ################################### -settings(batch_size = 1000, - learning_method = MomentumOptimizer(momentum=0.5, sparse=False)) -################################### Network Configuration ################################### -data = data_layer(name ="input", size=784) - -fc1 = fc_layer(input=data, size=800, - bias_attr=True, - act=SigmoidActivation()) - -fc2 = fc_layer(input=fc1, size=800, - bias_attr=True, - act=SigmoidActivation()) - -output = fc_layer(input=[fc1, fc2], size=10, - bias_attr=True, - act=SoftmaxActivation()) - -lbl = data_layer(name ="label", size=1) - -cost = classification_cost(input=output, label=lbl) -outputs(cost) diff --git a/paddle/trainer/tests/sample_trainer_config_opt_b.conf b/paddle/trainer/tests/sample_trainer_config_opt_b.conf deleted file mode 100644 index b1744db8d604c88ec47e7104f79b38bb9d0e4442..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/sample_trainer_config_opt_b.conf +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -################################### Data Configuration ################################### -TrainData(ProtoData(files = "trainer/tests/mnist.list")) -################################### Algorithm Configuration ################################### -settings(batch_size = 1000, - learning_method = MomentumOptimizer(momentum=0.5, sparse=False)) -################################### Network Configuration ################################### -data = data_layer(name ="input", size=784) - -fc1 = fc_layer(input=data, size=800, - bias_attr=True, - act=SigmoidActivation()) - -fc2 = fc_layer(input=fc1, size=800, - bias_attr=True, - act=SigmoidActivation()) - -output = fc_layer(input=[fc1, fc2], size=10, - bias_attr=True, - act=SoftmaxActivation()) - -lbl = data_layer(name ="label", size=1) - -cost = classification_cost(input=output, label=lbl) -outputs(cost) diff --git a/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf b/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf deleted file mode 100644 index d19222360c2f424ddb306b155dfef07921098a6b..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf +++ /dev/null @@ -1,154 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later. 
- -# Note: when making change to this file, please make sure -# sample_trainer_config_rnn.conf is changed accordingly so that the uniitest -# for comparing these two nets can pass (test_CompareTwoNets) - -default_initial_std(0.1) -default_device(0) - -word_dim = 1451594 -l1 = 0 -l2 = 0 - -model_type("nn") - -sparse_update = get_config_arg("sparse_update", bool, False) - -TrainData(ProtoData( - type = "proto_sequence", - files = ('trainer/tests/train.list'), - )) - -Settings( - algorithm='sgd', - batch_size=100, - learning_rate=0.0001, - learning_rate_decay_a=4e-08, - learning_rate_decay_b=0.0, - learning_rate_schedule='poly', -) - - -wordvec_dim = 128 -layer2_dim = 96 -layer3_dim = 96 -hidden_dim = 128 - -slot_names = ["qb", "qw", "tb", "tw"] - -def ltr_network(network_name, - word_dim=word_dim, - wordvec_dim=wordvec_dim, - layer2_dim=layer2_dim, - layer3_dim=layer3_dim, - hidden_dim=hidden_dim, - slot_names=slot_names, - l1=l1, - l2=l2): - - slotnum = len(slot_names) - for i in xrange(slotnum): - Inputs(slot_names[i] + network_name) - for i in xrange(slotnum): - Layer( - name = slot_names[i] + network_name, - type = "data", - size = word_dim, - device = -1, - ) - Layer( - name = slot_names[i] + "_embedding_" + network_name, - type = "mixed", - size = wordvec_dim, - bias = False, - device = -1, - inputs = TableProjection(slot_names[i] + network_name, - parameter_name = "embedding.w0", - decay_rate_l1=l1, - sparse_remote_update = True, - sparse_update = sparse_update, - ), - ) - Layer( - name = slot_names[i] + "_rnn1_" + network_name, - type = "recurrent", - active_type = "tanh", - bias = Bias(initial_std = 0, - parameter_name = "rnn1.bias"), - inputs = Input(slot_names[i] + "_embedding_" + network_name, - parameter_name = "rnn1.w0") - ) - Layer( - name = slot_names[i] + "_rnnlast_" + network_name, - type = "seqlastins", - inputs = [ - slot_names[i] + "_rnn1_" + network_name, - ], - ) - - Layer( - name = "layer2_" + network_name, - type = "fc", - active_type = 
"tanh", - size = layer2_dim, - bias = Bias(parameter_name = "layer2.bias"), - inputs = [Input(slot_name + "_rnnlast_" + network_name, - parameter_name = "_layer2_" + slot_name + ".w", - decay_rate = l2, - initial_smart = True) for slot_name in slot_names] - ) - Layer( - name = "layer3_" + network_name, - type = "fc", - active_type = "tanh", - size = layer3_dim, - bias = Bias(parameter_name = "layer3.bias"), - inputs = [ - Input("layer2_" + network_name, - parameter_name = "_layer3.w", - decay_rate = l2, - initial_smart = True), - ] - ) - Layer( - name = "output_" + network_name, - type = "fc", - size = 1, - bias = False, - inputs = [ - Input("layer3_" + network_name, - parameter_name = "_layerO.w"), - ], - ) - - -ltr_network("left") -ltr_network("right") -Inputs("label") -Layer( - name = "label", - type = "data", - size = 1, - ) -Outputs("cost", "qb_rnnlast_left") -Layer( - name = "cost", - type = "rank-cost", - inputs = ["output_left", "output_right", "label"], - ) diff --git a/paddle/trainer/tests/sample_trainer_config_rnn.conf b/paddle/trainer/tests/sample_trainer_config_rnn.conf deleted file mode 100644 index b720d4d5a6ca59e207832a8c5410c2cb6074c439..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/sample_trainer_config_rnn.conf +++ /dev/null @@ -1,180 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later. - -# Note: when making change to this file, please make sure -# sample_trainer_config_qb_rnn.conf is changed accordingly so that the uniitest -# for comparing these two nets can pass (test_CompareTwoNets) - -default_initial_std(0.1) -default_device(0) - -word_dim = 1451594 -l1 = 0 -l2 = 0 - -model_type("recurrent_nn") - -sparse_update = get_config_arg("sparse_update", bool, False) - -TrainData(ProtoData( - type = "proto_sequence", - files = ('trainer/tests/train.list'), - )) - -Settings( - algorithm='sgd', - batch_size=100, - learning_rate=0.0001, - learning_rate_decay_a=4e-08, - learning_rate_decay_b=0.0, - learning_rate_schedule='poly', -) - - -wordvec_dim = 128 -layer2_dim = 96 -layer3_dim = 96 -hidden_dim = 128 - -slot_names = ["qb", "qw", "tb", "tw"] - -def SimpleRecurrentLayer(name, - size, - active_type, - bias, - input_layer_name, - parameter_name, - seq_reversed = False): - RecurrentLayerGroupBegin(name + "_layer_group", - in_links=[input_layer_name], - out_links=[name], - seq_reversed=seq_reversed) - memory_name = Memory(name=name, size=size) - Layer( - name = name, - type = "mixed", - size = size, - active_type = active_type, - bias = bias, - inputs = [IdentityProjection(input_layer_name), - FullMatrixProjection(memory_name, - parameter_name = parameter_name, - ), - ] - ) - RecurrentLayerGroupEnd(name + "_layer_group") - - -def ltr_network(network_name, - word_dim=word_dim, - wordvec_dim=wordvec_dim, - layer2_dim=layer2_dim, - layer3_dim=layer3_dim, - hidden_dim=hidden_dim, - slot_names=slot_names, - l1=l1, - l2=l2): - - slotnum = len(slot_names) - for i in xrange(slotnum): - Inputs(slot_names[i] + network_name) - for i in xrange(slotnum): - Layer( - name = slot_names[i] + network_name, - type = "data", - size = word_dim, - device = -1, - ) - Layer( - name = slot_names[i] + "_embedding_" + network_name, - type = "mixed", - size = wordvec_dim, - bias 
= False, - device = -1, - inputs = TableProjection(slot_names[i] + network_name, - parameter_name = "embedding.w0", - decay_rate_l1=l1, - sparse_remote_update = True, - sparse_update = sparse_update, - ), - ) - SimpleRecurrentLayer( - name = slot_names[i] + "_rnn1_" + network_name, - size = hidden_dim, - active_type = "tanh", - bias = Bias(initial_std = 0, - parameter_name = "rnn1.bias"), - input_layer_name = slot_names[i] + "_embedding_" + network_name, - parameter_name = "rnn1.w0", - ) - Layer( - name = slot_names[i] + "_rnnlast_" + network_name, - type = "seqlastins", - inputs = [ - slot_names[i] + "_rnn1_" + network_name, - ], - ) - Layer( - name = "layer2_" + network_name, - type = "fc", - active_type = "tanh", - size = layer2_dim, - bias = Bias(parameter_name = "layer2.bias"), - inputs = [Input(slot_name + "_rnnlast_" + network_name, - parameter_name = "_layer2_" + slot_name + ".w", - decay_rate = l2, - initial_smart = True) for slot_name in slot_names] - ) - Layer( - name = "layer3_" + network_name, - type = "fc", - active_type = "tanh", - size = layer3_dim, - bias = Bias(parameter_name = "layer3.bias"), - inputs = [ - Input("layer2_" + network_name, - parameter_name = "_layer3.w", - decay_rate = l2, - initial_smart = True), - ] - ) - Layer( - name = "output_" + network_name, - type = "fc", - size = 1, - bias = False, - inputs = [ - Input("layer3_" + network_name, - parameter_name = "_layerO.w"), - ], - ) - - -ltr_network("left") -ltr_network("right") -Inputs("label") -Layer( - name = "label", - type = "data", - size = 1, - ) -Outputs("cost", "qb_rnnlast_left") -Layer( - name = "cost", - type = "rank-cost", - inputs = ["output_left", "output_right", "label"], - ) diff --git a/paddle/trainer/tests/test.txt b/paddle/trainer/tests/test.txt deleted file mode 100644 index 3ad503b34f2e1a84c632d0894f180b5cf9ac550a..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/test.txt +++ /dev/null @@ -1,1000 +0,0 @@ -Confidence NN B-NP -in IN B-PP -the DT 
B-NP -pound NN I-NP -is VBZ B-VP -widely RB I-VP -expected VBN I-VP -to TO I-VP -take VB I-VP -another DT B-NP -sharp JJ I-NP -dive NN I-NP -if IN B-SBAR -trade NN B-NP -figures NNS I-NP -for IN B-PP -September NNP B-NP -, , O -due JJ B-ADJP -for IN B-PP -release NN B-NP -tomorrow NN B-NP -, , O -fail VB B-VP -to TO I-VP -show VB I-VP -a DT B-NP -substantial JJ I-NP -improvement NN I-NP -from IN B-PP -July NNP B-NP -and CC I-NP -August NNP I-NP -'s POS B-NP -near-record JJ I-NP -deficits NNS I-NP -. . O - -Chancellor NNP O -of IN B-PP -the DT B-NP -Exchequer NNP I-NP -Nigel NNP B-NP -Lawson NNP I-NP -'s POS B-NP -restated VBN I-NP -commitment NN I-NP -to TO B-PP -a DT B-NP -firm NN I-NP -monetary JJ I-NP -policy NN I-NP -has VBZ B-VP -helped VBN I-VP -to TO I-VP -prevent VB I-VP -a DT B-NP -freefall NN I-NP -in IN B-PP -sterling NN B-NP -over IN B-PP -the DT B-NP -past JJ I-NP -week NN I-NP -. . O - -But CC O -analysts NNS B-NP -reckon VBP B-VP -underlying VBG B-NP -support NN I-NP -for IN B-PP -sterling NN B-NP -has VBZ B-VP -been VBN I-VP -eroded VBN I-VP -by IN B-PP -the DT B-NP -chancellor NN I-NP -'s POS B-NP -failure NN I-NP -to TO B-VP -announce VB I-VP -any DT B-NP -new JJ I-NP -policy NN I-NP -measures NNS I-NP -in IN B-PP -his PRP$ B-NP -Mansion NNP I-NP -House NNP I-NP -speech NN I-NP -last JJ B-NP -Thursday NNP I-NP -. . O - -This DT B-NP -has VBZ B-VP -increased VBN I-VP -the DT B-NP -risk NN I-NP -of IN B-PP -the DT B-NP -government NN I-NP -being VBG B-VP -forced VBN I-VP -to TO I-VP -increase VB I-VP -base NN B-NP -rates NNS I-NP -to TO B-PP -16 CD B-NP -% NN I-NP -from IN B-PP -their PRP$ B-NP -current JJ I-NP -15 CD I-NP -% NN I-NP -level NN I-NP -to TO B-VP -defend VB I-VP -the DT B-NP -pound NN I-NP -, , O -economists NNS B-NP -and CC O -foreign JJ B-NP -exchange NN I-NP -market NN I-NP -analysts NNS I-NP -say VBP B-VP -. . 
O - -`` `` O -The DT B-NP -risks NNS I-NP -for IN B-PP -sterling NN B-NP -of IN B-PP -a DT B-NP -bad JJ I-NP -trade NN I-NP -figure NN I-NP -are VBP B-VP -very RB B-ADVP -heavily RB I-ADVP -on IN B-PP -the DT B-NP -down JJ I-NP -side NN I-NP -, , O -'' '' O -said VBD B-VP -Chris NNP B-NP -Dillow NNP I-NP -, , O -senior JJ B-NP -U.K. NNP I-NP -economist NN I-NP -at IN B-PP -Nomura NNP B-NP -Research NNP I-NP -Institute NNP I-NP -. . O - -`` `` O -If IN B-SBAR -there EX B-NP -is VBZ B-VP -another DT B-NP -bad JJ I-NP -trade NN I-NP -number NN I-NP -, , O -there EX B-NP -could MD B-VP -be VB I-VP -an DT B-NP -awful JJ I-NP -lot NN I-NP -of IN B-PP -pressure NN B-NP -, , O -'' '' O -noted VBD B-VP -Simon NNP B-NP -Briscoe NNP I-NP -, , O -U.K. NNP B-NP -economist NN I-NP -for IN B-PP -Midland NNP B-NP -Montagu NNP I-NP -, , O -a DT B-NP -unit NN I-NP -of IN B-PP -Midland NNP B-NP -Bank NNP I-NP -PLC NNP I-NP -. . O - -Forecasts NNS B-NP -for IN B-PP -the DT B-NP -trade NN I-NP -figures NNS I-NP -range VBP B-VP -widely RB B-ADVP -, , O -but CC O -few JJ B-NP -economists NNS I-NP -expect VBP B-VP -the DT B-NP -data NNS I-NP -to TO B-VP -show VB I-VP -a DT B-NP -very RB I-NP -marked VBN I-NP -improvement NN I-NP -from IN B-PP -the DT O -# # O -2 CD O -billion CD O --LRB- ( O -$ $ B-ADJP -3.2 CD O -billion CD O --RRB- ) O -deficit NN B-NP -in IN B-PP -the DT B-NP -current JJ I-NP -account NN I-NP -reported VBD B-VP -for IN B-PP -August NNP B-NP -. . O - -The DT B-NP -August NNP I-NP -deficit NN I-NP -and CC O -the DT B-NP -# # I-NP -2.2 CD I-NP -billion CD I-NP -gap NN I-NP -registered VBN B-VP -in IN B-PP -July NNP B-NP -are VBP B-VP -topped VBN I-VP -only RB B-ADVP -by IN B-PP -the DT B-NP -# # I-NP -2.3 CD I-NP -billion CD I-NP -deficit NN I-NP -of IN B-PP -October NNP B-NP -1988 CD I-NP -. . O - -Sanjay NNP B-NP -Joshi NNP I-NP -, , O -European JJ B-NP -economist NN I-NP -at IN B-PP -Baring NNP B-NP -Brothers NNPS I-NP -& CC I-NP -Co. 
NNP I-NP -, , O -said VBD B-VP -there EX B-NP -is VBZ B-VP -no DT B-NP -sign NN I-NP -that IN B-SBAR -Britain NNP B-NP -'s POS B-NP -manufacturing NN I-NP -industry NN I-NP -is VBZ B-VP -transforming VBG I-VP -itself PRP B-NP -to TO B-VP -boost VB I-VP -exports NNS B-NP -. . O - -At IN B-PP -the DT B-NP -same JJ I-NP -time NN I-NP -, , O -he PRP B-NP -remains VBZ B-VP -fairly RB B-ADJP -pessimistic JJ I-ADJP -about IN B-PP -the DT B-NP -outlook NN I-NP -for IN B-PP -imports NNS B-NP -, , O -given VBN B-PP -continued VBD B-NP -high JJ I-NP -consumer NN I-NP -and CC I-NP -capital NN I-NP -goods NNS I-NP -inflows NNS I-NP -. . O - -He PRP B-NP -reckons VBZ B-VP -the DT B-NP -current JJ I-NP -account NN I-NP -deficit NN I-NP -will MD B-VP -narrow VB I-VP -to TO B-PP -only RB B-NP -# # I-NP -1.8 CD I-NP -billion CD I-NP -in IN B-PP -September NNP B-NP -. . O - -However RB B-ADVP -, , O -Mr. NNP B-NP -Dillow NNP I-NP -said VBD B-VP -he PRP B-NP -believes VBZ B-VP -that IN B-SBAR -a DT B-NP -reduction NN I-NP -in IN B-PP -raw JJ B-NP -material NN I-NP -stockbuilding VBG I-NP -by IN B-PP -industry NN B-NP -could MD B-VP -lead VB I-VP -to TO B-PP -a DT B-NP -sharp JJ I-NP -drop NN I-NP -in IN B-PP -imports NNS B-NP -. . O - -Combined VBN B-PP -with IN B-PP -at IN B-ADVP -least JJS I-ADVP -some DT B-NP -rebound NN I-NP -in IN B-PP -exports NNS B-NP -after IN B-PP -August NNP B-NP -'s POS B-NP -unexpected JJ I-NP -decline NN I-NP -, , O -the DT B-NP -deficit NN I-NP -could MD B-VP -narrow VB I-VP -to TO B-PP -as RB B-NP -little JJ I-NP -as IN I-NP -# # I-NP -1.3 CD I-NP -billion CD I-NP -. . O - -Mr. 
NNP B-NP -Briscoe NNP I-NP -, , O -who WP B-NP -also RB B-ADVP -forecasts VBZ B-VP -a DT B-NP -# # I-NP -1.3 CD I-NP -billion CD I-NP -current JJ I-NP -account NN I-NP -gap NN I-NP -, , O -warns VBZ B-VP -that IN B-SBAR -even RB B-SBAR -if IN I-SBAR -the DT B-NP -trade NN I-NP -figures NNS I-NP -are VBP B-VP -bullish JJ B-ADJP -for IN B-PP -sterling NN B-NP -, , O -the DT B-NP -currency NN I-NP -wo MD B-VP -n't RB I-VP -advance VB I-VP -much JJ B-NP -because IN B-SBAR -investors NNS B-NP -will MD B-VP -want VB I-VP -to TO I-VP -see VB I-VP -further JJ B-NP -evidence NN I-NP -of IN B-PP -the DT B-NP -turnaround NN I-NP -before IN B-PP -adjusting VBG B-VP -positions NNS B-NP -. . O - -Nevertheless RB B-ADVP -, , O -he PRP B-NP -noted VBD B-VP -, , O -`` `` O -No DT B-NP -one PRP I-NP -will MD B-VP -want VB I-VP -to TO I-VP -go VB I-VP -into IN B-PP -the DT B-NP -trade NN I-NP -figures NNS I-NP -without IN B-PP -a DT B-NP -flat JJ I-NP -position NN I-NP -'' '' O -in IN B-PP -the DT B-NP -pound NN I-NP -. . O - -Meanwhile RB B-ADVP -, , O -overall JJ B-NP -evidence NN I-NP -on IN B-PP -the DT B-NP -economy NN I-NP -remains VBZ B-VP -fairly RB B-ADJP -clouded VBN I-ADJP -. . O - -In IN B-PP -his PRP$ B-NP -Mansion NNP I-NP -House NNP I-NP -speech NN I-NP -, , O -Mr. NNP B-NP -Lawson NNP I-NP -warned VBD B-VP -that IN B-SBAR -a DT B-NP -further JJ I-NP -slowdown NN I-NP -can MD B-VP -be VB I-VP -expected VBN I-VP -as IN B-SBAR -the DT B-NP -impact NN I-NP -of IN B-PP -the DT B-NP -last JJ I-NP -rise NN I-NP -in IN B-PP -interest NN B-NP -rates NNS I-NP -earlier RBR B-NP -this DT I-NP -month NN I-NP -takes VBZ B-VP -effect NN B-NP -. . O - -U.K. JJ B-NP -base NN I-NP -rates NNS I-NP -are VBP B-VP -at IN B-PP -their PRP$ B-NP -highest JJS I-NP -level NN I-NP -in IN B-PP -eight CD B-NP -years NNS I-NP -. . 
O - -But CC O -consumer NN B-NP -expenditure NN I-NP -data NNS I-NP -released VBD B-VP -Friday NNP B-NP -do VBP B-VP -n't RB I-VP -suggest VB I-VP -that IN B-SBAR -the DT B-NP -U.K. NNP I-NP -economy NN I-NP -is VBZ B-VP -slowing VBG I-VP -that DT B-ADVP -quickly RB I-ADVP -. . O - -The DT B-NP -figures NNS I-NP -show VBP B-VP -that DT O -spending NN B-NP -rose VBD B-VP -0.1 CD B-NP -% NN I-NP -in IN B-PP -the DT B-NP -third JJ I-NP -quarter NN I-NP -from IN B-PP -the DT B-NP -second JJ I-NP -quarter NN I-NP -and CC O -was VBD B-VP -up IN B-ADVP -3.8 CD B-NP -% NN I-NP -from IN B-PP -a DT B-NP -year NN I-NP -ago RB B-ADVP -. . O - -This DT B-NP -compares VBZ B-VP -with IN B-PP -a DT B-NP -1.6 CD I-NP -% NN I-NP -rise NN I-NP -in IN B-PP -the DT B-NP -second NN I-NP -from IN B-PP -the DT B-NP -first JJ I-NP -quarter NN I-NP -and CC O -a DT B-NP -5.4 CD I-NP -% NN I-NP -increase NN I-NP -from IN B-PP -the DT B-NP -second JJ I-NP -quarter NN I-NP -of IN B-PP -1988 CD B-NP -. . O - -Mr. NNP B-NP -Dillow NNP I-NP -said VBD B-VP -the DT B-NP -data NNS I-NP -show VBP B-VP -the DT B-NP -economy NN I-NP -`` `` O -is VBZ B-VP -still RB B-ADVP -quite RB B-ADJP -strong JJ I-ADJP -, , O -'' '' O -but CC O -suggestions NNS B-NP -that IN B-SBAR -much NN B-NP -of IN B-PP -the DT B-NP -spending NN I-NP -went VBD B-VP -on IN B-PP -services NNS B-NP -rather RB B-PP -than IN I-PP -consumer NN B-NP -goods NNS I-NP -should MD B-VP -reduce VB I-VP -fears NNS B-NP -of IN B-PP -more JJR B-NP -import NN I-NP -rises NNS I-NP -. . 
O - -Certainly RB B-ADVP -, , O -the DT B-NP -chancellor NN I-NP -has VBZ B-VP -made VBN I-VP -it PRP B-NP -clear JJ B-ADJP -that IN B-SBAR -he PRP B-NP -is VBZ B-VP -prepared VBN I-VP -to TO I-VP -increase VB I-VP -interest NN B-NP -rates NNS I-NP -again RB B-ADVP -if IN B-SBAR -necessary JJ B-ADJP -to TO B-VP -both DT I-VP -ensure VB I-VP -that IN B-SBAR -a DT B-NP -substantial JJ I-NP -slowdown NN I-NP -does VBZ B-VP -take VB I-VP -place NN B-NP -and CC O -that DT O -sterling NN B-NP -does VBZ B-VP -n't RB I-VP -decline VB I-VP -further JJ B-ADVP -. . O - -Thursday NNP B-NP -, , O -he PRP B-NP -reminded VBD B-VP -his PRP$ B-NP -audience NN I-NP -that IN B-SBAR -the DT B-NP -government NN I-NP -`` `` O -can MD B-VP -not RB I-VP -allow VB I-VP -the DT B-NP -necessary JJ I-NP -rigor NN I-NP -of IN B-PP -monetary JJ B-NP -policy NN I-NP -to TO B-VP -be VB I-VP -undermined VBN I-VP -by IN B-PP -exchange NN B-NP -rate NN I-NP -weakness NN I-NP -. . O -'' '' O - -Analysts NNS B-NP -agree VBP B-VP -there EX B-NP -is VBZ B-VP -little JJ B-NP -holding NN B-VP -sterling NN B-NP -firm NN B-ADJP -at IN B-PP -the DT B-NP -moment NN I-NP -other JJ B-ADJP -than IN B-PP -Mr. NNP B-NP -Lawson NNP I-NP -'s POS B-NP -promise NN I-NP -that IN B-SBAR -rates NNS B-NP -will MD B-VP -be VB I-VP -pushed VBN I-VP -higher JJR B-ADJP -if IN B-SBAR -necessary JJ B-ADJP -. . O - -And CC O -, , O -they PRP B-NP -warn VBP B-VP -, , O -any DT B-NP -further JJ I-NP -drop NN I-NP -in IN B-PP -the DT B-NP -government NN I-NP -'s POS B-NP -popularity NN I-NP -could MD B-VP -swiftly RB I-VP -make VB I-VP -this DT B-NP -promise NN I-NP -sound NN B-VP -hollow JJ B-ADJP -. . O - -Sterling NNP B-NP -was VBD B-VP -already RB I-VP -showing VBG I-VP -some DT B-NP -signs NNS I-NP -of IN B-PP -a DT B-NP -lack NN I-NP -of IN B-PP -confidence NN B-NP -in IN B-PP -Mr. NNP B-NP -Lawson NNP I-NP -'s POS B-NP -promise NN I-NP -Friday NNP B-NP -. . 
O - -In IN B-PP -European JJ B-NP -trading NN I-NP -it PRP B-NP -declined VBD B-VP -to TO B-PP -$ $ B-NP -1.5890 CD I-NP -and CC O -2.9495 CD B-NP -marks NNS I-NP -from IN B-PP -$ $ B-NP -1.5940 CD I-NP -and CC O -2.9429 CD B-NP -marks NNS I-NP -late JJ B-NP -Thursday NNP I-NP -. . O - -Economists NNS B-NP -suggested VBD B-VP -that IN B-SBAR -if IN B-SBAR -the DT B-NP -pound NN I-NP -falls VBZ B-VP -much JJ B-NP -below IN B-PP -2.90 CD B-NP -marks NNS I-NP -, , O -the DT B-NP -government NN I-NP -will MD B-VP -be VB I-VP -forced VBN I-VP -to TO I-VP -increase VB I-VP -rates NNS B-NP -to TO B-PP -16 CD B-NP -% NN I-NP -, , O -both DT B-VP -to TO I-VP -halt VB B-VP -any DT B-NP -further JJ I-NP -decline NN I-NP -and CC O -ensure VB B-VP -that IN B-SBAR -the DT B-NP -balance NN I-NP -of IN B-PP -monetary JJ B-NP -policy NN I-NP -remains VBZ B-VP -unchanged JJ B-ADJP -. . O - -Friday NNP B-NP -'s POS B-NP -Market NNP I-NP -Activity NN I-NP - -The DT B-NP -dollar NN I-NP -posted VBD B-VP -gains NNS B-NP -in IN B-PP -quiet JJ B-NP -trading NN I-NP -as IN B-SBAR -concerns NNS B-NP -about IN B-PP -equities NNS B-NP -abated VBN B-VP -. . O - -Foreign JJ B-NP -exchange NN I-NP -dealers NNS I-NP -said VBD B-VP -that IN B-SBAR -the DT B-NP -currency NN I-NP -market NN I-NP -has VBZ B-VP -begun VBN I-VP -to TO I-VP -distance VB I-VP -itself PRP B-NP -from IN B-PP -the DT B-NP -volatile JJ I-NP -stock NN I-NP -exchange NN I-NP -, , O -which WDT B-NP -has VBZ B-VP -preoccupied VBN I-VP -the DT B-NP -market NN I-NP -since IN B-PP -Oct. NNP B-NP -13 CD I-NP -, , O -when WRB B-ADVP -the DT B-NP -Dow NNP I-NP -Jones NNP I-NP -Industrial NNP I-NP -Average NNP I-NP -plunged VBD B-VP -more JJR B-NP -than IN I-NP -190 CD I-NP -points NNS I-NP -. . 
O - -Currency NN B-NP -analysts NNS I-NP -predict VBP B-VP -that IN B-SBAR -in IN B-PP -the DT B-NP -coming VBG I-NP -week NN I-NP -the DT B-NP -foreign JJ I-NP -exchange NN I-NP -market NN I-NP -will MD B-VP -shift VB I-VP -its PRP$ B-NP -focus NN I-NP -back RB B-ADVP -to TO B-PP -economic JJ B-NP -fundamentals NNS I-NP -, , O -keeping VBG B-VP -a DT B-NP -close NN I-NP -eye NN I-NP -out IN B-ADVP -for IN B-PP -any DT B-NP -signs NNS I-NP -of IN B-PP -monetary JJ B-NP -easing NN I-NP -by IN B-PP -U.S. NNP B-NP -Federal NNP I-NP -Reserve NNP I-NP -. . O - -Late RB B-ADVP -in IN B-PP -the DT B-NP -New NNP I-NP -York NNP I-NP -trading NN I-NP -day NN I-NP -, , O -the DT B-NP -dollar NN I-NP -was VBD B-VP -quoted VBN I-VP -at IN B-PP -1.8578 CD B-NP -marks NNS I-NP -, , O -up IN B-ADVP -from IN B-PP -1.8470 CD B-NP -marks NNS I-NP -late JJ B-NP -Thursday NNP I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -. . O - -The DT B-NP -U.S. NNP I-NP -currency NN I-NP -was VBD B-VP -also RB I-VP -changing VBG I-VP -hands NNS B-NP -at IN B-PP -142.43 CD B-NP -yen NN I-NP -, , O -up IN B-ADVP -from IN B-PP -141.70 CD B-NP -yen NN I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -late JJ B-NP -Thursday NNP I-NP -. . O - -In IN B-PP -Tokyo NNP B-NP -on IN B-PP -Monday NNP B-NP -, , O -the DT B-NP -U.S. 
NNP I-NP -currency NN I-NP -opened VBD B-VP -for IN B-PP -trading NN B-NP -at IN B-PP -141.95 CD B-NP -yen NN I-NP -, , O -up IN B-ADVP -from IN B-PP -Friday NNP B-NP -'s POS B-NP -Tokyo NNP I-NP diff --git a/paddle/trainer/tests/testPyDataWrapper.py b/paddle/trainer/tests/testPyDataWrapper.py index 2c29a274339747b78fbd6c27ae4070f0abbd4028..a76eeeacb91cdba305d2f71c6292f79e4b98dd73 100644 --- a/paddle/trainer/tests/testPyDataWrapper.py +++ b/paddle/trainer/tests/testPyDataWrapper.py @@ -20,28 +20,6 @@ import random import json import string - -@provider(slots=[ - SparseNonValueSlot(10), DenseSlot(2), SparseValueSlot(10), StringSlot(1), - IndexSlot(3) -]) -def processNonSequenceData(obj, filename): - with open(filename, "rb") as f: - for line in f: - slots_str = line.split(';') - index = int(slots_str[0]) - non_values = map(int, slots_str[1].split()[1:]) - dense = map(float, slots_str[2].split()[1:]) - strs = slots_str[4].strip().split(' ', 1)[1] - - def __values_mapper__(s): - s = s.split(":") - return int(s[0]), float(s[1]) - - values = map(__values_mapper__, slots_str[3].split()[1:]) - yield [non_values, dense, values, strs, index] - - SPARSE_ID_LIMIT = 1000 SPARSE_ID_COUNT = 100 SEQUENCE_LIMIT = 50 @@ -146,8 +124,6 @@ def processSubSeqAndGenerateData(obj, name): if __name__ == "__main__": - pvd = processNonSequenceData("test.txt") - print pvd.getNextBatch(100) pvd = processSeqAndGenerateData("_") print pvd.getNextBatch(100) pvd = processSubSeqAndGenerateData("_") diff --git a/paddle/trainer/tests/test_CompareTwoOpts.cpp b/paddle/trainer/tests/test_CompareTwoOpts.cpp deleted file mode 100644 index 383505f8131264844069d6f0fa13f4e0ac1f97af..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/test_CompareTwoOpts.cpp +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include - -#include "paddle/trainer/Trainer.h" - -using namespace paddle; // NOLINT -using namespace std; // NOLINT - -DECLARE_int32(gpu_id); - -DECLARE_bool(local); -DECLARE_bool(use_gpu); - -DECLARE_string(config); -DECLARE_string(nics); - -DEFINE_string(config_file_a, "", "config of one network to compare"); -DEFINE_string(config_file_b, "", "config of another network to compare"); -DEFINE_bool(need_high_accuracy, - true, - "whether need to run in double accuracy (recommended)"); -DEFINE_double( - max_diff_ratio, - 0.0f, - "max diff ratio allowed for outputs and parameters (value/gradient)"); - -struct ComData { - vector outArgs; - vector parameters; -}; - -void calcGradient(ComData& data, const string configFile) { - FLAGS_config = configFile; - - FLAGS_local = true; - FLAGS_use_gpu = false; - - FLAGS_nics = ""; - - *ThreadLocalRand::getSeed() = 0; - srand(0); - - Trainer trainer; - trainer.init(TrainerConfigHelper::createFromFlagConfig(), false); - - data.parameters = trainer.getGradientMachine()->getParameters(); - trainer.getDataProvider()->setSkipShuffle(); - trainer.train(); -} - -void checkBuffer(real* A, - const char* desA, - real* B, - const char* desB, - size_t len, - size_t width = 1) { - int nNum = 0; - for (size_t i = 0; i < len; ++i) { - real diff = fabs(A[i] - B[i]); - if (diff > 0.0f && - diff / std::max(fabs(A[i]), fabs(B[i])) > FLAGS_max_diff_ratio) { - nNum++; - LOG(INFO) << "Row: " << i / width << ", " << desA << " : " << A[i] - << " " << desB << " : " << B[i]; - } - } - EXPECT_EQ(0, nNum); - LOG(INFO) << "\n\n"; 
-} - -void compareGradient(ComData& comDataA, ComData& comDataB) { - vector outArgsA = comDataA.outArgs; - vector outArgsB = comDataB.outArgs; - - for (size_t i = 0; i < outArgsA.size(); ++i) { - CpuMatrix matA(outArgsA[i].value->getHeight(), - outArgsA[i].value->getWidth()); - CpuMatrix matB(outArgsB[i].value->getHeight(), - outArgsB[i].value->getWidth()); - - matA.copyFrom(*outArgsA[i].value); - matB.copyFrom(*outArgsB[i].value); - - LOG(INFO) << "\n--------------------------------" - << " Check Network Output_" << i << ":" - << " -------------------------------------\n"; - checkBuffer(matA.getData(), - "network A output", - matB.getData(), - "network B output", - matA.getElementCnt(), - matA.getWidth()); - } - - vector& parametersA = comDataA.parameters; - vector& parametersB = comDataB.parameters; - - LOG(INFO) << "\n\n--------------------------------" - << " Check Gradient Machine Parameters:" - << " -------------------------------------\n"; - for (size_t i = 0; i < parametersA.size(); ++i) { - ParameterPtr parameterA, parameterB; - parameterA = parametersA[i]; - parameterB = parametersB[i]; - - CpuVector paraA(parameterA->getSize()); - CpuVector paraB(parameterB->getSize()); - paraA.copyFrom(*parameterA->getBuf(PARAMETER_VALUE)); - paraB.copyFrom(*parameterB->getBuf(PARAMETER_VALUE)); - - LOG(INFO) << "\n\n----------- PARAMETER_VALUE: " << parameterA->getName() - << " ; size : " << paraA.getSize() << " ------------"; - checkBuffer(paraA.getData(), - "Network A", - paraB.getData(), - "Network B", - paraA.getSize()); - - CpuVector gradA(*parameterA->getBuf(PARAMETER_GRADIENT)); - CpuVector gradB(*parameterB->getBuf(PARAMETER_GRADIENT)); - - LOG(INFO) << "\n\n----------- PARAMETER_GRADIENT: " << parameterA->getName() - << " ; size : " << gradA.getSize() << " -----------"; - checkBuffer(gradA.getData(), - "Network A", - gradB.getData(), - "Network B", - gradA.getSize()); - } -} - -TEST(Trainer, create) { - ComData dataA; - calcGradient(dataA, 
FLAGS_config_file_a); - LOG(INFO) << "\n\ntraining of Network A is finished\n\n"; - - ComData dataB; - calcGradient(dataB, FLAGS_config_file_b); - LOG(INFO) << "\n\ntraining of the Network B is finished\n\n"; - - compareGradient(dataA, dataB); -} - -int main(int argc, char** argv) { - paddle::initMain(argc, argv); - testing::InitGoogleTest(&argc, argv); - initPython(argc, argv); - -#ifndef PADDLE_TYPE_DOUBLE - if (FLAGS_need_high_accuracy) { - LOG(INFO) << "skip test due to it's need high accuracy"; - return 0; - } - if (FLAGS_max_diff_ratio == 0.0f) { - FLAGS_max_diff_ratio = 2e-4; - LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio - << " in low accuracy mode"; - } -#else - if (FLAGS_max_diff_ratio == 0.0f) { - FLAGS_max_diff_ratio = 2e-7; - LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio - << " in high accuracy mode"; - } -#endif - int ret = RUN_ALL_TESTS(); - return ret; -} diff --git a/paddle/trainer/tests/test_PyDataProviderWrapper.cpp b/paddle/trainer/tests/test_PyDataProviderWrapper.cpp index 66ec65e340a435a7260028611828fb28845e0728..92dc8aa9ec5ce281d1950d84260c1b9555e686a7 100644 --- a/paddle/trainer/tests/test_PyDataProviderWrapper.cpp +++ b/paddle/trainer/tests/test_PyDataProviderWrapper.cpp @@ -25,45 +25,9 @@ limitations under the License. 
*/ #include #include "picojson.h" -void checkEqual(const paddle::Argument& expect, const paddle::Argument& actual); void checkValue(std::vector& arguments, picojson::array& arr); const std::string kDir = "./trainer/tests/pydata_provider_wrapper_dir/"; -TEST(PyDataProviderWrapper, NoSequenceData) { - paddle::DataConfig conf; - conf.set_type("py"); - conf.set_load_data_module(std::string("testPyDataWrapper")); - conf.set_load_data_object(std::string("processNonSequenceData")); - conf.set_async_load_data(false); - conf.clear_files(); - conf.set_files(kDir + "test_pydata_provider_wrapper.list"); - paddle::DataProviderPtr provider(paddle::DataProvider::create(conf, false)); - provider->setSkipShuffle(); - provider->reset(); - paddle::DataBatch batchFromPy; - provider->getNextBatch(100, &batchFromPy); - - paddle::DataConfig conf2; - conf2.set_type("proto"); - conf2.set_async_load_data(false); - conf2.clear_files(); - conf2.set_files(kDir + "test_pydata_provider_wrapper.protolist"); - - provider.reset(paddle::DataProvider::create(conf2, false)); - provider->setSkipShuffle(); - provider->reset(); - paddle::DataBatch batchFromProto; - provider->getNextBatch(100, &batchFromProto); - - std::vector& pyArguments = batchFromPy.getStreams(); - std::vector& protoArguments = batchFromProto.getStreams(); - EXPECT_EQ(pyArguments.size(), protoArguments.size()); - - for (size_t i = 0; i < pyArguments.size(); ++i) { - checkEqual(protoArguments[i], pyArguments[i]); - } -} - TEST(PyDataProviderWrapper, SequenceData) { paddle::DataConfig conf; conf.set_type("py"); @@ -148,66 +112,6 @@ int main(int argc, char** argv) { return RUN_ALL_TESTS(); } -void checkEqual(const paddle::Argument& expect, - const paddle::Argument& actual) { - if (expect.value) { - EXPECT_TRUE(actual.value != nullptr); - paddle::Matrix* e = expect.value.get(); - paddle::Matrix* a = actual.value.get(); - EXPECT_EQ(e->getWidth(), a->getWidth()); - EXPECT_EQ(e->getHeight(), a->getHeight()); - if (dynamic_cast(e)) { - 
paddle::CpuSparseMatrix* se = dynamic_cast(e); - paddle::CpuSparseMatrix* sa = dynamic_cast(a); - EXPECT_EQ(se->getFormat(), sa->getFormat()); - EXPECT_EQ(se->getElementCnt(), sa->getElementCnt()); - size_t rowSize = se->getFormat() == paddle::SPARSE_CSC - ? se->getElementCnt() - : se->getHeight() + 1; - size_t colSize = se->getFormat() == paddle::SPARSE_CSC - ? se->getWidth() + 1 - : se->getElementCnt(); - for (size_t i = 0; i < rowSize; ++i) { - EXPECT_EQ(se->getRows()[i], sa->getRows()[i]); - } - for (size_t i = 0; i < colSize; ++i) { - EXPECT_EQ(se->getCols()[i], sa->getCols()[i]); - } - if (se->getValueType() == paddle::FLOAT_VALUE) { - EXPECT_EQ(paddle::FLOAT_VALUE, sa->getValueType()); - for (size_t i = 0; i < se->getElementCnt(); ++i) { - EXPECT_EQ(se->getValue()[i], sa->getValue()[i]); - } - } - } else if (dynamic_cast(e)) { - EXPECT_EQ(e->getElementCnt(), a->getElementCnt()); - for (size_t i = 0; i < e->getElementCnt(); ++i) { - EXPECT_EQ(e->getData()[i], a->getData()[i]); - } - } - } - - if (expect.ids) { - EXPECT_TRUE(actual.ids != nullptr); - paddle::VectorT* e = expect.ids.get(); - paddle::VectorT* a = actual.ids.get(); - EXPECT_EQ(e->getSize(), a->getSize()); - for (size_t i = 0; i < e->getSize(); ++i) { - EXPECT_EQ(e->getData()[i], a->getData()[i]); - } - } - - if (expect.strs) { - EXPECT_TRUE(actual.strs != nullptr); - std::vector* e = expect.strs.get(); - std::vector* a = actual.strs.get(); - EXPECT_EQ(e->size(), a->size()); - for (size_t i = 0; i < e->size(); ++i) { - EXPECT_EQ((*e)[i], (*a)[i]); - } - } -} - void checkValue(std::vector& arguments, picojson::array& arr) { // CHECK SLOT 0, Sparse Value. 
diff --git a/paddle/trainer/tests/test_Trainer.cpp b/paddle/trainer/tests/test_Trainer.cpp index 425b3d10a38086463784ba2a18db1293efe96e92..394038cf730f13cb957fbbc5ae0e5719b8fe9db6 100644 --- a/paddle/trainer/tests/test_Trainer.cpp +++ b/paddle/trainer/tests/test_Trainer.cpp @@ -24,7 +24,6 @@ using namespace std; // NOLINT static const string& configFile1 = "trainer/tests/sample_trainer_config.conf"; static const string& configFile2 = "trainer/tests/sample_trainer_config_hsigmoid.conf"; -static const string& configFile3 = "trainer/tests/chunking.conf"; static const string& configFile4 = "trainer/tests/sample_trainer_config_parallel.conf"; @@ -95,13 +94,6 @@ TEST(checkGradient, multi) { TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); } -TEST(checkGradient, chunk) { - checkGradientTest(configFile3, false, false); -#ifdef PADDLE_WITH_CUDA - checkGradientTest(configFile3, true, true); -#endif -} - TEST(checkGradient, non_parallel) { checkGradientTest(configFile4, false, false); } diff --git a/paddle/trainer/tests/test_config.conf b/paddle/trainer/tests/test_config.conf index d1bb9b877fe26702948586dbe90b9ff0ee27c1d6..2f86aaa75316fa2a5a28edfef31c01e15a44b3d0 100644 --- a/paddle/trainer/tests/test_config.conf +++ b/paddle/trainer/tests/test_config.conf @@ -15,12 +15,7 @@ from paddle.trainer_config_helpers import * -TrainData(ProtoData( - files = "dummy_list", - constant_slots = [1.0], - async_load_data = True)) - -TestData(SimpleData( +TrainData(SimpleData( files = "trainer/tests/sample_filelist.txt", feat_dim = 3, context_len = 0, diff --git a/paddle/trainer/tests/test_files.txt b/paddle/trainer/tests/test_files.txt deleted file mode 100644 index 49002677a848c499610d5e869ce61efb2105e3c8..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/test_files.txt +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/test_proto.bin diff --git a/paddle/trainer/tests/train.list b/paddle/trainer/tests/train.list deleted file mode 100644 index 
f41e8e8893de6068deb43b08ec6a3bcdd4039326..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/train.list +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/data_bin_part diff --git a/paddle/trainer/tests/train.txt b/paddle/trainer/tests/train.txt deleted file mode 100644 index 2313aee987ba71ba7ea779d3cf7705478e7fbde2..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/train.txt +++ /dev/null @@ -1,5000 +0,0 @@ -Confidence NN B-NP -in IN B-PP -the DT B-NP -pound NN I-NP -is VBZ B-VP -widely RB I-VP -expected VBN I-VP -to TO I-VP -take VB I-VP -another DT B-NP -sharp JJ I-NP -dive NN I-NP -if IN B-SBAR -trade NN B-NP -figures NNS I-NP -for IN B-PP -September NNP B-NP -, , O -due JJ B-ADJP -for IN B-PP -release NN B-NP -tomorrow NN B-NP -, , O -fail VB B-VP -to TO I-VP -show VB I-VP -a DT B-NP -substantial JJ I-NP -improvement NN I-NP -from IN B-PP -July NNP B-NP -and CC I-NP -August NNP I-NP -'s POS B-NP -near-record JJ I-NP -deficits NNS I-NP -. . O - -Chancellor NNP O -of IN B-PP -the DT B-NP -Exchequer NNP I-NP -Nigel NNP B-NP -Lawson NNP I-NP -'s POS B-NP -restated VBN I-NP -commitment NN I-NP -to TO B-PP -a DT B-NP -firm NN I-NP -monetary JJ I-NP -policy NN I-NP -has VBZ B-VP -helped VBN I-VP -to TO I-VP -prevent VB I-VP -a DT B-NP -freefall NN I-NP -in IN B-PP -sterling NN B-NP -over IN B-PP -the DT B-NP -past JJ I-NP -week NN I-NP -. . O - -But CC O -analysts NNS B-NP -reckon VBP B-VP -underlying VBG B-NP -support NN I-NP -for IN B-PP -sterling NN B-NP -has VBZ B-VP -been VBN I-VP -eroded VBN I-VP -by IN B-PP -the DT B-NP -chancellor NN I-NP -'s POS B-NP -failure NN I-NP -to TO B-VP -announce VB I-VP -any DT B-NP -new JJ I-NP -policy NN I-NP -measures NNS I-NP -in IN B-PP -his PRP$ B-NP -Mansion NNP I-NP -House NNP I-NP -speech NN I-NP -last JJ B-NP -Thursday NNP I-NP -. . 
O - -This DT B-NP -has VBZ B-VP -increased VBN I-VP -the DT B-NP -risk NN I-NP -of IN B-PP -the DT B-NP -government NN I-NP -being VBG B-VP -forced VBN I-VP -to TO I-VP -increase VB I-VP -base NN B-NP -rates NNS I-NP -to TO B-PP -16 CD B-NP -% NN I-NP -from IN B-PP -their PRP$ B-NP -current JJ I-NP -15 CD I-NP -% NN I-NP -level NN I-NP -to TO B-VP -defend VB I-VP -the DT B-NP -pound NN I-NP -, , O -economists NNS B-NP -and CC O -foreign JJ B-NP -exchange NN I-NP -market NN I-NP -analysts NNS I-NP -say VBP B-VP -. . O - -`` `` O -The DT B-NP -risks NNS I-NP -for IN B-PP -sterling NN B-NP -of IN B-PP -a DT B-NP -bad JJ I-NP -trade NN I-NP -figure NN I-NP -are VBP B-VP -very RB B-ADVP -heavily RB I-ADVP -on IN B-PP -the DT B-NP -down JJ I-NP -side NN I-NP -, , O -'' '' O -said VBD B-VP -Chris NNP B-NP -Dillow NNP I-NP -, , O -senior JJ B-NP -U.K. NNP I-NP -economist NN I-NP -at IN B-PP -Nomura NNP B-NP -Research NNP I-NP -Institute NNP I-NP -. . O - -`` `` O -If IN B-SBAR -there EX B-NP -is VBZ B-VP -another DT B-NP -bad JJ I-NP -trade NN I-NP -number NN I-NP -, , O -there EX B-NP -could MD B-VP -be VB I-VP -an DT B-NP -awful JJ I-NP -lot NN I-NP -of IN B-PP -pressure NN B-NP -, , O -'' '' O -noted VBD B-VP -Simon NNP B-NP -Briscoe NNP I-NP -, , O -U.K. NNP B-NP -economist NN I-NP -for IN B-PP -Midland NNP B-NP -Montagu NNP I-NP -, , O -a DT B-NP -unit NN I-NP -of IN B-PP -Midland NNP B-NP -Bank NNP I-NP -PLC NNP I-NP -. . O - -Forecasts NNS B-NP -for IN B-PP -the DT B-NP -trade NN I-NP -figures NNS I-NP -range VBP B-VP -widely RB B-ADVP -, , O -but CC O -few JJ B-NP -economists NNS I-NP -expect VBP B-VP -the DT B-NP -data NNS I-NP -to TO B-VP -show VB I-VP -a DT B-NP -very RB I-NP -marked VBN I-NP -improvement NN I-NP -from IN B-PP -the DT O -# # O -2 CD O -billion CD O --LRB- ( O -$ $ B-ADJP -3.2 CD O -billion CD O --RRB- ) O -deficit NN B-NP -in IN B-PP -the DT B-NP -current JJ I-NP -account NN I-NP -reported VBD B-VP -for IN B-PP -August NNP B-NP -. . 
O - -The DT B-NP -August NNP I-NP -deficit NN I-NP -and CC O -the DT B-NP -# # I-NP -2.2 CD I-NP -billion CD I-NP -gap NN I-NP -registered VBN B-VP -in IN B-PP -July NNP B-NP -are VBP B-VP -topped VBN I-VP -only RB B-ADVP -by IN B-PP -the DT B-NP -# # I-NP -2.3 CD I-NP -billion CD I-NP -deficit NN I-NP -of IN B-PP -October NNP B-NP -1988 CD I-NP -. . O - -Sanjay NNP B-NP -Joshi NNP I-NP -, , O -European JJ B-NP -economist NN I-NP -at IN B-PP -Baring NNP B-NP -Brothers NNPS I-NP -& CC I-NP -Co. NNP I-NP -, , O -said VBD B-VP -there EX B-NP -is VBZ B-VP -no DT B-NP -sign NN I-NP -that IN B-SBAR -Britain NNP B-NP -'s POS B-NP -manufacturing NN I-NP -industry NN I-NP -is VBZ B-VP -transforming VBG I-VP -itself PRP B-NP -to TO B-VP -boost VB I-VP -exports NNS B-NP -. . O - -At IN B-PP -the DT B-NP -same JJ I-NP -time NN I-NP -, , O -he PRP B-NP -remains VBZ B-VP -fairly RB B-ADJP -pessimistic JJ I-ADJP -about IN B-PP -the DT B-NP -outlook NN I-NP -for IN B-PP -imports NNS B-NP -, , O -given VBN B-PP -continued VBD B-NP -high JJ I-NP -consumer NN I-NP -and CC I-NP -capital NN I-NP -goods NNS I-NP -inflows NNS I-NP -. . O - -He PRP B-NP -reckons VBZ B-VP -the DT B-NP -current JJ I-NP -account NN I-NP -deficit NN I-NP -will MD B-VP -narrow VB I-VP -to TO B-PP -only RB B-NP -# # I-NP -1.8 CD I-NP -billion CD I-NP -in IN B-PP -September NNP B-NP -. . O - -However RB B-ADVP -, , O -Mr. NNP B-NP -Dillow NNP I-NP -said VBD B-VP -he PRP B-NP -believes VBZ B-VP -that IN B-SBAR -a DT B-NP -reduction NN I-NP -in IN B-PP -raw JJ B-NP -material NN I-NP -stockbuilding VBG I-NP -by IN B-PP -industry NN B-NP -could MD B-VP -lead VB I-VP -to TO B-PP -a DT B-NP -sharp JJ I-NP -drop NN I-NP -in IN B-PP -imports NNS B-NP -. . 
O - -Combined VBN B-PP -with IN B-PP -at IN B-ADVP -least JJS I-ADVP -some DT B-NP -rebound NN I-NP -in IN B-PP -exports NNS B-NP -after IN B-PP -August NNP B-NP -'s POS B-NP -unexpected JJ I-NP -decline NN I-NP -, , O -the DT B-NP -deficit NN I-NP -could MD B-VP -narrow VB I-VP -to TO B-PP -as RB B-NP -little JJ I-NP -as IN I-NP -# # I-NP -1.3 CD I-NP -billion CD I-NP -. . O - -Mr. NNP B-NP -Briscoe NNP I-NP -, , O -who WP B-NP -also RB B-ADVP -forecasts VBZ B-VP -a DT B-NP -# # I-NP -1.3 CD I-NP -billion CD I-NP -current JJ I-NP -account NN I-NP -gap NN I-NP -, , O -warns VBZ B-VP -that IN B-SBAR -even RB B-SBAR -if IN I-SBAR -the DT B-NP -trade NN I-NP -figures NNS I-NP -are VBP B-VP -bullish JJ B-ADJP -for IN B-PP -sterling NN B-NP -, , O -the DT B-NP -currency NN I-NP -wo MD B-VP -n't RB I-VP -advance VB I-VP -much JJ B-NP -because IN B-SBAR -investors NNS B-NP -will MD B-VP -want VB I-VP -to TO I-VP -see VB I-VP -further JJ B-NP -evidence NN I-NP -of IN B-PP -the DT B-NP -turnaround NN I-NP -before IN B-PP -adjusting VBG B-VP -positions NNS B-NP -. . O - -Nevertheless RB B-ADVP -, , O -he PRP B-NP -noted VBD B-VP -, , O -`` `` O -No DT B-NP -one PRP I-NP -will MD B-VP -want VB I-VP -to TO I-VP -go VB I-VP -into IN B-PP -the DT B-NP -trade NN I-NP -figures NNS I-NP -without IN B-PP -a DT B-NP -flat JJ I-NP -position NN I-NP -'' '' O -in IN B-PP -the DT B-NP -pound NN I-NP -. . O - -Meanwhile RB B-ADVP -, , O -overall JJ B-NP -evidence NN I-NP -on IN B-PP -the DT B-NP -economy NN I-NP -remains VBZ B-VP -fairly RB B-ADJP -clouded VBN I-ADJP -. . O - -In IN B-PP -his PRP$ B-NP -Mansion NNP I-NP -House NNP I-NP -speech NN I-NP -, , O -Mr. 
NNP B-NP -Lawson NNP I-NP -warned VBD B-VP -that IN B-SBAR -a DT B-NP -further JJ I-NP -slowdown NN I-NP -can MD B-VP -be VB I-VP -expected VBN I-VP -as IN B-SBAR -the DT B-NP -impact NN I-NP -of IN B-PP -the DT B-NP -last JJ I-NP -rise NN I-NP -in IN B-PP -interest NN B-NP -rates NNS I-NP -earlier RBR B-NP -this DT I-NP -month NN I-NP -takes VBZ B-VP -effect NN B-NP -. . O - -U.K. JJ B-NP -base NN I-NP -rates NNS I-NP -are VBP B-VP -at IN B-PP -their PRP$ B-NP -highest JJS I-NP -level NN I-NP -in IN B-PP -eight CD B-NP -years NNS I-NP -. . O - -But CC O -consumer NN B-NP -expenditure NN I-NP -data NNS I-NP -released VBD B-VP -Friday NNP B-NP -do VBP B-VP -n't RB I-VP -suggest VB I-VP -that IN B-SBAR -the DT B-NP -U.K. NNP I-NP -economy NN I-NP -is VBZ B-VP -slowing VBG I-VP -that DT B-ADVP -quickly RB I-ADVP -. . O - -The DT B-NP -figures NNS I-NP -show VBP B-VP -that DT O -spending NN B-NP -rose VBD B-VP -0.1 CD B-NP -% NN I-NP -in IN B-PP -the DT B-NP -third JJ I-NP -quarter NN I-NP -from IN B-PP -the DT B-NP -second JJ I-NP -quarter NN I-NP -and CC O -was VBD B-VP -up IN B-ADVP -3.8 CD B-NP -% NN I-NP -from IN B-PP -a DT B-NP -year NN I-NP -ago RB B-ADVP -. . O - -This DT B-NP -compares VBZ B-VP -with IN B-PP -a DT B-NP -1.6 CD I-NP -% NN I-NP -rise NN I-NP -in IN B-PP -the DT B-NP -second NN I-NP -from IN B-PP -the DT B-NP -first JJ I-NP -quarter NN I-NP -and CC O -a DT B-NP -5.4 CD I-NP -% NN I-NP -increase NN I-NP -from IN B-PP -the DT B-NP -second JJ I-NP -quarter NN I-NP -of IN B-PP -1988 CD B-NP -. . O - -Mr. 
NNP B-NP -Dillow NNP I-NP -said VBD B-VP -the DT B-NP -data NNS I-NP -show VBP B-VP -the DT B-NP -economy NN I-NP -`` `` O -is VBZ B-VP -still RB B-ADVP -quite RB B-ADJP -strong JJ I-ADJP -, , O -'' '' O -but CC O -suggestions NNS B-NP -that IN B-SBAR -much NN B-NP -of IN B-PP -the DT B-NP -spending NN I-NP -went VBD B-VP -on IN B-PP -services NNS B-NP -rather RB B-PP -than IN I-PP -consumer NN B-NP -goods NNS I-NP -should MD B-VP -reduce VB I-VP -fears NNS B-NP -of IN B-PP -more JJR B-NP -import NN I-NP -rises NNS I-NP -. . O - -Certainly RB B-ADVP -, , O -the DT B-NP -chancellor NN I-NP -has VBZ B-VP -made VBN I-VP -it PRP B-NP -clear JJ B-ADJP -that IN B-SBAR -he PRP B-NP -is VBZ B-VP -prepared VBN I-VP -to TO I-VP -increase VB I-VP -interest NN B-NP -rates NNS I-NP -again RB B-ADVP -if IN B-SBAR -necessary JJ B-ADJP -to TO B-VP -both DT I-VP -ensure VB I-VP -that IN B-SBAR -a DT B-NP -substantial JJ I-NP -slowdown NN I-NP -does VBZ B-VP -take VB I-VP -place NN B-NP -and CC O -that DT O -sterling NN B-NP -does VBZ B-VP -n't RB I-VP -decline VB I-VP -further JJ B-ADVP -. . O - -Thursday NNP B-NP -, , O -he PRP B-NP -reminded VBD B-VP -his PRP$ B-NP -audience NN I-NP -that IN B-SBAR -the DT B-NP -government NN I-NP -`` `` O -can MD B-VP -not RB I-VP -allow VB I-VP -the DT B-NP -necessary JJ I-NP -rigor NN I-NP -of IN B-PP -monetary JJ B-NP -policy NN I-NP -to TO B-VP -be VB I-VP -undermined VBN I-VP -by IN B-PP -exchange NN B-NP -rate NN I-NP -weakness NN I-NP -. . O -'' '' O - -Analysts NNS B-NP -agree VBP B-VP -there EX B-NP -is VBZ B-VP -little JJ B-NP -holding NN B-VP -sterling NN B-NP -firm NN B-ADJP -at IN B-PP -the DT B-NP -moment NN I-NP -other JJ B-ADJP -than IN B-PP -Mr. NNP B-NP -Lawson NNP I-NP -'s POS B-NP -promise NN I-NP -that IN B-SBAR -rates NNS B-NP -will MD B-VP -be VB I-VP -pushed VBN I-VP -higher JJR B-ADJP -if IN B-SBAR -necessary JJ B-ADJP -. . 
O - -And CC O -, , O -they PRP B-NP -warn VBP B-VP -, , O -any DT B-NP -further JJ I-NP -drop NN I-NP -in IN B-PP -the DT B-NP -government NN I-NP -'s POS B-NP -popularity NN I-NP -could MD B-VP -swiftly RB I-VP -make VB I-VP -this DT B-NP -promise NN I-NP -sound NN B-VP -hollow JJ B-ADJP -. . O - -Sterling NNP B-NP -was VBD B-VP -already RB I-VP -showing VBG I-VP -some DT B-NP -signs NNS I-NP -of IN B-PP -a DT B-NP -lack NN I-NP -of IN B-PP -confidence NN B-NP -in IN B-PP -Mr. NNP B-NP -Lawson NNP I-NP -'s POS B-NP -promise NN I-NP -Friday NNP B-NP -. . O - -In IN B-PP -European JJ B-NP -trading NN I-NP -it PRP B-NP -declined VBD B-VP -to TO B-PP -$ $ B-NP -1.5890 CD I-NP -and CC O -2.9495 CD B-NP -marks NNS I-NP -from IN B-PP -$ $ B-NP -1.5940 CD I-NP -and CC O -2.9429 CD B-NP -marks NNS I-NP -late JJ B-NP -Thursday NNP I-NP -. . O - -Economists NNS B-NP -suggested VBD B-VP -that IN B-SBAR -if IN B-SBAR -the DT B-NP -pound NN I-NP -falls VBZ B-VP -much JJ B-NP -below IN B-PP -2.90 CD B-NP -marks NNS I-NP -, , O -the DT B-NP -government NN I-NP -will MD B-VP -be VB I-VP -forced VBN I-VP -to TO I-VP -increase VB I-VP -rates NNS B-NP -to TO B-PP -16 CD B-NP -% NN I-NP -, , O -both DT B-VP -to TO I-VP -halt VB B-VP -any DT B-NP -further JJ I-NP -decline NN I-NP -and CC O -ensure VB B-VP -that IN B-SBAR -the DT B-NP -balance NN I-NP -of IN B-PP -monetary JJ B-NP -policy NN I-NP -remains VBZ B-VP -unchanged JJ B-ADJP -. . O - -Friday NNP B-NP -'s POS B-NP -Market NNP I-NP -Activity NN I-NP - -The DT B-NP -dollar NN I-NP -posted VBD B-VP -gains NNS B-NP -in IN B-PP -quiet JJ B-NP -trading NN I-NP -as IN B-SBAR -concerns NNS B-NP -about IN B-PP -equities NNS B-NP -abated VBN B-VP -. . 
O - -Foreign JJ B-NP -exchange NN I-NP -dealers NNS I-NP -said VBD B-VP -that IN B-SBAR -the DT B-NP -currency NN I-NP -market NN I-NP -has VBZ B-VP -begun VBN I-VP -to TO I-VP -distance VB I-VP -itself PRP B-NP -from IN B-PP -the DT B-NP -volatile JJ I-NP -stock NN I-NP -exchange NN I-NP -, , O -which WDT B-NP -has VBZ B-VP -preoccupied VBN I-VP -the DT B-NP -market NN I-NP -since IN B-PP -Oct. NNP B-NP -13 CD I-NP -, , O -when WRB B-ADVP -the DT B-NP -Dow NNP I-NP -Jones NNP I-NP -Industrial NNP I-NP -Average NNP I-NP -plunged VBD B-VP -more JJR B-NP -than IN I-NP -190 CD I-NP -points NNS I-NP -. . O - -Currency NN B-NP -analysts NNS I-NP -predict VBP B-VP -that IN B-SBAR -in IN B-PP -the DT B-NP -coming VBG I-NP -week NN I-NP -the DT B-NP -foreign JJ I-NP -exchange NN I-NP -market NN I-NP -will MD B-VP -shift VB I-VP -its PRP$ B-NP -focus NN I-NP -back RB B-ADVP -to TO B-PP -economic JJ B-NP -fundamentals NNS I-NP -, , O -keeping VBG B-VP -a DT B-NP -close NN I-NP -eye NN I-NP -out IN B-ADVP -for IN B-PP -any DT B-NP -signs NNS I-NP -of IN B-PP -monetary JJ B-NP -easing NN I-NP -by IN B-PP -U.S. NNP B-NP -Federal NNP I-NP -Reserve NNP I-NP -. . O - -Late RB B-ADVP -in IN B-PP -the DT B-NP -New NNP I-NP -York NNP I-NP -trading NN I-NP -day NN I-NP -, , O -the DT B-NP -dollar NN I-NP -was VBD B-VP -quoted VBN I-VP -at IN B-PP -1.8578 CD B-NP -marks NNS I-NP -, , O -up IN B-ADVP -from IN B-PP -1.8470 CD B-NP -marks NNS I-NP -late JJ B-NP -Thursday NNP I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -. . O - -The DT B-NP -U.S. NNP I-NP -currency NN I-NP -was VBD B-VP -also RB I-VP -changing VBG I-VP -hands NNS B-NP -at IN B-PP -142.43 CD B-NP -yen NN I-NP -, , O -up IN B-ADVP -from IN B-PP -141.70 CD B-NP -yen NN I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -late JJ B-NP -Thursday NNP I-NP -. . O - -In IN B-PP -Tokyo NNP B-NP -on IN B-PP -Monday NNP B-NP -, , O -the DT B-NP -U.S. 
NNP I-NP -currency NN I-NP -opened VBD B-VP -for IN B-PP -trading NN B-NP -at IN B-PP -141.95 CD B-NP -yen NN I-NP -, , O -up IN B-ADVP -from IN B-PP -Friday NNP B-NP -'s POS B-NP -Tokyo NNP I-NP -close NN I-NP -of IN B-PP -141.35 CD B-NP -yen NN I-NP -. . O - -On IN B-PP -the DT B-NP -Commodity NNP I-NP -Exchange NNP I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -, , O -gold NN B-NP -for IN B-PP -current JJ B-NP -delivery NN I-NP -settled VBD B-VP -at IN B-PP -$ $ B-NP -367.30 CD I-NP -an DT B-NP -ounce NN I-NP -, , O -up IN B-ADVP -20 CD B-NP -cents NNS I-NP -. . O - -Estimated VBN B-NP -volume NN I-NP -was VBD B-VP -a DT B-NP -light NN I-NP -2.4 CD I-NP -million CD I-NP -ounces NNS I-NP -. . O - -In IN B-PP -early JJ B-NP -trading NN I-NP -in IN B-PP -Hong NNP B-NP -Kong NNP I-NP -Monday NNP B-NP -, , O -gold NN B-NP -was VBD B-VP -quoted VBN I-VP -at IN B-PP -$ $ B-NP -366.50 CD I-NP -an DT B-NP -ounce NN I-NP -. . O - -East NNP B-NP -Rock NNP I-NP -Partners NNP I-NP -Limited NNP I-NP -Partnership NNP I-NP -said VBD B-VP -it PRP B-NP -proposed VBD B-VP -to TO I-VP -acquire VB I-VP -A.P. NNP B-NP -Green NNP I-NP -Industries NNP I-NP -Inc. NNP I-NP -for IN B-PP -$ $ B-NP -40 CD I-NP -a DT B-NP -share NN I-NP -. . O - -In IN B-PP -an DT B-NP -Oct. NNP I-NP -19 CD I-NP -letter NN I-NP -to TO B-PP -A.P. NNP B-NP -Green NNP I-NP -'s POS B-NP -board NN I-NP -, , O -East NNP B-NP -Rock NNP I-NP -said VBD B-VP -the DT B-NP -offer NN I-NP -is VBZ B-VP -subject NN B-ADJP -to TO B-PP -the DT B-NP -signing NN I-NP -of IN B-PP -a DT B-NP -merger NN I-NP -agreement NN I-NP -by IN B-PP -no DT B-ADVP -later RB I-ADVP -than IN B-PP -Oct. NNP B-NP -31 CD I-NP -. . 
O - -The DT B-NP -letter NN I-NP -, , O -attached VBN B-VP -to TO B-PP -a DT B-NP -filing NN I-NP -with IN B-PP -the DT B-NP -Securities NNP I-NP -and CC I-NP -Exchange NNP I-NP -Commission NNP I-NP -, , O -said VBD B-VP -the DT B-NP -approval NN I-NP -is VBZ B-VP -also RB B-ADVP -contingent JJ B-ADJP -upon IN B-PP -obtaining VBG B-VP -satisfactory JJ B-NP -financing NN I-NP -. . O - -An DT B-NP -A.P. NNP I-NP -Green NNP I-NP -official NN I-NP -declined VBD B-VP -to TO I-VP -comment VB I-VP -on IN B-PP -the DT B-NP -filing NN I-NP -. . O - -The DT B-NP -$ $ I-NP -40-a-share JJ I-NP -proposal NN I-NP -values VBZ B-VP -the DT B-NP -company NN I-NP -at IN B-PP -about RB B-NP -$ $ I-NP -106.6 CD I-NP -million CD I-NP -. . O - -A.P. NNP B-NP -Green NNP I-NP -currently RB B-ADVP -has VBZ B-VP -2,664,098 CD B-NP -shares NNS I-NP -outstanding JJ B-ADJP -. . O - -Its PRP$ B-NP -stock NN I-NP -closed VBD B-VP -at IN B-PP -$ $ B-NP -38 CD I-NP -, , O -up IN B-ADVP -$ $ B-NP -1.875 CD I-NP -, , O -in IN B-PP -national JJ B-NP -over-the-counter JJ I-NP -trading NN I-NP -. . O - -The DT B-NP -company NN I-NP -is VBZ B-VP -a DT B-NP -Mexico NNP I-NP -, , I-NP -Mo. NNP I-NP -, , I-NP -maker NN I-NP -of IN B-PP -refractory JJ B-NP -products NNS I-NP -. . O - -East NNP B-NP -Rock NNP I-NP -also RB B-ADVP -said VBD B-VP -in IN B-PP -the DT B-NP -filing NN I-NP -that IN B-SBAR -it PRP B-NP -boosted VBD B-VP -its PRP$ B-NP -stake NN I-NP -in IN B-PP -A.P. NNP B-NP -Green NNP I-NP -to TO B-PP -8.7 CD B-NP -% NN I-NP -. . O - -It PRP B-NP -now RB B-ADVP -holds VBZ B-VP -233,000 CD B-NP -A.P. NNP I-NP -Green NNP I-NP -common JJ I-NP -shares NNS I-NP -, , O -including VBG B-PP -30,000 CD B-NP -shares NNS I-NP -bought VBD B-VP -last JJ B-NP -Thursday NNP I-NP -for IN B-PP -$ $ B-NP -35.50 CD I-NP -to TO I-NP -$ $ I-NP -36.50 CD I-NP -a DT B-NP -share NN I-NP -. . 
O - -New NNP B-NP -York-based JJ I-NP -John NNP I-NP -Kuhns NNP I-NP -and CC I-NP -Robert NNP I-NP -MacDonald NNP I-NP -control NN B-VP -East NNP B-NP -Rock NNP I-NP -Partners NNP I-NP -Inc. NNP I-NP -, , O -the DT B-NP -sole JJ I-NP -general JJ I-NP -partner NN I-NP -of IN B-PP -East NNP B-NP -Rock NNP I-NP -Partners NNP I-NP -L.P NNP I-NP -. . O - -The DT B-NP -sole JJ I-NP -limited JJ I-NP -partner NN I-NP -of IN B-PP -the DT B-NP -partnership NN I-NP -is VBZ B-VP -Westwood NNP B-NP -Brick NNP I-NP -Lime NNP I-NP -Inc. NNP I-NP -, , O -an DT B-NP -indirect JJ I-NP -subsidiary NN I-NP -of IN B-PP -Westwood NNP B-NP -Group NNP I-NP -Inc NNP I-NP -. . O - -Both DT B-NP -Westwood NNP B-NP -Brick NNP I-NP -and CC O -Westwood NNP B-NP -Group NNP I-NP -are VBP B-VP -based VBN I-VP -in IN B-PP -Boston NNP B-NP -. . O - -Freight NN B-NP -rates NNS I-NP -, , O -declining VBG B-VP -for IN B-PP -most RBS B-NP -of IN B-PP -the DT B-NP -decade NN I-NP -because IN B-PP -of IN I-PP -competition NN B-NP -spurred VBN B-VP -by IN B-PP -deregulation NN B-NP -, , O -are VBP B-VP -bottoming VBG I-VP -out IN B-PRT -, , O -turning VBG B-VP -upward RB B-ADVP -and CC O -threatening VBG B-VP -to TO I-VP -fuel VB I-VP -inflation NN B-NP -. . O - -Trucking NNP B-NP -, , I-NP -shipping VBG I-NP -and CC I-NP -air-freight NN I-NP -companies NNS I-NP -have VBP B-VP -announced VBN I-VP -rate NN B-NP -increases NNS I-NP -, , O -scheduled VBN B-VP -for IN B-PP -this DT B-NP -fall NN I-NP -or CC O -early JJ B-NP -next JJ I-NP -year NN I-NP -, , O -reflecting VBG B-VP -higher JJR B-NP -costs NNS I-NP -and CC O -tightened VBD B-NP -demand NN I-NP -for IN B-PP -freight NN B-NP -transport NN I-NP -. . 
O - -Major JJ B-NP -shippers NNS I-NP -say VBP B-VP -they PRP B-NP -expect VBP B-VP -freight NN B-NP -rates NNS I-NP -to TO B-VP -rise VB I-VP -at IN B-ADVP -least JJS I-ADVP -as RB B-ADVP -fast RB I-ADVP -as IN B-PP -inflation NN B-NP -and CC B-ADVP -maybe RB I-ADVP -faster RBR B-ADVP -in IN B-PP -the DT B-NP -next JJ I-NP -few JJ I-NP -years NNS I-NP -. . O - -That DT B-NP -'s VBZ B-VP -a DT B-NP -big JJ I-NP -change NN I-NP -from IN B-PP -recent JJ B-NP -years NNS I-NP -when WRB B-ADVP -freight NN B-NP -haulage NN I-NP -was VBD B-VP -a DT B-NP -bright JJ I-NP -spot NN I-NP -for IN B-PP -U.S. NNP B-NP -productivity NN I-NP -, , O -helping VBG B-VP -to TO I-VP -restrain VB I-VP -inflation NN B-NP -and CC O -make VB B-VP -U.S. NNP B-NP -industry NN I-NP -more RBR B-ADJP -competitive JJ I-ADJP -abroad RB B-ADVP -. . O - -`` `` O -Demand NN B-NP -has VBZ B-VP -caught VBN I-VP -up IN B-PRT -with IN B-PP -the DT B-NP -supply NN I-NP -of IN B-PP -certain JJ B-NP -types NNS I-NP -of IN B-PP -freight NN B-NP -transportation NN I-NP -, , O -and CC O -rates NNS B-NP -are VBP B-VP -starting VBG I-VP -to TO I-VP -move VB I-VP -up IN B-ADVP -'' '' O -at IN B-PP -a DT B-NP -rate NN I-NP -`` `` O -close RB B-ADJP -to TO B-PP -or CC O -slightly RB B-ADJP -more JJR I-ADJP -than IN B-PP -the DT B-NP -inflation NN I-NP -rate NN I-NP -, , O -'' '' O -said VBD B-VP -Clifford NNP B-NP -Sayre NNP I-NP -, , O -director NN B-NP -of IN B-PP -logistics NNS B-NP -at IN B-PP -Du NNP B-NP -Pont NNP I-NP -Co NNP I-NP -. . O - -Shippers NNS B-NP -surveyed VBN B-VP -recently RB B-ADVP -by IN B-PP -Ohio NNP B-NP -State NNP I-NP -University NNP I-NP -said VBD B-VP -they PRP B-NP -expect VBP B-VP -their PRP$ B-NP -freight-transport JJ I-NP -, , I-NP -storage NN I-NP -and CC I-NP -distribution NN I-NP -costs NNS I-NP -to TO B-VP -rise VB I-VP -about IN B-NP -4 CD I-NP -% NN I-NP -this DT B-NP -year NN I-NP -. . 
O - -Only RB B-NP -10 CD I-NP -% NN I-NP -of IN B-PP -the DT B-NP -250 CD I-NP -shippers NNS I-NP -polled VBN B-VP -expected VBN B-VP -their PRP$ B-NP -freight-transport JJ I-NP -costs NNS I-NP -to TO B-VP -decrease VB I-VP -, , O -compared VBN B-PP -with IN B-PP -30 CD B-NP -% NN I-NP -who WP B-NP -had VBD B-VP -looked VBN I-VP -to TO B-PP -freight VB B-NP -transport NN I-NP -to TO B-VP -reduce VB I-VP -costs NNS B-NP -in IN B-PP -past JJ B-NP -years NNS I-NP -. . O - -`` `` O -This DT B-NP -is VBZ B-VP -the DT B-NP -first JJ I-NP -year NN I-NP -since IN B-PP -transportation NN B-NP -deregulation NN I-NP -in IN B-PP -1980 CD B-NP -that IN B-ADVP -we PRP B-NP -have VBP B-VP -had VBN I-VP -such JJ B-NP -a DT I-NP -dramatic JJ I-NP -and CC I-NP -broad-based JJ I-NP -upturn NN I-NP -in IN B-PP -perceived VBN B-NP -transportation NN I-NP -rates NNS I-NP -, , O -'' '' O -said VBD B-VP -Bernard NNP B-NP -LaLonde NNP I-NP -, , O -a DT B-NP -transportation NN I-NP -logistics NNS I-NP -professor NN I-NP -at IN B-PP -Ohio NNP B-NP -State NNP I-NP -in IN B-PP -Columbus NNP B-NP -. . O - -The DT B-NP -deregulation NN I-NP -of IN B-PP -railroads NNS B-NP -and CC I-NP -trucking NN I-NP -companies NNS I-NP -that WDT B-NP -began VBD B-VP -in IN B-PP -1980 CD B-NP -enabled VBD B-VP -shippers NNS B-NP -to TO B-VP -bargain VB I-VP -for IN B-PP -transportation NN B-NP -. . O - -Carriers NNP B-NP -could MD B-VP -use VB I-VP -their PRP$ B-NP -equipment NN I-NP -more RBR B-ADVP -efficiently RB I-ADVP -, , O -leading VBG B-VP -to TO B-PP -overcapacity NN B-NP -they PRP B-NP -were VBD B-VP -eager JJ B-ADJP -to TO B-VP -fill VB I-VP -. . 
O - -Shippers NNS B-NP -cut VBP B-VP -about RB B-NP -$ $ I-NP -35 CD I-NP -billion CD I-NP -from IN B-PP -their PRP$ B-NP -annual JJ I-NP -, , I-NP -inter-city JJ I-NP -truck NN I-NP -and CC I-NP -rail NN I-NP -costs NNS I-NP -, , O -to TO B-PP -about RB B-NP -$ $ I-NP -150 CD I-NP -billion CD I-NP -, , O -or CC O -about IN B-NP -6.4 CD I-NP -% NN I-NP -of IN B-PP -gross JJ B-NP -national JJ I-NP -product NN I-NP -, , O -down RB B-ADVP -from IN B-PP -8 CD B-NP -% NN I-NP -of IN B-PP -GNP NNP B-NP -in IN B-PP -1981 CD B-NP -. . O - -But CC O -with IN B-PP -much NN B-NP -of IN B-PP -the DT B-NP -inefficiency NN I-NP -squeezed VBN B-VP -out IN B-PP -of IN B-PP -the DT B-NP -freight-transport JJ I-NP -system NN I-NP -, , O -rising VBG B-NP -costs NNS I-NP -are VBP B-VP -likely JJ B-ADJP -to TO B-VP -be VB I-VP -reflected VBN I-VP -directly RB B-ADVP -in IN B-PP -higher JJR B-NP -freight NN I-NP -rates NNS I-NP -. . O - -`` `` O -Shippers NNS B-NP -are VBP B-VP -saying VBG I-VP -` `` O -the DT B-NP -party NN I-NP -'s POS B-VP -over IN B-ADJP -, , O -' '' O -'' '' O -said VBD B-VP -Mr. NNP B-NP -LaLonde NNP I-NP -. . O - -`` `` O -Shippers NNS B-NP -wo MD B-VP -n't RB I-VP -be VB I-VP -able JJ B-ADJP -to TO B-VP -look VB I-VP -for IN B-PP -transportation-cost JJ B-NP -savings NNS I-NP -as IN B-SBAR -they PRP B-NP -have VBP B-VP -for IN B-PP -the DT B-NP -last JJ I-NP -eight CD I-NP -or CC I-NP -nine CD I-NP -years NNS I-NP -. . O - -Transport NN B-NP -rates NNS I-NP -wo MD B-VP -n't RB I-VP -be VB I-VP -an DT B-NP -opportunity NN I-NP -for IN B-PP -offsetting VBG B-VP -cost NN B-NP -increases NNS I-NP -in IN B-PP -other JJ B-NP -segments NNS I-NP -of IN B-PP -the DT B-NP -economy NN I-NP -. . O -'' '' O - -Robert NNP B-NP -Delaney NNP I-NP -, , O -a DT B-NP -consultant NN I-NP -at IN B-PP -Arthur NNP B-NP -D. NNP I-NP -Little NNP I-NP -Inc. NNP I-NP -, , O -Cambridge NNP B-NP -, , O -Mass. 
NNP B-NP -, , O -said VBD B-VP -`` `` O -We PRP B-NP -'ve VBP B-VP -gotten VBN I-VP -all PDT B-NP -the DT I-NP -benefits NNS I-NP -of IN B-PP -deregulation NN B-NP -in IN B-PP -freight-cost JJ B-NP -reductions NNS I-NP -. . O - -Now RB B-ADVP -we PRP B-NP -are VBP B-VP -starting VBG I-VP -to TO I-VP -see VB I-VP -real JJ B-NP -freight-rate JJ I-NP -increases NNS I-NP -as IN B-SBAR -carriers NNS B-NP -replace VBP B-VP -equipment NN B-NP -, , O -pay VB B-VP -higher JJR B-NP -fuel NN I-NP -costs NNS I-NP -and CC O -pay VB B-VP -more JJR B-NP -for IN B-PP -labor NN B-NP -. . O - -You PRP B-NP -'ll MD B-VP -see VB I-VP -carriers NNS B-NP -try VB B-VP -to TO I-VP -recoup VB I-VP -some DT B-NP -of IN B-PP -the DT B-NP -price NN I-NP -cutting VBG I-NP -that WDT B-NP -occurred VBD B-VP -previously RB B-ADVP -. . O -'' '' O - -Not RB B-NP -everyone NN I-NP -believes VBZ B-VP -that IN B-SBAR -the DT B-NP -good JJ I-NP -times NNS I-NP -are VBP B-VP -over IN B-ADJP -for IN B-PP -shippers NNS B-NP -. . O - -`` `` O -There EX B-NP -'s VBZ B-VP -still RB B-ADVP -a DT B-NP -lot NN I-NP -of IN B-PP -pressure NN B-NP -on IN B-PP -rates NNS B-NP -in IN B-PP -both DT B-NP -rail NN I-NP -and CC I-NP -truck NN I-NP -, , O -'' '' O -said VBD B-VP -Gerard NNP B-NP -McCullough NNP I-NP -, , O -lecturer NN B-NP -in IN B-PP -transportation NN B-NP -at IN B-PP -Massachusetts NNP B-NP -Institute NNP I-NP -of IN B-PP -Technology NNP B-NP -. . O - -Less-than-truckload JJ B-NP -companies NNS I-NP -, , O -which WDT B-NP -carry VBP B-VP -the DT B-NP -freight NN I-NP -of IN B-PP -several JJ B-NP -shippers NNS I-NP -in IN B-PP -each DT B-NP -truck NN I-NP -trailer NN I-NP -, , O -discounted VBD B-VP -away RB B-ADVP -a DT B-NP -4.7 CD I-NP -% NN I-NP -rate NN I-NP -increase NN I-NP -implemented VBD B-VP -last JJ B-NP -April NNP I-NP -. . O - -The DT B-NP -carriers NNS I-NP -were VBD B-VP -competing VBG I-VP -fiercely RB B-ADVP -for IN B-PP -market NN B-NP -share NN I-NP -. . 
O - -Railroad-rate JJ B-NP -increases NNS I-NP -are VBP B-VP -likely JJ B-ADJP -to TO B-VP -be VB I-VP -restrained VBN I-VP -by IN B-PP -weakening VBG B-NP -rail-traffic JJ I-NP -levels NNS I-NP -and CC O -keen JJ B-NP -competition NN I-NP -for IN B-PP -freight NN B-NP -from IN B-PP -trucks NNS B-NP -. . O - -An DT B-NP -official NN I-NP -at IN B-PP -Consolidated NNP B-NP -Freightways NNP I-NP -Inc. NNP I-NP -, , O -a DT B-NP -Menlo NNP I-NP -Park NNP I-NP -, , I-NP -Calif. NNP I-NP -, , I-NP -less-than-truckload JJ I-NP -carrier NN I-NP -, , O -said VBD B-VP -rate NN B-NP -discounting NN I-NP -in IN B-PP -that DT B-NP -industry NN I-NP -has VBZ B-VP -begun VBN I-VP -to TO I-VP -`` `` O -stabilize VB B-VP -. . O -'' '' O - -Consolidated NNP B-NP -Freightways NNP I-NP -plans VBZ B-VP -to TO I-VP -raise VB I-VP -its PRP$ B-NP -rates NNS I-NP -5.3 CD B-NP -% NN I-NP -late JJ B-NP -this DT I-NP -year NN I-NP -or CC O -early JJ B-NP -next JJ I-NP -year NN I-NP -, , O -and CC O -at IN B-NP -least JJS I-NP -two CD I-NP -competitors NNS I-NP -have VBP B-VP -announced VBN I-VP -similar JJ B-NP -increases NNS I-NP -. . O - -Truckers NNS B-NP -are VBP B-VP -`` `` O -trying VBG B-VP -to TO I-VP -send VB I-VP -signals NNS B-NP -that IN B-SBAR -they PRP B-NP -need VBP B-VP -to TO I-VP -stop VB I-VP -the DT B-NP -bloodletting NN I-NP -, , O -forget VB B-VP -about IN B-PP -market NN B-NP -share NN I-NP -and CC O -go VB B-VP -for IN B-PP -higher JJR B-NP -rates NNS I-NP -, , O -'' '' O -said VBD B-VP -Michael NNP B-NP -Lloyd NNP I-NP -, , O -an DT B-NP -analyst NN I-NP -at IN B-PP -Salomon NNP B-NP -Bros NNP I-NP -. . O - -And CC O -`` `` O -shippers NNS B-NP -are VBP B-VP -getting VBG I-VP -the DT B-NP -feeling NN I-NP -that IN B-SBAR -they PRP B-NP -have VBP B-VP -played VBN I-VP -one CD B-NP -trucker NN I-NP -off IN B-ADVP -against IN B-PP -another DT B-NP -as RB B-NP -much JJ I-NP -as IN B-SBAR -they PRP B-NP -can MD B-VP -, , O -'' '' O -he PRP B-NP -said VBD B-VP -. . 
O - -Air-freight NN B-NP -carriers NNS I-NP -raised VBD B-VP -their PRP$ B-NP -rates NNS I-NP -for IN B-PP -U.S. NNP B-NP -products NNS I-NP -going VBG B-VP -across IN B-PP -the DT B-NP -Pacific NNP I-NP -to TO B-PP -Asia NNP B-NP -by IN B-PP -about IN B-NP -20 CD I-NP -% NN I-NP -earlier RBR B-NP -this DT I-NP -month NN I-NP -. . O - -And CC O -Japan NNP B-NP -Air NNP I-NP -Lines NNPS I-NP -said VBD B-VP -it PRP B-NP -plans VBZ B-VP -to TO I-VP -boost VB I-VP -its PRP$ B-NP -rates NNS I-NP -a DT B-NP -further JJ I-NP -25 CD I-NP -% NN I-NP -over IN B-PP -the DT B-NP -next JJ I-NP -two CD I-NP -years NNS I-NP -. . O - -Such JJ B-NP -rate NN I-NP -increases NNS I-NP -`` `` O -will MD B-VP -increase VB I-VP -the DT B-NP -total JJ I-NP -cost NN I-NP -of IN B-PP -U.S. NNP B-NP -products NNS I-NP -and CC O -slow JJ B-VP -down RP B-PRT -the DT B-NP -rate NN I-NP -of IN B-PP -increase NN B-NP -of IN B-PP -U.S. NNP B-NP -exports NNS I-NP -, , O -'' '' O -said VBD B-VP -Richard NNP B-NP -Connors NNP I-NP -, , O -a DT B-NP -senior JJ I-NP -vice NN I-NP -president NN I-NP -of IN B-PP -Yusen NNP B-NP -Air NNP I-NP -& CC I-NP -Sea NNP I-NP -Service NNP I-NP -U.S.A. NNP I-NP -Inc. NNP I-NP -, , O -the DT B-NP -U.S. NNP I-NP -air-freight-forwarding JJ I-NP -subsidiary NN I-NP -of IN B-PP -Nippon NNP B-NP -Yusen NNP I-NP -Kaisha NNP I-NP -of IN B-PP -Japan NNP B-NP -. . O - -Ship NN B-NP -companies NNS I-NP -carrying VBG B-VP -bulk NN B-NP -commodities NNS I-NP -, , O -such JJ B-PP -as IN I-PP -oil NN B-NP -, , O -grain NN B-NP -, , O -coal NN B-NP -and CC O -iron NN B-NP -ore NN I-NP -, , O -have VBP B-VP -been VBN I-VP -able JJ B-ADJP -to TO B-VP -increase VB I-VP -their PRP$ B-NP -rates NNS I-NP -in IN B-PP -the DT B-NP -last JJ I-NP -couple NN I-NP -of IN B-PP -years NNS B-NP -. . 
O - -Some DT B-NP -bulk NN I-NP -shipping VBG I-NP -rates NNS I-NP -have VBP B-VP -increased VBN I-VP -`` `` O -3 CD B-NP -% NN I-NP -to TO I-NP -4 CD I-NP -% NN I-NP -in IN B-PP -the DT B-NP -past JJ I-NP -few JJ I-NP -months NNS I-NP -, , O -'' '' O -said VBD B-VP -Salomon NNP B-NP -'s POS B-NP -Mr. NNP I-NP -Lloyd NNP I-NP -. . O - -And CC O -ship NN B-NP -lines NNS I-NP -carrying VBG B-VP -containers NNS B-NP -are VBP B-VP -also RB I-VP -trying VBG I-VP -to TO I-VP -raise VB I-VP -their PRP$ B-NP -rates NNS I-NP -. . O - -Carriers NNP B-NP -boosted VBD B-VP -rates NNS B-NP -more JJR B-NP -than IN I-NP -10 CD I-NP -% NN I-NP -in IN B-PP -the DT B-NP -North NNP I-NP -Atlantic NNP I-NP -between IN B-PP -the DT B-NP -U.S. NNP I-NP -and CC O -Europe NNP B-NP -last JJ B-NP -September NNP I-NP -, , O -hoping VBG B-VP -to TO I-VP -partly RB I-VP -restore VB I-VP -rates NNS B-NP -to TO B-PP -earlier JJR B-NP -levels NNS I-NP -. . O - -Ship NN B-NP -lines NNS I-NP -operating VBG B-VP -in IN B-PP -the DT B-NP -Pacific NNP I-NP -plan NN B-VP -to TO I-VP -raise VB I-VP -rates NNS B-NP -on IN B-PP -containers NNS B-NP -carrying VBG B-VP -U.S. NNP B-NP -exports NNS I-NP -to TO B-PP -Asia NNP B-NP -about IN B-NP -10 CD I-NP -% NN I-NP -, , O -effective JJ B-ADJP -next JJ B-NP -April NNP I-NP -. . O - -MGM NNP B-NP -Grand NNP I-NP -Inc. NNP I-NP -said VBD B-VP -it PRP B-NP -filed VBD B-VP -a DT B-NP -registration NN I-NP -statement NN I-NP -with IN B-PP -the DT B-NP -Securities NNP I-NP -and CC I-NP -Exchange NNP I-NP -Commission NNP I-NP -for IN B-PP -a DT B-NP -public JJ I-NP -offering NN I-NP -of IN B-PP -six CD B-NP -million CD I-NP -common JJ I-NP -shares NNS I-NP -. . O - -The DT B-NP -Beverly NNP I-NP -Hills NNP I-NP -, , I-NP -Calif.-based JJ I-NP -company NN I-NP -said VBD B-VP -it PRP B-NP -would MD B-VP -have VB I-VP -26.9 CD B-NP -million CD I-NP -common JJ I-NP -shares NNS I-NP -outstanding JJ B-ADJP -after IN B-PP -the DT B-NP -offering NN I-NP -. . 
O - -The DT B-NP -hotel NN I-NP -and CC I-NP -Gaming NNP I-NP -company NN I-NP -said VBD B-VP -Merrill NNP B-NP -Lynch NNP I-NP -Capital NNP I-NP -Markets NNPS I-NP -will MD B-VP -lead VB I-VP -the DT B-NP -underwriters NNS I-NP -. . O - -Proceeds NNS B-NP -from IN B-PP -the DT B-NP -sale NN I-NP -will MD B-VP -be VB I-VP -used VBN I-VP -for IN B-PP -remodeling VBG B-NP -and CC I-NP -refurbishing VBG I-NP -projects NNS I-NP -, , B-PP -as RB I-PP -well RB I-PP -as IN I-PP -for IN B-PP -the DT B-NP -planned VBN I-NP -MGM NNP I-NP -Grand NNP I-NP -hotel\/casino NN I-NP -and CC I-NP -theme NN I-NP -park NN I-NP -. . O - -Bob NNP B-NP -Stone NNP I-NP -stewed JJ B-VP -over IN B-PP -a DT B-NP -letter NN I-NP -from IN B-PP -his PRP$ B-NP -manager NN I-NP -putting VBG B-VP -him PRP B-NP -on IN B-PP -probation NN B-NP -for IN B-PP -insubordination NN B-NP -. . O - -Mr. NNP B-NP -Stone NNP I-NP -thought VBD B-VP -the DT B-NP -discipline NN I-NP -was VBD B-VP -unfair JJ B-ADJP -; : O -he PRP B-NP -believed VBD B-VP -that IN B-SBAR -his PRP$ B-NP -manager NN I-NP -wanted VBD B-VP -to TO I-VP -get VB I-VP -rid JJ B-ADJP -of IN B-PP -him PRP B-NP -for IN B-PP -personal JJ B-NP -reasons NNS I-NP -. . O - -Unable JJ B-ADJP -to TO B-VP -persuade VB I-VP -the DT B-NP -manager NN I-NP -to TO B-VP -change VB I-VP -his PRP$ B-NP -decision NN I-NP -, , O -he PRP B-NP -went VBD B-VP -to TO B-PP -a DT B-NP -`` `` I-NP -company NN I-NP -court NN I-NP -'' '' O -for IN B-PP -a DT B-NP -hearing NN I-NP -. . O - -At IN B-PP -the DT B-NP -scheduled VBN I-NP -time NN I-NP -, , O -Mr. NNP B-NP -Stone NNP I-NP -entered VBD B-VP -a DT B-NP -conference NN I-NP -room NN I-NP -in IN B-PP -a DT B-NP -building NN I-NP -near IN B-PP -where WRB B-ADVP -he PRP B-NP -worked VBD B-VP -. . 
O - -After IN B-SBAR -the DT B-NP -three CD I-NP -members NNS I-NP -of IN B-PP -the DT B-NP -court NN I-NP -introduced VBD B-VP -themselves PRP B-NP -, , O -the DT B-NP -chairman NN I-NP -of IN B-PP -the DT B-NP -panel NN I-NP -said VBD B-VP -: : O -`` `` O -Go VB B-VP -ahead RB B-ADVP -and CC O -tell VB B-VP -us PRP B-NP -what WP B-NP -happened VBD B-VP -. . O - -We PRP B-NP -may MD B-VP -ask VB I-VP -questions NNS B-NP -as IN B-SBAR -you PRP B-NP -go VBP B-VP -along IN B-PRT -, , O -or CC O -we PRP B-NP -may MD B-VP -wait VB I-VP -until IN B-PP -the DT B-NP -end NN I-NP -. . O -'' '' O - -No DT B-NP -lawyers NNS I-NP -or CC I-NP -tape NN I-NP -recorders NNS I-NP -were VBD B-VP -present JJ B-ADJP -. . O - -The DT B-NP -only RB I-NP -extra JJ I-NP -people NNS I-NP -were VBD B-VP -a DT B-NP -couple NN I-NP -of IN B-PP -personnel NNS B-NP -specialists NNS I-NP -, , O -one CD B-NP -of IN B-PP -whom WP B-NP -knew VBD B-VP -Mr. NNP B-NP -Stone NNP I-NP -'s POS B-NP -case NN I-NP -intimately RB B-ADVP -and CC O -would MD B-VP -help VB I-VP -fill VB I-VP -in IN B-PRT -any DT B-NP -facts NNS I-NP -needed VBN B-VP -to TO B-VP -give VB I-VP -the DT B-NP -court NN I-NP -the DT B-NP -full JJ I-NP -picture NN I-NP -. . O - -Over IN B-PP -a DT B-NP -cup NN I-NP -of IN B-PP -coffee NN B-NP -, , O -Mr. NNP B-NP -Stone NNP I-NP -told VBD B-VP -his PRP$ B-NP -story NN I-NP -. . O - -He PRP B-NP -talked VBD B-VP -about IN B-NP -20 CD I-NP -minutes NNS I-NP -. . O - -When WRB B-ADVP -he PRP B-NP -was VBD B-VP -through IN B-ADJP -, , O -the DT B-NP -court NN I-NP -members NNS I-NP -asked VBD B-VP -many JJ B-NP -questions NNS I-NP -, , O -then RB B-ADVP -the DT B-NP -chairman NN I-NP -said VBD B-VP -they PRP B-NP -would MD B-VP -like VB I-VP -to TO I-VP -hear VB I-VP -his PRP$ B-NP -manager NN I-NP -'s POS B-NP -side NN I-NP -and CC O -talk VB B-VP -to TO B-PP -witnesses NNS B-NP -. . O - -The DT B-NP -chairman NN I-NP -promised VBD B-VP -Mr. 
NNP B-NP -Stone NNP I-NP -a DT B-NP -decision NN I-NP -within IN B-PP -two CD B-NP -weeks NNS I-NP -. . O - -Bob NNP B-NP -Stone NNP I-NP -is VBZ B-VP -a DT B-NP -fictional JJ I-NP -name NN I-NP -, , O -but CC O -the DT B-NP -incident NN I-NP -described VBN B-VP -is VBZ B-VP -real JJ B-ADJP -. . O - -It PRP B-NP -happened VBD B-VP -at IN B-PP -Northrop NNP B-NP -Corp. NNP I-NP -in IN B-PP -Los NNP B-NP -Angeles NNP I-NP -. . O - -The DT B-NP -court NN I-NP -is VBZ B-VP -called VBN I-VP -the DT B-NP -Management NNP I-NP -Appeals NNP I-NP -Committee NNP I-NP -, , O -or CC O -just RB B-NP -`` `` I-NP -MAC NNP I-NP -, , O -'' '' O -and CC O -it PRP B-NP -is VBZ B-VP -likely JJ B-ADJP -to TO B-VP -hear VB I-VP -a DT B-NP -couple NN I-NP -of IN I-NP -dozen NN I-NP -cases VBZ I-NP -a DT B-NP -year NN I-NP -. . O - -Alter VB B-VP -some DT B-NP -details NNS I-NP -of IN B-PP -this DT B-NP -example NN I-NP -and CC O -it PRP B-NP -could MD B-VP -be VB I-VP -taking VBG I-VP -place NN B-NP -today NN B-ADVP -at IN B-PP -Federal NNP B-NP -Express NNP I-NP -in IN B-PP -Memphis NNP B-NP -, , O -the DT B-NP -Defense NNP I-NP -and CC I-NP -Underseas NNP I-NP -Systems NNP I-NP -divisions NNS I-NP -of IN B-PP -Honeywell NNP B-NP -in IN B-PP -Minneapolis NNP B-NP -, , O -a DT B-NP -General NNP I-NP -Electric NNP I-NP -plant NN I-NP -in IN B-PP -Columbia NNP B-NP -, , O -Md. NNP B-NP -, , O -or CC O -a DT B-NP -number NN I-NP -of IN B-PP -other JJ B-NP -companies NNS I-NP -. . O - -These DT B-NP -firms NNS I-NP -are VBP B-VP -pioneers NNS B-NP -in IN B-PP -a DT B-NP -significant JJ I-NP -new JJ I-NP -trend NN I-NP -in IN B-PP -the DT B-NP -corporate JJ I-NP -world NN I-NP -: : O -the DT B-NP -rise NN I-NP -of IN B-PP -what WP B-NP -I PRP B-NP -call VBP B-VP -corporate JJ B-NP -due JJ I-NP -process NN I-NP -. . 
O - -Although IN B-SBAR -corporate JJ B-NP -due JJ I-NP -process NN I-NP -is VBZ B-VP -practiced VBN I-VP -today NN B-NP -in IN B-PP -few JJ B-NP -companies NNS I-NP --- : O -perhaps RB B-ADVP -40 CD B-NP -to TO I-NP -60 CD I-NP --- : O -it PRP B-NP -is VBZ B-VP -one CD B-NP -of IN B-PP -the DT B-NP -fastest JJS I-NP -developing VBG I-NP -trends NNS I-NP -in IN B-PP -industry NN B-NP -. . O - -In IN B-PP -the DT B-NP -coming VBG I-NP -decade NN I-NP -a DT B-NP -majority NN I-NP -of IN B-PP -people-oriented JJ B-NP -companies NNS I-NP -are VBP B-VP -likely JJ B-ADJP -to TO B-VP -adopt VB I-VP -it PRP B-NP -. . O - -Corporate JJ B-NP -due JJ I-NP -process NN I-NP -appeals NNS B-VP -to TO B-PP -management NN B-NP -for IN B-PP -a DT B-NP -variety NN I-NP -of IN B-PP -reasons NNS B-NP -. . O - -It PRP B-NP -reduces VBZ B-VP -lawsuits NNS B-NP -from IN B-PP -disgruntled JJ B-NP -employees NNS I-NP -and CC I-NP -ex-employees NNS I-NP -, , O -with IN B-PP -all DT B-NP -that WDT B-NP -means VBZ B-VP -for IN B-PP -reduced VBN B-NP -legal JJ I-NP -costs NNS I-NP -and CC O -better RBR B-NP -public JJ I-NP -relations NNS I-NP -. . O - -It PRP B-NP -helps VBZ B-VP -to TO I-VP -keep VB I-VP -out IN B-PRT -unions NNS B-NP -. . O - -It PRP B-NP -increases VBZ B-VP -employee NN B-NP -commitment NN I-NP -to TO B-PP -the DT B-NP -company NN I-NP -, , O -with IN B-PP -all DT B-NP -that WDT B-NP -means VBZ B-VP -for IN B-PP -efficiency NN B-NP -and CC O -quality NN B-NP -control NN I-NP -. . O - -What WP B-NP -must MD O -your PRP$ B-NP -management NN I-NP -team NN I-NP -do VBP B-VP -to TO B-VP -establish VB I-VP -corporate JJ B-NP -due JJ I-NP -process NN I-NP -? . O - -Here RB B-ADVP -are VBP B-VP -four CD B-NP -key JJ I-NP -steps NNS I-NP -: : O - -1 CD B-LST -. . O -Make VB B-VP -sure JJ B-ADJP -you PRP B-NP -have VBP B-VP -a DT B-NP -strong JJ I-NP -personnel NNS I-NP -department NN I-NP -. . 
O - -It PRP B-NP -must MD B-VP -be VB I-VP -able JJ B-ADJP -to TO B-VP -handle VB I-VP -most RBS B-NP -of IN B-PP -the DT B-NP -complaints NNS I-NP -that WDT B-NP -can MD B-VP -not RB I-VP -be VB I-VP -solved VBN I-VP -in IN B-PP -the DT B-NP -trenches NNS I-NP -by IN B-PP -managers NNS B-NP -and CC O -their PRP$ B-NP -subordinates NNS I-NP -, , O -else RB B-ADVP -the DT B-NP -company NN I-NP -court NN I-NP -or CC I-NP -adjudicators NNS I-NP -will MD B-VP -be VB B-VP -inundated VBN I-VP -with IN B-PP -cases NNS B-NP -. . O - -At IN B-PP -Polaroid NNP B-NP -, , O -the DT B-NP -Personnel NNP I-NP -Policy NNP I-NP -Planning NNP I-NP -Committee NNP I-NP -may MD B-VP -hear VB I-VP -only RB B-NP -about IN I-NP -20 CD I-NP -cases VBZ I-NP -a DT B-NP -year NN I-NP -; : O -the DT B-NP -rest NN I-NP -of IN B-PP -the DT B-NP -many JJ I-NP -hundreds NNS I-NP -of IN B-PP -complaints NNS B-NP -are VBP B-VP -resolved VBN I-VP -at IN B-PP -earlier JJR B-NP -stages NNS I-NP -. . O - -At IN B-PP -TWA NNP B-NP -, , O -the DT B-NP -System NNP I-NP -Board NNP I-NP -of IN B-PP -Adjustment NNP B-NP -hears VBZ B-VP -50 CD B-NP -to TO I-NP -75 CD I-NP -cases VBZ I-NP -a DT B-NP -year NN I-NP -, , O -only RB B-NP -a DT I-NP -fraction NN I-NP -of IN B-PP -the DT B-NP -complaints NNS I-NP -brought VBN B-VP -to TO B-PP -personnel NNS B-NP -specialists NNS I-NP -. . O - -At IN B-PP -Citicorp NNP B-NP -, , O -the DT B-NP -Problem NNP I-NP -Review NNP I-NP -Board NNP I-NP -may MD B-VP -hear VB I-VP -only RB B-NP -12 CD I-NP -or CC I-NP -so RB I-NP -cases VBZ I-NP -because IN B-PP -of IN I-PP -personnel NNS B-NP -'s POS B-NP -skill NN I-NP -in IN B-PP -complaint-resolution NN B-NP -. . 
O - -In IN B-PP -a DT B-NP -typical JJ I-NP -year NN I-NP -, , O -up IN B-NP -to TO I-NP -20 CD I-NP -% NN I-NP -of IN B-PP -the DT B-NP -work NN I-NP -force NN I-NP -goes VBZ B-VP -to TO B-PP -personnel NNS B-NP -specialists NNS I-NP -with IN B-PP -complaints NNS B-NP -of IN B-PP -unfair JJ B-NP -treatment NN I-NP -. . O - -In IN B-PP -a DT B-NP -large JJ I-NP -company NN I-NP -that WDT B-NP -means VBZ B-VP -many JJ B-NP -hundreds NNS I-NP -of IN B-PP -complaints NNS B-NP -for IN B-PP -personnel NNS B-NP -to TO B-VP -handle VB I-VP -. . O - -2 CD B-LST -. . O -Formally RB B-ADVP -or CC I-ADVP -informally RB I-ADVP -, , O -train NN B-VP -all DT B-NP -your PRP$ I-NP -managers NNS I-NP -and CC I-NP -supervisors NNS I-NP -in IN B-PP -the DT B-NP -company NN I-NP -'s POS B-NP -due-process NN I-NP -approach NN I-NP -. . O - -See VB B-VP -that IN B-SBAR -they PRP B-NP -know VBP B-VP -company NN B-NP -personnel NNS I-NP -policy NN I-NP -backwards RB B-ADVP -and CC I-ADVP -forwards RB I-ADVP -, , O -for IN O -it PRP B-NP -is VBZ B-VP -the DT B-NP -`` `` I-NP -law NN I-NP -'' '' O -governing VBG B-VP -company NN B-NP -courts NNS I-NP -and CC I-NP -adjudicators NNS I-NP -. . O - -Coach NNP B-VP -them PRP B-NP -in IN B-PP -handling NN B-VP -complaints NNS B-NP -so RB B-SBAR -that IN I-SBAR -they PRP B-NP -can MD B-VP -resolve VB I-VP -problems NNS B-NP -immediately RB B-ADVP -. . 
O - -In IN B-SBAR -case NN O -managers NNS B-NP -and CC O -personnel NNS B-NP -specialists NNS I-NP -are VBP B-VP -unsuccessful JJ B-ADJP -and CC O -subordinates NNS B-NP -take VBP B-VP -their PRP$ B-NP -complaints NNS I-NP -to TO B-PP -a DT B-NP -company NN I-NP -court NN I-NP -or CC I-NP -adjudicator NN I-NP -, , O -teach VB B-VP -managers NNS B-NP -to TO B-VP -accept VB I-VP -reversals NNS B-NP -as IN B-PP -a DT B-NP -fact NN I-NP -of IN B-PP -business NN B-NP -life NN I-NP -, , O -for IN O -in IN B-PP -a DT B-NP -good JJ I-NP -due-process NN I-NP -system NN I-NP -they PRP B-NP -are VBP B-VP -bound VBN I-VP -to TO I-VP -happen VB I-VP -. . O - -In IN B-PP -the DT B-NP -15 CD I-NP -companies NNS I-NP -I PRP B-NP -studied VBD B-VP -, , O -reversal NN B-NP -rates NNS I-NP -range VBP B-VP -on IN B-PP -the DT B-NP -average NN I-NP -from IN B-PP -20 CD B-NP -% NN I-NP -to TO B-PP -40 CD B-NP -% NN I-NP -. . O - -3 CD B-LST -. . O -Decide VB B-VP -whether IN O -you PRP B-NP -want VBP B-VP -a DT B-NP -panel NN I-NP -system NN I-NP -or CC O -a DT B-NP -single JJ I-NP -adjudicator NN I-NP -. . O - -A DT B-NP -panel NN I-NP -system NN I-NP -like IN B-PP -that DT B-NP -in NN B-PP -the DT B-NP -Bob NNP I-NP -Stone NNP I-NP -example NN I-NP -enjoys VBZ B-VP -such JJ B-NP -advantages NNS I-NP -as IN B-PP -high JJ B-NP -credibility NN I-NP -and CC O -, , O -for IN B-PP -the DT B-NP -panelists NNS I-NP -, , O -mutual JJ B-NP -support NN I-NP -. . O - -An DT B-NP -adjudicator NN I-NP -system NN I-NP --- : O -that DT B-INTJ -is VBZ I-INTJ -, , O -an DT B-NP -investigator NN I-NP -who WP B-NP -acts VBZ B-VP -first JJ B-ADVP -as IN B-PP -a DT B-NP -fact-finder NN I-NP -and CC O -then RB O -switches VBZ B-VP -hats NNS B-NP -and CC O -arbitrates VBZ B-VP -the DT B-NP -facts NNS I-NP --- : O -has VBZ B-VP -such JJ B-NP -advantages NNS I-NP -as IN B-PP -speed NN B-NP -, , O -flexibility NN B-NP -and CC O -maximum JJ B-NP -privacy NN I-NP -. . 
O - -International NNP B-NP -Business NNP I-NP -Machines NNPS I-NP -and CC O -Bank NNP B-NP -of IN B-PP -America NNP B-NP -are VBP B-VP -among IN B-PP -the DT B-NP -companies NNS I-NP -using VBG B-VP -the DT B-NP -single-adjudicator JJ I-NP -approach NN I-NP -. . O - -4 CD B-LST -. . O -Make VB B-VP -your PRP$ B-NP -due-process NN I-NP -system NN I-NP -visible JJ B-ADJP -. . O - -It PRP B-NP -wo MD B-VP -n't RB I-VP -do VB I-VP -any DT B-NP -good NN I-NP -for IN B-PP -anybody NN B-NP -unless IN B-SBAR -employees NNS B-NP -know VBP B-VP -about IN B-PP -it PRP B-NP -. . O - -Most JJS B-NP -managements NNS I-NP -hesitate VBP B-VP -to TO I-VP -go VB I-VP -all DT B-ADVP -out NN I-ADVP -in IN B-PP -advertising VBG B-VP -their PRP$ B-NP -due-process NN I-NP -systems NNS I-NP -for IN B-PP -fear NN B-NP -of IN B-PP -encouraging VBG B-VP -cranks NNS B-NP -and CC O -chronic JJ B-NP -soreheads NNS I-NP -to TO B-VP -file VB I-VP -complaints NNS B-NP -. . O - -On IN B-PP -the DT B-NP -other JJ I-NP -hand NN I-NP -, , O -they PRP B-NP -make VBP B-VP -sure JJ B-ADJP -at IN B-PP -a DT B-NP -minimum NN I-NP -that IN B-SBAR -their PRP$ B-NP -systems NNS I-NP -are VBP B-VP -described VBN I-VP -in IN B-PP -their PRP$ B-NP -employee NN I-NP -handbooks NNS I-NP -and CC O -talked VBD B-VP -up IN B-PRT -by IN B-PP -personnel NNS B-NP -specialists NNS I-NP -. . O - -Smith-Kline NNP B-NP -Beecham NNP I-NP -goes VBZ B-VP -further JJ B-ADVP -and CC O -sometimes RB B-VP -features VBZ I-VP -its PRP$ B-NP -grievance NN I-NP -procedure NN I-NP -in IN B-PP -closed-circuit JJ B-NP -TV NN I-NP -programs NNS I-NP -. . O - -Naturally RB B-ADVP -, , O -one CD B-NP -of IN B-PP -the DT B-NP -best JJS I-NP -ways NNS I-NP -to TO B-VP -guarantee VB I-VP -visibility NN B-NP -for IN B-PP -your PRP$ B-NP -due-process NN I-NP -system NN I-NP -is VBZ B-VP -for IN B-SBAR -top JJ B-NP -management NN I-NP -to TO B-VP -support VB I-VP -it PRP B-NP -. . 
O - -At IN B-PP -IBM NNP B-NP -, , O -the DT B-NP -company NN I-NP -'s POS B-NP -Open NNP I-NP -Door NNP I-NP -system NN I-NP -is VBZ B-VP -sometimes RB B-ADVP -the DT B-NP -subject NN I-NP -of IN B-PP -memorandums NNS B-NP -from IN B-PP -the DT B-NP -chief JJ I-NP -executive NN I-NP -. . O - -Federal NNP B-NP -Express NNP I-NP -goes VBZ B-VP -further JJ B-ADVP -in IN B-PP -this DT B-NP -respect NN I-NP -than IN B-PP -any DT B-NP -company NN I-NP -I PRP B-NP -know VBP B-VP -of IN B-PP -with IN B-PP -both DT B-NP -Frederick NNP B-NP -Smith NNP I-NP -and CC O -James NNP B-NP -Barksdale NNP I-NP -, , O -chief JJ B-NP -executive NN I-NP -and CC O -chief JJ B-NP -operating VBG I-NP -officer NN I-NP -, , O -respectively RB B-ADVP -, , O -sitting VBG B-VP -in IN B-PRT -on IN B-PP -the DT B-NP -Appeals NNP I-NP -Board NNP I-NP -almost RB B-NP -every DT I-NP -Tuesday NNP I-NP -to TO B-VP -decide VB I-VP -cases NNS B-NP -. . O - -Mr. NNP B-NP -Ewing NNP I-NP -is VBZ B-VP -a DT B-NP -consultant NN I-NP -based VBN B-VP -in IN B-PP -Winchester NNP B-NP -, , O -Mass. NNP B-NP -, , O -and CC O -author NN B-NP -of IN B-PP -`` `` O -Justice NNP B-NP -on IN B-PP -the DT B-NP -Job NNP I-NP -: : O -Resolving NNP B-VP -Grievances NNP B-NP -in IN B-PP -the DT B-NP -Nonunion NNP I-NP -Workplace NN I-NP -'' '' O --LRB- ( O -Harvard NNP B-NP -Business NNP I-NP -School NNP I-NP -Press NNP I-NP -, , O -1989 CD B-NP --RRB- ) O -. . O - -Tokyo NNP B-NP -stocks NNS I-NP -closed VBD B-VP -higher JJR B-ADVP -in IN B-PP -active JJ B-NP -trading NN I-NP -Friday NNP B-NP -, , O -marking VBG B-VP -the DT B-NP -fourth JJ I-NP -consecutive JJ I-NP -daily JJ I-NP -gain NN I-NP -since IN B-PP -Monday NNP B-NP -'s POS B-NP -sharp JJ I-NP -fall NN I-NP -. . O - -London JJ B-NP -shares NNS I-NP -closed VBD B-VP -moderately RB B-ADVP -lower JJR I-ADVP -in IN B-PP -thin JJ B-NP -trading NN I-NP -. . 
O - -At IN B-PP -Tokyo NNP B-NP -, , O -the DT B-NP -Nikkei NNP I-NP -index NN I-NP -of IN B-PP -225 CD B-NP -selected VBN I-NP -issues NNS I-NP -was VBD B-VP -up IN B-ADVP -112.16 CD B-NP -points NNS I-NP -to TO B-PP -35486.38 CD B-NP -. . O - -The DT B-NP -index NN I-NP -advanced VBD B-VP -266.66 CD B-NP -points NNS I-NP -Thursday NNP B-NP -. . O - -In IN B-PP -early JJ B-NP -trading NN I-NP -in IN B-PP -Tokyo NNP B-NP -Monday NNP B-NP -, , O -the DT B-NP -Nikkei NNP I-NP -index NN I-NP -rose VBD B-VP -101.98 CD B-NP -points NNS I-NP -to TO B-PP -35588.36 CD B-NP -. . O - -Friday NNP B-NP -'s POS B-NP -volume NN I-NP -on IN B-PP -the DT B-NP -First NNP I-NP -Section NN I-NP -was VBD B-VP -estimated VBN I-VP -at IN B-PP -one CD B-NP -billion CD I-NP -shares NNS I-NP -, , O -up IN B-ADVP -from IN B-PP -862 CD B-NP -million CD I-NP -Thursday NNP B-NP -. . O - -Winners NNS B-NP -outpaced VBD B-VP -losers NNS B-NP -, , O -572 CD B-ADVP -to TO I-ADVP -368 CD I-ADVP -, , O -while IN B-SBAR -181 CD B-NP -issues NNS I-NP -remained VBD B-VP -unchanged JJ B-ADJP -. . O - -With IN B-SBAR -investors NNS B-NP -relieved VBN B-ADJP -at IN B-PP -the DT B-NP -overnight JJ I-NP -gain NN I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -stocks NNS I-NP -, , O -small-lot JJ B-NP -buying NN I-NP -orders NNS I-NP -streamed VBD B-VP -into IN B-PP -the DT B-NP -market NN I-NP -from IN B-PP -early JJ B-NP -morning NN I-NP -, , O -making VBG B-VP -traders NNS B-NP -believe VBP B-VP -the DT B-NP -market NN I-NP -was VBD B-VP -back RB B-ADVP -to TO B-PP -normal JJ B-NP -. . 
O - -The DT B-NP -Nikkei NNP I-NP -, , O -which WDT B-NP -reached VBD B-VP -as RB B-ADJP -high JJ I-ADJP -as IN B-PP -35611.38 CD B-NP -right NN B-ADVP -after IN B-PP -the DT B-NP -opening NN I-NP -, , O -surrendered VBD B-VP -part NN B-NP -of IN B-PP -its PRP$ B-NP -early JJ I-NP -advance NN I-NP -toward IN B-PP -the DT B-NP -end NN I-NP -of IN B-PP -the DT B-NP -day NN I-NP -because IN B-PP -of IN I-PP -profit-taking NN B-NP -. . O - -`` `` O -Investors NNS B-NP -, , B-NP -especially RB I-NP -dealers NNS B-NP -, , O -do VBP B-VP -n't RB I-VP -want VB I-VP -to TO I-VP -hold VB I-VP -a DT B-NP -position NN I-NP -over IN B-PP -the DT B-NP -weekend NN I-NP -, , O -'' '' O -a DT B-NP -trader NN I-NP -at IN B-PP -Dai-ichi NNP B-NP -Securities NNP I-NP -said VBD B-VP -, , O -adding VBG B-VP -, , O -though RB B-ADVP -, , O -that IN B-SBAR -the DT B-NP -trading NN I-NP -mood NN I-NP -remained VBD B-VP -positive JJ B-ADJP -through IN B-PP -the DT B-NP -afternoon NN I-NP -session NN I-NP -. . O - -The DT B-NP -Tokyo NNP I-NP -Stock NNP I-NP -Price NNP I-NP -Index NNP I-NP --LRB- ( O -Topix NNP B-NP --RRB- ) O -of IN B-PP -all DT B-NP -issues NNS I-NP -listed VBN B-VP -in IN B-PP -the DT B-NP -First NNP I-NP -Section NN I-NP -, , O -which WDT B-NP -gained VBD B-VP -22.78 CD B-NP -points NNS I-NP -Thursday NNP B-NP -, , O -was VBD B-VP -up IN B-ADVP -14.06 CD B-NP -points NNS I-NP -, , O -or CC O -0.53 CD B-NP -% NN I-NP -, , O -at IN B-PP -2679.72 CD B-NP -. . O - -The DT B-NP -Second JJ I-NP -Section NN I-NP -index NN I-NP -, , O -which WDT B-NP -rose VBD B-VP -15.72 CD B-NP -points NNS I-NP -Thursday NNP B-NP -, , O -was VBD B-VP -up IN B-ADVP -11.88 CD B-NP -points NNS I-NP -, , O -or CC O -0.32 CD B-NP -% NN I-NP -, , O -to TO B-VP -close VB I-VP -at IN B-PP -3717.46 CD B-NP -. . 
O - -Volume NN B-NP -in IN B-PP -the DT B-NP -second JJ I-NP -section NN I-NP -was VBD B-VP -estimated VBN I-VP -at IN B-PP -30 CD B-NP -million CD I-NP -shares NNS I-NP -, , O -up IN B-ADVP -from IN B-PP -28 CD B-NP -million CD I-NP -Thursday NNP B-NP -. . O - -In IN B-PP -turmoil NN B-NP -caused VBN B-VP -by IN B-PP -the DT O -previous JJ B-NP -Friday NNP I-NP -'s POS B-NP -plunge NN I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -stocks NNS I-NP -, , O -the DT B-NP -Nikkei NNP I-NP -marked VBD B-VP -a DT B-NP -sharp JJ I-NP -647.33-point JJ I-NP -fall NN I-NP -Monday NNP B-NP -. . O - -But CC O -the DT B-NP -Nikkei NNP I-NP -fell VBD B-VP -an DT B-NP -overall JJ I-NP -1.8 CD I-NP -% NN I-NP -in IN B-PP -value NN B-NP -that DT B-NP -day NN I-NP -compared VBN B-PP -with IN B-PP -Wall NNP B-NP -Street NNP I-NP -'s POS I-NP -far RB B-ADJP -sharper JJR I-ADJP -6.9 CD B-ADJP -% NN I-ADJP -drop NN B-NP -on IN B-PP -Oct. NNP B-NP -13 CD I-NP -. . O - -The DT B-NP -Tokyo NNP I-NP -market NN I-NP -'s POS B-NP -resiliency NN I-NP -helped VBD B-VP -participants NNS B-NP -to TO B-VP -regain VB I-VP -confidence NN B-NP -gradually RB B-ADVP -as IN B-SBAR -they PRP B-NP -spent VBD B-VP -more JJR B-NP -time NN I-NP -on IN B-PP -analyzing VBG B-VP -factors NNS B-NP -that WDT B-NP -caused VBD B-VP -the DT B-NP -Friday NNP I-NP -plunge NN I-NP -and CC O -realized VBD B-VP -these DT B-NP -problems NNS I-NP -were VBD B-VP -unique JJ B-ADJP -to TO B-PP -New NNP B-NP -York NNP I-NP -stocks NNS I-NP -and CC B-ADJP -not RB I-ADJP -directly RB B-ADJP -related VBN I-ADJP -to TO B-PP -Tokyo NNP B-NP -. . O - -The DT B-NP -Nikkei NNP I-NP -continued VBD B-VP -to TO I-VP -gain VB I-VP -for IN B-PP -the DT B-NP -rest NN I-NP -of IN B-PP -the DT B-NP -week NN I-NP -, , O -adding VBG B-VP -1017.69 CD B-NP -points NNS I-NP -in IN B-PP -four CD B-NP -days NNS I-NP --- : O -more JJR B-VP -than IN I-VP -erasing VBG I-VP -Monday NNP B-NP -'s POS B-NP -losses NNS I-NP -. . 
O - -But CC O -further JJ B-NP -major JJ I-NP -advances NNS I-NP -on IN B-PP -the DT B-NP -Nikkei NNP I-NP -are VBP B-VP -n't RB I-VP -foreseen VBN I-VP -this DT B-NP -week NN I-NP -by IN B-PP -market NN B-NP -observers NNS I-NP -. . O - -Investors NNS B-NP -are VBP B-VP -still RB I-VP -waiting VBG I-VP -to TO I-VP -see VB I-VP -how WRB B-ADVP -the DT B-NP -U.S. NNP I-NP -government NN I-NP -will MD B-VP -decide VB I-VP -on IN B-PP -interest NN B-NP -rates NNS I-NP -and CC O -how WRB B-ADVP -the DT B-NP -dollar NN I-NP -will MD B-VP -be VB I-VP -stabilized VBN I-VP -. . O - -Some DT B-NP -high-priced JJ I-NP -issues NNS I-NP -made VBD B-VP -a DT B-NP -comeback NN I-NP -Friday NNP B-NP -. . O - -Pioneer NNP B-NP -surged VBD B-VP -450 CD B-NP -yen NN I-NP --LRB- ( O -$ $ B-NP -3.16 CD I-NP --RRB- ) O -to TO B-PP -6,050 CD B-NP -yen NN I-NP --LRB- ( O -$ $ B-NP -42.60 CD I-NP --RRB- ) O -. . O - -Kyocera NNP B-NP -advanced VBD B-VP -80 CD B-NP -yen NN I-NP -to TO B-PP -5,440 CD B-NP -. . O - -Fanuc NNP B-NP -gained VBD B-VP -100 CD B-NP -to TO B-PP -7,580 CD B-NP -. . O - -Breweries NNP B-NP -attracted VBD B-VP -investors NNS B-NP -because IN B-PP -of IN I-PP -their PRP$ B-NP -land NN I-NP -property NN I-NP -holdings NNS I-NP -that WDT B-NP -could MD B-VP -figure VB I-VP -in IN B-PP -development NN B-NP -or CC O -other JJ B-NP -plans NNS I-NP -, , O -traders NNS B-NP -said VBD B-VP -. . O - -Sapporo NNP B-NP -gained VBD B-VP -80 CD B-NP -to TO B-PP -1,920 CD B-NP -and CC O -Kirin NNP B-NP -added VBD B-VP -60 CD B-NP -to TO B-PP -2,070 CD B-NP -. . O - -Housings NNS B-NP -, , I-NP -constructions NNS I-NP -and CC I-NP -pharmaceuticals NNS I-NP -continued VBD B-VP -to TO I-VP -be VB I-VP -bought VBN I-VP -following VBG B-PP -Thursday NNP B-NP -'s POS B-NP -gains NNS I-NP -because IN B-PP -of IN I-PP -strong JJ B-NP -earnings NNS I-NP -outlooks NNS I-NP -. . O - -Daiwa NNP B-NP -House NNP I-NP -gained VBD B-VP -50 CD B-NP -to TO B-PP -2,660 CD B-NP -. . 
O - -Misawa NNP B-NP -Homes NNP I-NP -was VBD B-VP -up IN B-ADVP -20 CD B-NP -at IN B-PP -2,960 CD B-NP -. . O - -Kajima NNP B-NP -advanced VBD B-VP -40 CD B-NP -to TO B-PP -2,120 CD B-NP -and CC O -Ohbayashi NNP B-NP -added VBD B-VP -50 CD B-NP -to TO B-PP -1,730 CD B-NP -. . O - -Fujisawa NNP B-NP -added VBD B-VP -80 CD B-NP -to TO B-PP -2,010 CD B-NP -and CC O -Mochida NNP B-NP -advanced VBD B-VP -230 CD B-NP -to TO B-PP -4,400 CD B-NP -. . O - -London JJ B-NP -share NN I-NP -prices NNS I-NP -were VBD B-VP -influenced VBN I-VP -largely RB B-ADVP -by IN B-PP -declines NNS B-NP -on IN B-PP -Wall NNP B-NP -Street NNP I-NP -and CC O -weakness NN B-NP -in IN B-PP -the DT B-NP -British JJ I-NP -pound NN I-NP -. . O - -The DT B-NP -key JJ I-NP -Financial NNP I-NP -Times-Stock NNP I-NP -Exchange NNP I-NP -100-share JJ I-NP -index NN I-NP -ended VBD B-VP -10.2 CD B-NP -points NNS I-NP -lower JJR B-ADVP -at IN B-PP -2179.1 CD B-NP -, , O -above IN B-ADVP -its PRP$ B-NP -intraday JJ I-NP -low NN I-NP -of IN B-PP -2176.9 CD B-NP -, , B-ADVP -but CC I-ADVP -off IN B-ADVP -the DT B-NP -day NN I-NP -'s POS I-NP -high NN B-NP -of IN B-PP -2189 CD B-NP -. . O - -The DT B-NP -index NN I-NP -finished VBD B-VP -2.4 CD B-NP -% NN I-NP -under IN B-PP -its PRP$ B-NP -close NN I-NP -of IN B-PP -2233.9 CD B-NP -the DT B-NP -previous JJ I-NP -Friday NNP I-NP -, , O -although IN B-SBAR -it PRP B-NP -recouped VBD B-VP -some DT B-NP -of IN B-PP -the DT B-NP -sharp JJ I-NP -losses NNS I-NP -staged VBD B-VP -early JJ B-NP -last JJ I-NP -week NN I-NP -on IN B-PP -the DT B-NP -back RB I-NP -of IN B-PP -Wall NNP B-NP -Street NNP I-NP -'s POS B-NP -fall NN I-NP -. . 
O - -London NNP B-NP -was VBD B-VP -weak JJ B-ADJP -throughout IN B-PP -Friday NNP B-NP -'s POS B-NP -trading NN I-NP -, , O -however RB B-ADVP -, , O -on IN B-PP -what WP B-NP -dealers NNS B-NP -attributed VBD B-VP -to TO B-PP -generally RB B-NP -thin JJ I-NP -interest NN I-NP -ahead RB B-ADVP -of IN B-PP -the DT B-NP -weekend NN I-NP -and CC O -this DT B-NP -week NN I-NP -'s POS I-NP -potentially RB B-ADJP -important JJ I-ADJP -U.K. NNP B-NP -trade NN I-NP -figures NNS I-NP -for IN B-PP -September NNP B-NP -. . O - -The DT B-NP -FT-SE NNP I-NP -100 CD I-NP -largely RB B-ADVP -remained VBD B-VP -within IN B-PP -an DT B-NP -11-point JJ I-NP -range NN I-NP -establshed VBN B-VP -within IN B-PP -the DT B-NP -first JJ I-NP -hour NN I-NP -of IN B-PP -trading NN B-NP -before IN B-PP -it PRP B-NP -eased VBD B-VP -to TO B-PP -an DT B-NP -intraday JJ I-NP -low JJ I-NP -late RB B-ADVP -in IN B-PP -the DT B-NP -session NN I-NP -when WRB B-ADVP -a DT B-NP -flurry NN I-NP -of IN B-PP -program NN B-NP -selling VBG I-NP -pushed VBN B-VP -Wall NNP B-NP -Street NNP I-NP -lower JJR B-ADVP -. . O - -The DT B-NP -FT NNP I-NP -30-share JJ I-NP -index NN I-NP -closed VBD B-VP -11.0 CD B-NP -points NNS I-NP -lower JJR B-ADVP -at IN B-PP -1761.0 CD B-NP -. . O - -Volume NN B-NP -was VBD B-VP -extremely RB B-ADJP -thin JJ I-ADJP -at IN B-PP -351.3 CD B-NP -million CD I-NP -shares NNS I-NP -, , O -the DT B-NP -lightest JJS I-NP -volume NN I-NP -of IN B-PP -the DT B-NP -week NN I-NP -and CC O -modestly RB B-ADVP -under IN B-PP -Thursday NNP B-NP -'s POS B-NP -387.4 CD I-NP -million CD I-NP -shares NNS I-NP -. . 
O - -Dealers NNS B-NP -said VBD B-VP -the DT B-NP -day NN I-NP -'s POS B-NP -action NN I-NP -was VBD B-VP -featureless JJ B-ADJP -outside IN B-PP -some DT B-NP -response NN I-NP -to TO B-PP -sterling NN B-NP -'s POS B-NP -early JJ I-NP -weakness NN I-NP -against IN B-PP -the DT B-NP -mark NN I-NP -, , O -and CC O -fears NNS B-NP -that IN B-SBAR -Wall NNP B-NP -Street NNP I-NP -might MD B-VP -open RB I-VP -lower JJR B-ADVP -after IN B-PP -its PRP$ B-NP -strong JJ I-NP -leap NN I-NP -forward RB B-ADVP -Thursday NNP B-NP -. . O - -They PRP B-NP -added VBD B-VP -that IN B-SBAR -market-makers NNS B-NP -were VBD B-VP -largely RB I-VP -sidelined VBN I-VP -after IN B-PP -aggressively RB B-VP -supporting VBG I-VP -the DT B-NP -market NN I-NP -Thursday NNP B-NP -in IN B-PP -their PRP$ B-NP -quest NN I-NP -to TO B-VP -cover VB I-VP -internal JJ B-NP -shortages NNS I-NP -of IN B-PP -FT-SE NNP B-NP -100 CD I-NP -shares NNS I-NP -. . O - -Interest NN B-NP -may MD B-VP -remain VB I-VP -limited JJ B-ADJP -into IN B-PP -tomorrow NN B-NP -'s POS B-NP -U.K. NNP I-NP -trade NN I-NP -figures NNS I-NP -, , O -which WDT B-NP -the DT B-NP -market NN I-NP -will MD B-VP -be VB I-VP -watching VBG I-VP -closely RB B-ADVP -to TO B-VP -see VB I-VP -if IN B-SBAR -there EX B-NP -is VBZ B-VP -any DT B-NP -improvement NN I-NP -after IN B-PP -disappointing JJ B-NP -numbers NNS I-NP -in IN B-PP -the DT B-NP -previous JJ I-NP -two CD I-NP -months NNS I-NP -. . O - -The DT B-NP -key JJ I-NP -corporate JJ I-NP -news NN I-NP -of IN B-PP -the DT B-NP -day NN I-NP -was VBD B-VP -that IN B-SBAR -British JJ B-NP -Airways NNPS I-NP -decided VBD B-VP -to TO I-VP -withdraw VB I-VP -from IN B-PP -a DT B-NP -management-led JJ I-NP -bid NN I-NP -for IN B-PP -UAL NNP B-NP -Corp. NNP I-NP -, , O -the DT B-NP -parent NN I-NP -of IN B-PP -United NNP B-NP -Airlines NNPS I-NP -. . 
O - -British JJ B-NP -Airways NNPS I-NP -rose VBD B-VP -initially RB B-ADVP -after IN B-PP -announcing VBG B-VP -its PRP$ B-NP -withdrawal NN I-NP -from IN B-PP -the DT B-NP -UAL NNP I-NP -deal NN I-NP -. . O - -Dealers NNS B-NP -said VBD B-VP -they PRP B-NP -viewed VBD B-VP -the DT O -initial JJ O -# # O -390-million CD O --LRB- ( O -$ $ B-ADJP -622 CD O -million CD O --RRB- ) O -outlay NN B-NP -for IN B-PP -a DT B-NP -15 CD I-NP -% NN I-NP -stake NN I-NP -in IN B-PP -the DT B-NP -airline NN I-NP -as IN B-PP -a DT B-NP -bit NN I-NP -much JJ I-NP -. . O - -Its PRP$ B-NP -shares NNS I-NP -slid VBD B-VP -in IN B-PP -late JJ B-NP -dealings NNS I-NP -to TO B-VP -close VB I-VP -a DT B-NP -penny NN I-NP -per IN B-PP -share NN B-NP -lower JJR B-ADVP -at IN B-PP -197 CD B-NP -pence NN I-NP -. . O - -The DT B-NP -airline NN I-NP -was VBD B-VP -the DT B-NP -most RBS I-NP -active JJ I-NP -FT-SE NNP I-NP -100 CD I-NP -at IN B-PP -8.2 CD B-NP -million CD I-NP -shares NNS I-NP -traded VBN B-VP -. . O - -The DT B-NP -next JJ I-NP -most RBS I-NP -active JJ I-NP -top-tier JJ I-NP -stock NN I-NP -was VBD B-VP -B.A.T NNP B-NP -Industries NNPS I-NP -, , O -the DT B-NP -target NN I-NP -of IN B-PP -Sir NNP B-NP -James NNP I-NP -Goldsmith NNP I-NP -'s POS B-NP -# # B-ADJP -13.4 CD O -billion CD O -bid NN B-NP -. . O - -The DT B-NP -company NN I-NP -gained VBD B-VP -shareholder NN B-NP -approval NN I-NP -Thursday NNP B-NP -to TO B-VP -restructure VB I-VP -in IN B-PP -a DT B-NP -bid NN I-NP -to TO B-VP -fend VB I-VP -off IN B-PRT -the DT B-NP -hostile JJ I-NP -takeover NN I-NP -. . O - -Sir NNP B-NP -James NNP I-NP -said VBD B-VP -Thursday NNP B-NP -night NN I-NP -that IN B-SBAR -his PRP$ B-NP -plans NNS I-NP -for IN B-PP -the DT B-NP -takeover NN I-NP -had VBD B-VP -n't RB I-VP -changed VBN I-VP -. . 
O - -B.A.T NNP B-NP -ended VBD B-VP -the DT B-NP -day NN I-NP -at IN B-PP -778 CD B-NP -, , O -down JJ B-ADVP -5 NN B-NP -, , O -on IN B-PP -turnover NN B-NP -of IN B-PP -7.5 CD B-NP -million CD I-NP -shares NNS I-NP -. . O - -Dealers NNS B-NP -said VBD B-VP -it PRP B-NP -was VBD B-VP -hit VBN I-VP -by IN B-PP -some DT B-NP -profit-taking NN I-NP -after IN B-PP -gains NNS B-NP -since IN B-PP -mid-week NN B-NP -. . O - -In IN B-PP -other JJ B-NP -active JJ I-NP -shares NNS I-NP -, , O -Trusthouse NNP B-NP -Forte NNP I-NP -shed VB B-VP -10 CD B-NP -to TO B-PP -294 CD B-NP -on IN B-PP -volume NN B-NP -of IN B-PP -6.4 CD B-NP -million CD I-NP -shares NNS I-NP -after IN B-PP -a DT B-NP -Barclays NNP I-NP -De NNP I-NP -Zoete NNP I-NP -Wedd NNP I-NP -downgrading NN I-NP -, , O -while IN B-SBAR -Hillsdown NNP B-NP -Holdings NNP I-NP -, , O -a DT B-NP -food NN I-NP -products NNS I-NP -concern VBP I-NP -, , O -was VBD B-VP -boosted VBN I-VP -2 CD B-NP -to TO B-PP -271 CD B-NP -after IN O -it PRP B-NP -disclosed VBD B-VP -it PRP B-NP -would MD B-VP -seek VB I-VP -shareholder NN B-NP -approval NN I-NP -to TO B-VP -begin VB I-VP -share NN B-NP -repurchases NNS I-NP -. . O - -Elsewhere RB B-ADVP -in IN B-PP -Europe NNP B-NP -, , O -share NN B-NP -prices NNS I-NP -closed VBD B-VP -higher JJR B-ADVP -in IN B-PP -Stockholm NNP B-NP -, , I-NP -Brussels NNP I-NP -and CC I-NP -Milan NNP I-NP -. . O - -Prices NNS B-NP -were VBD B-VP -lower JJR B-ADJP -in IN B-PP -Frankfurt NNP B-NP -, , I-NP -Zurich NNP I-NP -, , I-NP -Paris NNP I-NP -and CC I-NP -Amsterdam NNP I-NP -. . O - -South JJ B-NP -African JJ I-NP -gold NN I-NP -stocks NNS I-NP -closed VBD B-VP -moderately RB B-ADVP -lower JJR I-ADVP -. . 
O - -Share NN B-NP -prices NNS I-NP -closed VBD B-VP -higher JJR B-ADVP -in IN B-PP -Sydney NNP B-NP -, , O -Taipei NNP B-NP -, , O -Wellington NNP B-NP -, , O -Manila NNP B-NP -, , O -Hong NNP B-NP -Kong NNP I-NP -and CC O -Singapore NNP B-NP -and CC O -were VBD B-VP -lower JJR B-ADJP -in IN B-PP -Seoul NNP B-NP -. . O - -Here RB B-ADVP -are VBP B-VP -price NN B-NP -trends NNS I-NP -on IN B-PP -the DT B-NP -world NN I-NP -'s POS B-NP -major JJ I-NP -stock NN I-NP -markets NNS I-NP -, , O -as IN B-SBAR -calculated VBN B-VP -by IN B-PP -Morgan NNP B-NP -Stanley NNP I-NP -Capital NNP I-NP -International NNP I-NP -Perspective NNP I-NP -, , O -Geneva NNP B-NP -. . O - -To TO B-VP -make VB I-VP -them PRP B-NP -directly RB B-ADJP -comparable JJ I-ADJP -, , O -each DT B-NP -index NN I-NP -is VBZ B-VP -based VBN I-VP -on IN B-PP -the DT B-NP -close NN I-NP -of IN B-PP -1969 CD B-NP -equaling VBG B-VP -100 CD B-NP -. . O - -The DT B-NP -percentage NN I-NP -change NN I-NP -is VBZ B-VP -since IN B-PP -year-end NN B-NP -. . O - -The DT B-NP -U.S. NNP I-NP -is VBZ B-VP -required VBN I-VP -to TO I-VP -notify VB I-VP -foreign JJ B-NP -dictators NNS I-NP -if IN B-SBAR -it PRP B-NP -knows VBZ B-VP -of IN B-PP -coup NN B-NP -plans NNS I-NP -likely JJ B-ADJP -to TO B-VP -endanger VB I-VP -their PRP$ B-NP -lives NNS I-NP -, , O -government NN B-NP -officials NNS I-NP -said VBD B-VP -. . O - -The DT B-NP -notification NN I-NP -policy NN I-NP -was VBD B-VP -part NN B-NP -of IN B-PP -a DT B-NP -set NN I-NP -of IN B-PP -guidelines NNS B-NP -on IN B-PP -handling NN B-VP -coups NNS B-NP -outlined VBN B-VP -in IN B-PP -a DT B-NP -secret JJ I-NP -1988 CD I-NP -exchange NN I-NP -of IN B-PP -letters NNS B-NP -between IN B-PP -the DT B-NP -Reagan NNP I-NP -administration NN I-NP -and CC O -the DT B-NP -Senate NNP I-NP -Intelligence NNP I-NP -Committee NNP I-NP -. . 
O - -The DT B-NP -existence NN I-NP -of IN B-PP -the DT B-NP -guidelines NNS I-NP -has VBZ B-VP -become VBN I-VP -known VBN I-VP -since IN B-SBAR -President NNP B-NP -Bush NNP I-NP -disclosed VBD B-VP -them PRP B-NP -privately RB B-ADVP -to TO B-PP -seven CD B-NP -Republican NNP I-NP -senators NNS I-NP -at IN B-PP -a DT B-NP -White NNP I-NP -House NNP I-NP -meeting NN I-NP -last JJ B-NP -Monday NNP I-NP -. . O - -Officials NNS B-NP -familiar JJ B-ADJP -with IN B-PP -the DT B-NP -meeting NN I-NP -said VBD B-VP -Mr. NNP B-NP -Bush NNP I-NP -cited VBD B-VP -the DT B-NP -policy NN I-NP -as IN B-PP -an DT B-NP -example NN I-NP -of IN B-PP -the DT B-NP -sort NN I-NP -of IN B-PP -congressional JJ B-NP -requirements NNS I-NP -the DT B-NP -administration NN I-NP -contends VBZ B-VP -contribute VB B-VP -to TO B-PP -the DT B-NP -failure NN I-NP -of IN B-PP -such JJ B-NP -covert JJ I-NP -actions NNS I-NP -as IN B-PP -this DT B-NP -month NN I-NP -'s POS B-NP -futile JJ I-NP -effort NN I-NP -to TO B-VP -oust VB I-VP -Panamanian JJ B-NP -dictator NN I-NP -Manuel NNP I-NP -Noriega NNP I-NP -. . O - -According VBG B-PP -to TO B-PP -the DT B-NP -officials NNS I-NP -, , O -Mr. NNP B-NP -Bush NNP I-NP -even RB B-ADVP -read VB B-VP -to TO B-PP -the DT B-NP -senators NNS I-NP -selections NNS B-NP -from IN B-PP -a DT B-NP -highly RB I-NP -classified VBN I-NP -letter NN I-NP -from IN B-PP -the DT B-NP -committee NN I-NP -to TO B-PP -the DT B-NP -White NNP I-NP -House NNP I-NP -discussing VBG B-VP -the DT B-NP -guidelines NNS I-NP -. . O - -They PRP B-NP -said VBD B-VP -the DT B-NP -president NN I-NP -conceded VBD B-VP -the DT B-NP -notification NN I-NP -requirement NN I-NP -did VBD B-VP -n't RB I-VP -affect VB I-VP -his PRP$ B-NP -decision NN I-NP -to TO B-VP -lend VB I-VP -only RB B-NP -minor JJ I-NP -support NN I-NP -to TO B-PP -this DT B-NP -month NN I-NP -'s POS B-NP -Panama NNP I-NP -coup NN I-NP -effort NN I-NP -. . 
O - -No DT B-NP -notification NN I-NP -was VBD B-VP -ever RB I-VP -considered VBN I-VP -, , O -officials NNS B-NP -said VBD B-VP -, , O -apparently RB B-ADVP -because IN B-SBAR -the DT B-NP -U.S. NNP I-NP -did VBD B-VP -n't RB I-VP -think VB I-VP -the DT B-NP -coup NN I-NP -plotters NNS I-NP -intended VBN B-VP -to TO I-VP -kill VB I-VP -Mr. NNP B-NP -Noriega NNP I-NP -, , O -but CC O -merely RB B-VP -sought VBD I-VP -to TO I-VP -imprison VB I-VP -him PRP B-NP -. . O - -What WP B-NP -'s VBZ B-VP -more JJR B-NP -, , O -both DT B-NP -administration NN B-NP -and CC O -congressional JJ B-NP -officials NNS I-NP -hint VBP B-VP -that IN B-SBAR -the DT B-NP -notification NN I-NP -requirement NN I-NP -is VBZ B-VP -likely JJ B-ADJP -to TO B-VP -be VB I-VP -dropped VBN I-VP -from IN B-PP -the DT B-NP -guidelines NNS I-NP -on IN B-PP -coup NN B-NP -attempts NNS I-NP -that WDT B-NP -are VBP B-VP -being VBG I-VP -rewritten VBN I-VP -by IN B-PP -the DT B-NP -panel NN I-NP -and CC O -the DT B-NP -White NNP I-NP -House NNP I-NP -. . O - -The DT B-NP -rewriting VBG I-NP -was VBD B-VP -launched VBN I-VP -at IN B-PP -a DT B-NP -meeting NN I-NP -between IN B-PP -Mr. NNP B-NP -Bush NNP I-NP -and CC O -intelligence NN B-NP -committee NN I-NP -leaders NNS I-NP -Oct. NNP B-NP -12 CD I-NP -, , O -a DT B-NP -few JJ I-NP -days NNS I-NP -before IN B-PP -the DT B-NP -meeting NN I-NP -at IN B-PP -which WDT B-NP -the DT B-NP -president NN I-NP -complained VBD B-VP -about IN B-PP -the DT B-NP -rules NNS I-NP -. . 
O - -However RB B-ADVP -, , O -the DT B-NP -disclosure NN I-NP -of IN B-PP diff --git a/paddle/trainer/tests/train_files.txt b/paddle/trainer/tests/train_files.txt deleted file mode 100644 index 1c268914953ff090ae47c56051fcf1cad0e1707b..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/train_files.txt +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/train_proto.bin diff --git a/paddle/trainer/tests/train_sparse.list b/paddle/trainer/tests/train_sparse.list deleted file mode 100644 index 6ea020e2202f8464f8a647cd96c84a9d17a03ae3..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/train_sparse.list +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/compare_sparse_data diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 2c2cc6245932d4af56a68d6399ce31f008bf3748..e2f5592248fd0b6166c2d11af02cef7815673def 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -540,6 +540,10 @@ message LayerConfig { // for switch order layer optional ReshapeConfig reshape_conf = 59; + + // for batch normalization layer + // The small constant added to the variance to improve numeric stability. 
+ optional double epsilon = 60 [ default = 0.00001 ]; } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 5bd68e211ac1c8e05f40dc3ca37eef99f32af47f..5ba0e50c6ba0f84a3ea87d5a5199fef23a5b05ea 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1116,35 +1116,6 @@ def PyData(files=None, return data_config -@config_func -def ProtoData(files=None, - type=None, - file_group_queue_capacity=None, - load_file_count=None, - constant_slots=None, - load_thread_num=None, - **xargs): - data_config = create_data_config_proto(**xargs) - if type is None: - data_config.type = 'proto' - else: - data_config.type = type - data_config.files = files - - # When type="proto_group", one data provider contains at most - # load_file_count files, and there are at most - # (queue_capacity + load_thread_num + 1) data providers in memory - if file_group_queue_capacity is not None: - data_config.file_group_conf.queue_capacity = file_group_queue_capacity - if load_file_count is not None: - data_config.file_group_conf.load_file_count = load_file_count - if load_thread_num is not None: - data_config.file_group_conf.load_thread_num = load_thread_num - if constant_slots: - data_config.constant_slots.extend(constant_slots) - return data_config - - #real data for training is actually provided by "sub_data" data providers. 
@config_func def MultiData(sub_data=[]): @@ -1826,7 +1797,7 @@ class FCLayer(LayerBase): self.layer_type = 'mkldnn_fc' config_assert( len(inputs) == 1, - "MkldnnFCLayer support one and only one input!") + "MKLDNNFCLayer support one and only one input!") super(FCLayer, self).__init__( name, self.layer_type, size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): @@ -1837,7 +1808,7 @@ class FCLayer(LayerBase): sparse = format == "csr" or format == "csc" if use_mkldnn: config_assert(not sparse, - "MkldnnFCLayer do not support sparse format yet") + "MKLDNNFCLayer do not support sparse format yet") if use_mkldnn_wgt: dims = [self.config.size, input_layer.size] if sparse: @@ -1853,7 +1824,7 @@ class FCLayer(LayerBase): @config_layer('mkldnn_fc') -class MkldnnFcLayer(FCLayer): +class MKLDNNFcLayer(FCLayer): layer_type = 'mkldnn_fc' @@ -2066,13 +2037,20 @@ class ParameterReluLayer(LayerBase): def __init__(self, name, inputs, partial_sum=1, **args): super(ParameterReluLayer, self).__init__( name, self.layer_type, 0, inputs=inputs, **args) + input_layer = self.get_input_layer(0) config_assert(len(self.inputs) == 1, "prelu layer has only one input.") config_assert(input_layer.size % partial_sum == 0, "a wrong setting for partial_sum") + + dims = [1, input_layer.size / partial_sum] self.set_layer_size(input_layer.size) self.config.partial_sum = partial_sum - self.create_input_parameter(0, input_layer.size / partial_sum) + self.create_input_parameter(0, input_layer.size / partial_sum, dims) + + self.set_layer_height_width(self.get_input_layer(0).height, \ + self.get_input_layer(0).width) + self.set_layer_depth(self.get_input_layer(0).depth) @config_layer('conv') @@ -2434,6 +2412,7 @@ class BatchNormLayer(LayerBase): bias=True, img3D=False, use_global_stats=True, + epsilon=1e-5, moving_average_fraction=0.9, batch_norm_type=None, mean_var_names=None, @@ -2482,6 +2461,9 @@ class BatchNormLayer(LayerBase): self.config.use_global_stats = use_global_stats if 
moving_average_fraction is not None: self.config.moving_average_fraction = moving_average_fraction + if epsilon is not None: + assert epsilon >= 1e-5, "epsilon must be no less than 1e-5." + self.config.epsilon = epsilon input_layer = self.get_input_layer(0) image_conf = self.config.inputs[0].image_conf @@ -2714,7 +2696,7 @@ Usage: max_sort_size = -1, inputs = ["output", "score"]) Input data: Samples of the same query should be loaded as a sequence, - by ProtoDataProvider or PyDataProvider etc.. User should provide + by PyDataProvider etc.. User should provide scores for each sample. The score slot should be the 2nd input of lambdaRank layer. @@ -3209,6 +3191,18 @@ class SubNestedSequenceLayer(LayerBase): self.set_layer_size(size) +@config_layer('dot_prod') +class DotProdLayer(LayerBase): + def __init__(self, name, inputs, device=None): + super(DotProdLayer, self).__init__( + name, 'dot_prod', 0, inputs, device=device) + config_assert(len(inputs) == 2, 'DotProdLayer must have 2 inputs.') + config_assert( + self.get_input_layer(0).size == self.get_input_layer(1).size, + "Two inputs should have the same size.") + self.set_layer_size(1) + + @config_layer('out_prod') class OuterProdLayer(LayerBase): def __init__(self, name, inputs, device=None): @@ -3330,6 +3324,20 @@ class RowL2NormLayer(LayerBase): self.set_layer_size(input_layer.size) +@config_layer('cos') +class CosSimLayer(LayerBase): + def __init__(self, name, inputs, cos_scale=1, device=None): + super(CosSimLayer, self).__init__( + name, 'cos', 1, inputs=inputs, device=device) + config_assert( + len(self.inputs) == 2, + 'The CosSimLayer expects two and only two inputs.') + config_assert( + self.get_input_layer(0).size == self.get_input_layer(1).size, + 'The two inputs of CosSimLayer must have the same dimensionality.') + self.config.cos_scale = cos_scale + + @config_layer('cos_vm') class CosSimVecMatLayer(LayerBase): def __init__(self, name, size, inputs, cos_scale=1.0, device=None): @@ -3337,10 +3345,24 @@ class 
CosSimVecMatLayer(LayerBase): name, 'cos_vm', size, inputs=inputs, device=device) self.config.cos_scale = cos_scale config_assert( - len(self.inputs) == 2, 'CosSimVecMatLayer must have 2 inputs') + len(self.inputs) == 2, 'The CosSimVecMatLayer must have 2 inputs.') config_assert( size * self.get_input_layer(0).size == self.get_input_layer(1).size, - 'Wrong input size for CosSimVecMatLayer') + 'Wrong input size for CosSimVecMatLayer.') + + +@config_layer('l2_distance') +class L2DistanceLayer(LayerBase): + def __init__(self, name, inputs, device=None): + super(L2DistanceLayer, self).__init__( + name, 'l2_distance', 1, inputs=inputs, device=device) + config_assert( + len(self.inputs) == 2, ('The L2DistanceLayer must have ' + 'and only have 2 inputs.')) + config_assert( + self.get_input_layer(0).size == self.get_input_layer(1).size, + ('Two inputs of the L2DistanceLayer must have ' + 'the same dimensionality.')) @config_layer('sampling_id') @@ -3384,18 +3406,6 @@ class AverageLayer(LayerBase): self.create_bias_parameter(bias, self.config.size) -@config_layer('cos') -class CosSimLayer(LayerBase): - def __init__(self, name, inputs, cos_scale=1, device=None): - super(CosSimLayer, self).__init__( - name, 'cos', 1, inputs=inputs, device=device) - config_assert(len(self.inputs) == 2, 'CosSimLayer must have 2 inputs') - config_assert( - self.get_input_layer(0).size == self.get_input_layer(1).size, - 'inputs of CosSimLayer must have same dim') - self.config.cos_scale = cos_scale - - @config_layer('tensor') class TensorLayer(LayerBase): def __init__(self, name, size, inputs, bias=True, **xargs): @@ -3506,11 +3516,17 @@ def ExpressionLayer(name, inputs, **xargs): @config_layer('concat') class ConcatenateLayer(LayerBase): + layer_type = 'concat' + def __init__(self, name, inputs, bias=False, **xargs): config_assert(inputs, 'inputs cannot be empty') config_assert(not bias, 'ConcatenateLayer cannot support bias.') + use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) 
+ if self.layer_type == "mkldnn_concat": + config_assert(use_mkldnn, "mkldnn_concat only support MKLDNN") + self.layer_type = 'mkldnn_concat' if use_mkldnn else 'concat' super(ConcatenateLayer, self).__init__( - name, 'concat', 0, inputs=inputs, **xargs) + name, self.layer_type, 0, inputs=inputs, **xargs) size = 0 for input_index in xrange(len(self.inputs)): assert self.get_input_layer(0).height == self.get_input_layer( @@ -3530,6 +3546,11 @@ class ConcatenateLayer(LayerBase): self.set_layer_size(size) +@config_layer('mkldnn_concat') +class MKLDNNConcatLayer(ConcatenateLayer): + layer_type = 'mkldnn_concat' + + # like concat layer, but each input layer was processed by a Projection. @config_layer('concat2') class ConcatenateLayer2(LayerBase): diff --git a/python/paddle/trainer_config_helpers/activations.py b/python/paddle/trainer_config_helpers/activations.py index c749fa827fea4a808ab715dcb3442aa24d06a4d2..00efc01c0592107314f5b23c951706d039d49a88 100644 --- a/python/paddle/trainer_config_helpers/activations.py +++ b/python/paddle/trainer_config_helpers/activations.py @@ -17,7 +17,8 @@ __all__ = [ "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation', 'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation", "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation", - "LogActivation", "SqrtActivation", "ReciprocalActivation" + "LogActivation", "SqrtActivation", "ReciprocalActivation", + "SoftSignActivation" ] @@ -243,8 +244,20 @@ class ReciprocalActivation(BaseActivation): Reciprocal Activation. .. math:: - f(z) = 1/z + f(z)=\\frac{1}{z} """ def __init__(self): BaseActivation.__init__(self, 'reciprocal', False) + + +class SoftSignActivation(BaseActivation): + """ + SoftSign Activation. + + .. 
math:: + f(z)=\\frac{z}{1 + |z|} + """ + + def __init__(self): + BaseActivation.__init__(self, 'softsign', False) diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index 57979db4de08989ab583b0ab41589c09789a0921..95797fba8f67bacb421f5c2813ad6332bc53cbc9 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -297,7 +297,7 @@ def auc_evaluator( def pnpair_evaluator( input, label, - info, + query_id, weight=None, name=None, ): """ @@ -308,16 +308,20 @@ def pnpair_evaluator( .. code-block:: python - eval = pnpair_evaluator(input, label, info) + eval = pnpair_evaluator(input, label, query_id) :param input: Input Layer name. The output prediction of network. :type input: LayerOutput :param label: Label layer name. :type label: LayerOutput - :param info: Info layer name. (TODO, explaination) - :type info: LayerOutput + :param query_id: Query_id layer name. Query_id indicates that which query + each sample belongs to. Its shape should be + the same as output of Label layer. + :type query_id: LayerOutput :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1]. (TODO, explaination) + [sample_num, 1] which indicates the weight of each sample. + The default weight of sample is 1 if the weight layer is None. + And the pair weight is the mean of the two samples' weight. :type weight: LayerOutput :param name: Evaluator name. 
:type name: None|basestring @@ -326,8 +330,8 @@ def pnpair_evaluator( input = [input] if label: input.append(label) - if info: - input.append(info) + if query_id: + input.append(query_id) evaluator_base( input=input, type="pnpair", diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b59f2f657d7cd0232be87ee2699e00f08ac3dc5b..8e127c9489ca5a4ed190e6d4e12ec4c9b28ad9cf 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -51,6 +51,7 @@ __all__ = [ 'last_seq', 'first_seq', 'cos_sim', + 'l2_distance_layer', 'hsigmoid', 'conv_projection', 'square_error_cost', @@ -115,6 +116,7 @@ __all__ = [ 'huber_classification_cost', 'block_expand_layer', 'maxout_layer', + 'dot_prod_layer', 'out_prod_layer', 'printer_layer', 'print_layer', @@ -167,6 +169,7 @@ class LayerType(object): COST = 'cost' COSINE_SIM_VEC = 'cos_vm' COSINE_SIM = 'cos' + L2_DISTANCE = 'l2_distance' HSIGMOID = 'hsigmoid' CONV_LAYER = 'conv' CONVTRANS_LAYER = 'convt' @@ -197,6 +200,7 @@ class LayerType(object): SCALING_LAYER = 'scaling' TRANS_LAYER = 'trans' ROTATE_LAYER = 'rotate' + DOT_PROD_LAYER = 'dot_prod' OUT_PROD_LAYER = 'out_prod' FEATURE_MAP_EXPAND_LAYER = 'featmap_expand' @@ -888,7 +892,7 @@ def mixed_layer(size=0, :type size: int :param input: The input of this layer. It is an optional parameter. If set, then this function will just return layer's name. - :param act: Activation Type. LinearActivation is the default. + :param act: Activation Type. LinearActivation is the default activation. :type act: BaseActivation :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the @@ -1030,7 +1034,7 @@ def fc_layer(input, :type input: LayerOutput | list | tuple :param size: The layer dimension. :type size: int - :param act: Activation Type. TanhActivation is the default. + :param act: Activation Type. 
TanhActivation is the default activation. :type act: BaseActivation :param param_attr: The Parameter Attribute|list. :type param_attr: ParameterAttribute @@ -1527,7 +1531,7 @@ def lstmemory(input, :type input: LayerOutput :param reverse: is sequence process reversed or not. :type reverse: bool - :param act: Activation type. TanhActivation is the default. :math:`h_t` + :param act: Activation type. TanhActivation is the default activation. :type act: BaseActivation :param gate_act: gate activation type, SigmoidActivation by default. :type gate_act: BaseActivation @@ -1920,7 +1924,7 @@ def repeat_layer(input, False for treating input as column vector and repeating in the row direction. :type as_row_vector: bool - :param act: Activation type. IdentityActivation is the default. + :param act: Activation type. IdentityActivation is the default activation. :type act: BaseActivation :type name: basestring :param layer_attr: extra layer attributes. @@ -1974,7 +1978,7 @@ def seq_reshape_layer(input, :type reshape_size: int :param name: The name of this layer. It is optional. :type name: basestring - :param act: Activation type. IdentityActivation is the default. + :param act: Activation type. IdentityActivation is the default activation. :type act: BaseActivation :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -2332,6 +2336,51 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b], size=size) +@wrap_name_default() +@layer_support() +def l2_distance_layer(x, y, name=None, layer_attr=None): + """ + This layer calculates and returns the Euclidean distance between two input + vectors x and y. The equation is as follows: + + .. math:: + l2_distance(\\mathbf{x}, \\mathbf{y}) = \\sqrt{\\sum_{i=1}^D(x_i - y_i)^2} + + The output size of this layer is fixed to be 1. Note that the above + computation is for one sample. Multiple samples are processed in one batch.
+ + The example usage is: + + .. code-block:: python + + l2_sim = l2_distance_layer(x=layer1, y=layer2) + + :param name: The name of this layer. It is optional. + :type name: basestring + :param x: The first input x for this layer, whose output is a matrix with + dimensionality N x D. N is the sample number in a mini-batch. + D is the dimensionality of x's output. + :type x: LayerOutput + :param y: The second input y for this layer, whose output is a matrix with + dimensionality N x D. N is the sample number in a mini-batch. + D is the dimensionality of y's output. + :type y: LayerOutput + :param layer_attr: The extra layer attributes, for example, drop rate. + See ExtraLayerAttribute for more details. + :type layer_attr: ExtraLayerAttribute + :return: The returned LayerOutput object. + :rtype: LayerOutput + """ + + assert isinstance(x, LayerOutput) and isinstance(y, LayerOutput) + Layer( + name=name, + type=LayerType.L2_DISTANCE, + inputs=[x.name, y.name], + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput(name, LayerType.L2_DISTANCE, parents=[x, y], size=1) + + @wrap_name_default() @wrap_bias_attr_default(has_bias=True) @wrap_param_attr_default() @@ -2458,12 +2507,12 @@ def img_conv_layer(input, input is raw pixels of image(mono or RGB), or it may be the previous layer's num_filters * num_group. - There are several group of filter in PaddlePaddle implementation. - Each group will process some channel of the inputs. For example, if an input + There are several groups of filters in PaddlePaddle implementation. + Each group will process some channels of the input. For example, if num_channel = 256, group = 4, num_filter=32, the PaddlePaddle will create - 32*4 = 128 filters to process inputs. The channels will be split into 4 - pieces. First 256/4 = 64 channels will process by first 32 filters. The - rest channels will be processed by rest group of filters. + 32*4 = 128 filters to process the input. The channels will be split into 4 + pieces.
First 256/4 = 64 channels will be processed by first 32 filters. The + rest channels will be processed by the rest groups of filters. The example usage is: @@ -2479,53 +2528,68 @@ def img_conv_layer(input, :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param filter_size: The x dimension of a filter kernel. Or input a tuple for - two image dimension. + :param filter_size: The dimensions of the filter kernel. If the parameter is + set to one integer, the two dimensions on x and y axises + will be same when filter_size_y is not set. If it is set + to a list, the first element indicates the dimension on + the x axis, and the second is used to specify the dimension + on the y axis when filter_size_y is not provided. :type filter_size: int | tuple | list - :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle - currently supports rectangular filters, the filter's - shape will be (filter_size, filter_size_y). - :type filter_size_y: int | None + :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter + is not set, it will be set automatically according to filter_size. + :type filter_size_y: int :param num_filters: Each filter group's number of filter - :param act: Activation type. ReluActivation is the default. + :param act: Activation type. ReluActivation is the default activation. :type act: BaseActivation - :param groups: Group size of filters. + :param groups: The group number. 1 is the default group number. :type groups: int - :param stride: The x dimension of the stride. Or input a tuple for two image - dimension. + :param stride: The strides. If the parameter is set to one integer, the strides + on x and y axises will be same when stride_y is not set. If it is + set to a list, the first element indicates the stride on the x axis, + and the second is used to specify the stride on the y axis when + stride_y is not provided. 1 is the default value. 
:type stride: int | tuple | list - :param stride_y: The y dimension of the stride. + :param stride_y: The stride on the y axis. :type stride_y: int - :param padding: The x dimension of the padding. Or input a tuple for two - image dimension + :param padding: The padding sizes. If the parameter is set to one integer, the padding + sizes on x and y axises will be same when padding_y is not set. If it + is set to a list, the first element indicates the padding size on the + x axis, and the second is used to specify the padding size on the y axis + when padding_y is not provided. 0 is the default padding size. :type padding: int | tuple | list - :param padding_y: The y dimension of the padding. + :param padding_y: The padding size on the y axis. :type padding_y: int - :param dilation: The x dimension of the dilation. Or input a tuple for two - image dimension + :param dilation: The dimensions of the dilation. If the parameter is set to one integer, + the two dimensions on x and y axises will be same when dilation_y is not + set. If it is set to a list, the first element indicates the dimension + on the x axis, and the second is used to specify the dimension on the y + axis when dilation_y is not provided. 1 is the default dimension. :type dilation: int | tuple | list - :param dilation_y: The y dimension of the dilation. + :param dilation_y: The dimension of the dilation on the y axis. :type dilation_y: int :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param num_channels: number of input channels. If None will be set - automatically from previous output. + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channel number of the input. 
:type num_channels: int - :param param_attr: Convolution param attribute. None means default attribute + :param param_attr: The parameter attribute. See ParameterAttribute for + details. :type param_attr: ParameterAttribute - :param shared_biases: Is biases will be shared between filters or not. + :param shared_biases: Whether biases will be shared between filters or not. :type shared_biases: bool - :param layer_attr: Layer Extra Attribute. + :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :param trans: true if it is a convTransLayer, false if it is a convLayer + :param trans: True if it is a convTransLayer, False if it is a convLayer :type trans: bool - :param layer_type: specify the layer_type, default is None. If trans=True, - layer_type has to be "exconvt" or "cudnn_convt", - otherwise layer_type has to be either "exconv" or - "cudnn_conv" - :type layer_type: String + :param layer_type: Specify the layer type. If the dilation's dimension on one axis is + larger than 1, layer_type has to be "cudnn_conv" or "cudnn_convt". + If trans=True, layer_type has to be "exconvt" or "cudnn_convt", + otherwise layer_type has to be either "exconv" or "cudnn_conv". + :type layer_type: basestring :return: LayerOutput object. :rtype: LayerOutput """ @@ -2630,7 +2694,7 @@ def img_pool_layer(input, """ Image pooling Layer. - The details of pooling layer, please refer ufldl's pooling_ . + The details of pooling layer, please refer to ufldl's pooling_ . .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ @@ -2662,32 +2726,37 @@ def img_pool_layer(input, padding_y=2, pool_type=MaxPooling()) - :param padding: pooling padding width. + :param padding: The padding size on the x axis. 0 is the default padding size. :type padding: int - :param padding_y: pooling padding height. It's equal to padding by default. 
- :type padding_y: int | None - :param name: name of pooling layer - :type name: basestring. + :param padding_y: The padding size on the y axis. If the parameter is not set + or set to None, it will be set to 'padding' automatically. + :param name: The name of this layer. It is optional. + :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param pool_size: pooling window width + :param pool_size: The pooling window length on the x axis. :type pool_size: int - :param pool_size_y: pooling window height. It's eaqual to pool_size by default. - :type pool_size_y: int | None - :param num_channels: number of input channel. + :param pool_size_y: The pooling window length on the y axis. If the parameter is + not set or set to None, its actual value will be automatically + set to pool_size. + :type pool_size_y: int + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. :type num_channels: int - :param pool_type: pooling type. MaxPooling or AvgPooling. Default is - MaxPooling. + :param pool_type: Pooling type. MaxPooling is the default pooling. :type pool_type: BasePoolingType - :param stride: stride width of pooling. + :param stride: The stride on the x axis. 1 is the default value. :type stride: int - :param stride_y: stride height of pooling. It is equal to stride by default. - :type stride_y: int | None - :param layer_attr: Extra Layer attribute. + :param stride_y: The stride on the y axis. If the parameter is not set or set to + None, its actual value will be automatically set to 'stride'. + :type stride_y: int + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :param ceil_mode: Wether to use ceil mode to calculate output height and with. - Defalut is True. If set false, Otherwise use floor. 
- + :param ceil_mode: Whether to use the ceil function to calculate output height and width. + True is the default. If it is set to False, the floor function will + be used. :type ceil_mode: bool :return: LayerOutput object. :rtype: LayerOutput @@ -2793,24 +2862,32 @@ def img_pool3d_layer(input, :param padding: pooling padding width. :type padding: int | tuple | list - :param name: name of pooling layer + :param name: The name of this layer. It is optional. :type name: basestring. :param input: The input of this layer. :type input: LayerOutput - :param pool_size: pooling window width + :param pool_size: The pooling window lengths along three axes. If the parameter + is set to one integer, the three lengths will be same. :type pool_size: int | tuple | list - :param num_channels: number of input channel. + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. :type num_channels: int - :param pool_type: pooling type. MaxPooling or AvgPooling. Default is - MaxPooling. + :param pool_type: Pooling type. MaxPooling is the default pooling. :type pool_type: BasePoolingType - :param stride: stride width of pooling. + :param stride: The strides of the pooling along three axes. If the parameter + is set to one integer, the three strides will be same. 1 is the + default value. :type stride: int | tuple | list - :param layer_attr: Extra Layer attribute. + :param padding: The sizes of padding along three axes. If the parameter is set to + one integer, they will be same. 0 is the default padding size. + :type padding: int | tuple | list + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :param ceil_mode: Wether to use ceil mode to calculate output height and with. - Defalut is True. If set false, Otherwise use floor.
- + :param ceil_mode: Whether to use the ceil function to calculate output height and width. + True is the default. If it is set to False, the floor function will + be used. :type ceil_mode: bool :return: LayerOutput object. :rtype: LayerOutput @@ -2889,9 +2966,11 @@ def spp_layer(input, pyramid_height=None, layer_attr=None): """ - Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. - The details please refer to - `Kaiming He's paper `_. + A layer that performs spatial pyramid pooling. + + Reference: + Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition + https://arxiv.org/abs/1406.4729 The example usage is: @@ -2906,13 +2985,16 @@ def spp_layer(input, :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param num_channels: number of input channel. + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. :type num_channels: int - :param pool_type: Pooling type. MaxPooling or AveragePooling. Default is MaxPooling. + :param pool_type: Pooling type. MaxPooling is the default pooling. :type scale: BasePoolingType - :param pyramid_height: pyramid height. + :param pyramid_height: The pyramid height of this pooling. :type pyramid_height: int - :param layer_attr: Extra Layer Attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -2987,8 +3069,10 @@ def img_cmrnorm_layer(input, layer_attr=None): """ Response normalization across feature maps. - The details please refer to - `Alex's paper `_. 
+ + Reference: + ImageNet Classification with Deep Convolutional Neural Networks + http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf The example usage is: @@ -2997,7 +3081,7 @@ def img_cmrnorm_layer(input, norm = img_cmrnorm_layer(input=net, size=5) :param name: The name of this layer. It is optional. - :type name: None | basestring + :type name: basestring :param input: The input of this layer. :type input: LayerOutput :param size: Normalize in number of :math:`size` feature maps. @@ -3006,9 +3090,11 @@ def img_cmrnorm_layer(input, :type scale: float :param power: The hyper-parameter. :type power: float - :param num_channels: input layer's filers number or channels. If - num_channels is None, it will be set automatically. - :param layer_attr: Extra Layer Attribute. + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. + :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -3032,11 +3118,12 @@ def batch_norm_layer(input, param_attr=None, layer_attr=None, batch_norm_type=None, + epsilon=1e-5, moving_average_fraction=0.9, use_global_stats=None, mean_var_names=None): """ - Batch Normalization Layer. The notation of this layer as follow. + Batch Normalization Layer. The notation of this layer is as follows. :math:`x` is the input features over a mini-batch. @@ -3050,8 +3137,10 @@ def batch_norm_layer(input, \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift - The details of batch normalization please refer to this - `paper `_. 
+ Reference: + Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift + http://arxiv.org/abs/1502.03167 The example usage is: @@ -3061,48 +3150,49 @@ def batch_norm_layer(input, :param name: The name of this layer. It is optional. :type name: basestring - :param input: batch normalization input. Better be linear activation. - Because there is an activation inside batch_normalization. + :param input: The input of this layer, on which batch normalization is performed. :type input: LayerOutput :param batch_norm_type: We have batch_norm, mkldnn_batch_norm and cudnn_batch_norm. batch_norm supports CPU, MKLDNN and GPU. cudnn_batch_norm requires cuDNN version greater or equal to v4 (>=v4). But cudnn_batch_norm is faster and needs less memory than batch_norm. mkldnn_batch_norm requires - enable use_mkldnn. By default (None), we will - automaticly select cudnn_batch_norm for GPU, + use_mkldnn to be enabled. By default (None), we will + automatically select cudnn_batch_norm for GPU, mkldnn_batch_norm for MKLDNN and batch_norm for CPU. - Otherwise, select batch norm type based on the - specified type. If you use cudnn_batch_norm, - we suggested you use latest version, such as v5.1. + Users can specify the batch norm type. If you use + cudnn_batch_norm, we suggest you use the latest version, + such as v5.1. :type batch_norm_type: None | string, None or "batch_norm" or "cudnn_batch_norm" or "mkldnn_batch_norm" - :param act: Activation type. Better be relu. Because batch - normalization will normalize input near zero. + :param act: Activation type. ReluActivation is the default activation. :type act: BaseActivation - :param num_channels: num of image channels or previous layer's number of - filters. None will automatically get from layer's - input. + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. 
:type num_channels: int - :param bias_attr: :math:`\\beta`, better be zero when initialize. So the - initial_std=0, initial_mean=1 is best practice. + :param bias_attr: :math:`\\beta`. The bias attribute. If the parameter is set to + False or an object whose type is not ParameterAttribute, no + bias is defined. If the parameter is set to True, the bias is + initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: :math:`\\gamma`, better be one when initialize. So the - initial_std=0, initial_mean=1 is best practice. + :param param_attr: :math:`\\gamma`. The parameter attribute. See ParameterAttribute + for details. :type param_attr: ParameterAttribute - :param layer_attr: Extra Layer Attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :param use_global_stats: whether use moving mean/variance statistics - during testing peroid. If None or True, - it will use moving mean/variance statistics during - testing. If False, it will use the mean - and variance of current batch of test data for - testing. + :param use_global_stats: Whether to use moving mean/variance statistics during + testing period. If the parameter is set to None or + True, it will use moving mean/variance statistics + during testing. If the parameter is set to False, it + will use the mean and variance of the current batch + of test data. :type use_global_stats: bool | None. - :param moving_average_fraction: Factor used in the moving average - computation, referred to as facotr, - :math:`runningMean = newMean*(1-factor) - + runningMean*factor` + :param epsilon: The small constant added to the variance to improve numeric stability. + :type epsilon: float. + :param moving_average_fraction: Factor used in the moving average computation. + :math:`runningMean = newMean*(1-factor) + runningMean*factor` :type moving_average_fraction: float. 
:param mean_var_names: [mean name, variance name] :type mean_var_names: string list @@ -3118,6 +3208,7 @@ def batch_norm_layer(input, assert (batch_norm_type is None) or (batch_norm_type == "batch_norm") or \ (batch_norm_type == "mkldnn_batch_norm") or \ (batch_norm_type == "cudnn_batch_norm") + l = Layer( name=name, img3D=img3D, @@ -3127,6 +3218,7 @@ def batch_norm_layer(input, type=LayerType.BATCH_NORM_LAYER, batch_norm_type=batch_norm_type, bias=ParamAttr.to_bias(bias_attr), + epsilon=epsilon, moving_average_fraction=moving_average_fraction, use_global_stats=use_global_stats, mean_var_names=mean_var_names, @@ -3164,8 +3256,9 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None): :type input: LayerOutput :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: extra layer attributes. - :type layer_attr: ExtraLayerAttribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute + for details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -3200,7 +3293,8 @@ def row_l2_norm_layer(input, name=None, layer_attr=None): :type input: LayerOutput :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute + for details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -3237,31 +3331,27 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): act=ReluActivation(), bias_attr=False) - This layer just simply add all input layers together, then activate the sum - inputs. Each input of this layer should be the same size, which is also the - output size of this layer. + This layer just simply adds all input layers together, then activates the + sum. All inputs should share the same dimension, which is also the dimension + of this layer's output. 
There is no weight matrix for each input, because it just a simple add operation. If you want a complicated operation before add, please use mixed_layer. - It is a very good way to set dropout outside the layers. Since not all - PaddlePaddle layer support dropout, you can add an add_to layer, set - dropout here. - Please refer to dropout_layer for details. - :param name: The name of this layer. It is optional. :type name: basestring - :param input: Input layers. It could be a LayerOutput or list/tuple of + :param input: The input layers. It could be a LayerOutput or list/tuple of LayerOutput. :type input: LayerOutput | list | tuple - :param act: Activation Type. LinearActivation is the default. + :param act: Activation Type. LinearActivation is the default activation. :type act: BaseActivation :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: Extra Layer attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -3300,8 +3390,8 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): @layer_support(DROPOUT, ERROR_CLIPPING) def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): """ - Concat all input vector into one huge vector. - Inputs can be list of LayerOutput or list of projection. + Concatenate all input vectors to one vector. + Inputs can be a list of LayerOutput or a list of projection. The example usage is: @@ -3311,11 +3401,12 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): :param name: The name of this layer. It is optional. 
:type name: basestring - :param input: input layers or projections + :param input: The input layers or projections :type input: list | tuple | collections.Sequence - :param act: Activation type. IdentityActivation is the default. + :param act: Activation type. IdentityActivation is the default activation. :type act: BaseActivation - :param layer_attr: Extra Layer Attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -3385,7 +3476,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, bias_attr=None): """ - Concat sequence a with sequence b. + Concatenate sequence a and sequence b. Inputs: - a = [a1, a2, ..., am] @@ -3404,13 +3495,14 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, :param name: The name of this layer. It is optional. :type name: basestring - :param a: input sequence layer + :param a: The first input sequence layer :type a: LayerOutput - :param b: input sequence layer + :param b: The second input sequence layer :type b: LayerOutput - :param act: Activation type. IdentityActivation is the default. + :param act: Activation type. IdentityActivation is the default activation. :type act: BaseActivation - :param layer_attr: Extra Layer Attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the @@ -3447,31 +3539,25 @@ def memory(name, boot_bias_active_type=None, boot_with_const_id=None): """ - The memory layers is a layer cross each time step. Reference this output - as previous time step layer :code:`name` 's output. 
- - The default memory is zero in first time step, previous time step's - output in the rest time steps. + The memory takes a layer's output at previous time step as its own output. - If boot_bias, the first time step value is this bias and - with activation. + If boot_bias, the activation of the bias is the initial value of the memory. - If boot_with_const_id, then the first time stop is a IndexSlot, the - Arguments.ids()[0] is this :code:`cost_id`. + If boot_with_const_id is set, then the memory's output at the first time step + is a IndexSlot, the Arguments.ids()[0] is this :code:`cost_id`. - If boot_layer is not null, the memory is just the boot_layer's output. - Set :code:`is_seq` is true boot layer is sequence. + If boot_layer is specified, the memory's output at the first time step will + be the boot_layer's output. - The same name layer in recurrent group will set memory on each time - step. + In other case, the default memory's output at the first time step is zero. .. code-block:: python mem = memory(size=256, name='state') state = fc_layer(input=mem, size=256, name='state') - If you do not want to specify the name, you can equivalently use set_input() - to specify the layer needs to be remembered as the following: + If you do not want to specify the name, you can also use set_input() + to specify the layer to be remembered as the following: .. code-block:: python @@ -3479,26 +3565,31 @@ def memory(name, state = fc_layer(input=mem, size=256) mem.set_input(mem) - :param name: the name of the layer which this memory remembers. + :param name: The name of the layer which this memory remembers. If name is None, user should call set_input() to specify the name of the layer which this memory remembers. :type name: basestring - :param size: size of memory. + :param size: The dimensionality of memory. :type size: int - :param memory_name: the name of the memory. - It is ignored when name is provided. + :param memory_name: The name of the memory. 
It is ignored when name is provided. :type memory_name: basestring :param is_seq: DEPRECATED. is sequence for boot_layer :type is_seq: bool - :param boot_layer: boot layer of memory. + :param boot_layer: This parameter specifies memory's output at the first time + step and the output is boot_layer's output. :type boot_layer: LayerOutput | None - :param boot_bias: boot layer's bias + :param boot_bias: The bias attribute of memory's output at the first time step. + If the parameter is set to False or an object whose type is not + ParameterAttribute, no bias is defined. If the parameter is set + to True, the bias is initialized to zero. :type boot_bias: ParameterAttribute | None - :param boot_bias_active_type: boot layer's active type. + :param boot_bias_active_type: Activation type for memory's bias at the first time + step. LinearActivation is the default activation. :type boot_bias_active_type: BaseActivation - :param boot_with_const_id: boot layer's id. + :param boot_with_const_id: This parameter specifies memory's output at the first + time step and the output is an index. :type boot_with_const_id: int - :return: LayerOutput object which is a memory. + :return: LayerOutput object. :rtype: LayerOutput """ if boot_bias_active_type is None: @@ -3574,30 +3665,32 @@ def lstm_step_layer(input, ... - This layer has two outputs. Default output is :math:`h_t`. The other - output is :math:`o_t`, whose name is 'state' and can use + This layer has two outputs. The default output is :math:`h_t`. The other + output is :math:`o_t`, whose name is 'state' and users can use :code:`get_output_layer` to extract this output. :param name: The name of this layer. It is optional. :type name: basestring - :param size: Layer's size. NOTE: lstm layer's size, should be equal to - :code:`input.size/4`, and should be equal to - :code:`state.size`. + :param size: The dimension of this layer's output, which must be + equal to the dimension of the state. 
:type size: int - :param input: input layer. :math:`Wx_t + Wh_{t-1}` + :param input: The input of this layer. :type input: LayerOutput - :param state: State Layer. :math:`c_{t-1}` + :param state: The state of the LSTM unit. :type state: LayerOutput - :param act: Activation type. TanhActivation is the default. + :param act: Activation type. TanhActivation is the default activation. :type act: BaseActivation - :param gate_act: Gate Activation Type. SigmoidActivation is the default. + :param gate_act: Activation type of the gate. SigmoidActivation is the + default activation. :type gate_act: BaseActivation - :param state_act: State Activation Type. TanhActivation is the default. + :param state_act: Activation type of the state. TanhActivation is the + default activation. :type state_act: BaseActivation - :param bias_attr: The parameter attribute for bias. If this parameter is - set to True or None, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | True - :param layer_attr: layer's extra attribute. + :param bias_attr: The bias attribute. If the parameter is set to False or an object + whose type is not ParameterAttribute, no bias is defined. If the + parameter is set to True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute | None | bool | Any + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -3642,23 +3735,31 @@ def gru_step_layer(input, layer_attr=None): """ - :param input: + :param input: The input of this layer, whose dimension can be divided by 3. :type input: LayerOutput - :param output_mem: - :param size: - :param act: + :param output_mem: A memory which memorizes the output of this layer at previous + time step. + :type output_mem: LayerOutput + :param size: The dimension of this layer's output. 
If it is not set or set to None, + it will be set to one-third of the dimension of the input automatically. + :type size: int + :param act: Activation type of this layer's output. TanhActivation + is the default activation. :type act: BaseActivation :param name: The name of this layer. It is optional. - :param gate_act: Activation type of this layer's two gates. Default is Sigmoid. + :type name: basestring + :param gate_act: Activation type of this layer's two gates. SigmoidActivation is + the default activation. :type gate_act: BaseActivation :param bias_attr: The parameter attribute for bias. If this parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If this parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: the parameter_attribute for transforming the output_mem - from previous step. - :param layer_attr: + :param param_attr: The parameter attribute. See ParameterAttribute for details. + :type param_attr: ParameterAttribute + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -3703,25 +3804,34 @@ def gru_step_naive_layer(input, param_attr=None, layer_attr=None): """ - GRU Step Layer, but using MixedLayer to generate. It support ERROR_CLIPPING + GRU Step Layer, which is realized using PaddlePaddle API. It supports ERROR_CLIPPING and DROPOUT. - :param input: - :param output_mem: - :param size: + :param input: The input of this layer, whose dimensionality can be divided by 3. + :param output_mem: A memory which memorizes the output of this layer at previous + time step. + :type output_mem: LayerOutput + :param size: The dimension of this layer's output. If it is not set or set to None, + it will be set to one-third of the dimension of the input automatically. 
+ :type size: int :param name: The name of this layer. It is optional. - :param act: + :type name: basestring + :param act: Activation type of this layer's output. TanhActivation + is the default activation. :type act: BaseActivation - :param gate_act: Activation type of this layer's two gates. Default is Sigmoid. + :param gate_act: Activation type of this layer's two gates. SigmoidActivation + is the default activation. :type gate_act: BaseActivation :param bias_attr: The parameter attribute for bias. If this parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If this parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: - :param layer_attr: - :return: + :param param_attr: The parameter attribute. See ParameterAttribute for details. + :type param_attr: ParameterAttribute + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. + :type layer_attr: ExtraLayerAttribute + :return: LayerOutput object. :rtype: LayerOutput """ if input.size % 3 != 0: @@ -3783,12 +3893,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None): :param name: The name of this layer. It is optional. :type name: basestring - :param input: get output layer's input. And this layer should contains + :param input: The input layer. And this layer should contain multiple outputs. :type input: LayerOutput - :param arg_name: Output name from input. + :param arg_name: The name of the output to be extracted from the input layer. :type arg_name: basestring - :param layer_attr: Layer's extra attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :return: LayerOutput object. :rtype: LayerOutput """ @@ -3845,18 +3956,20 @@ def recurrent_layer(input, :param input: The input of this layer. :type input: LayerOutput - :param act: Activation type. TanhActivation is the default. 
+ :param act: Activation type. TanhActivation is the default activation. :type act: BaseActivation - :param bias_attr: The parameter attribute for bias. If this parameter is set to + :param bias_attr: The parameter attribute for bias. If this parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: parameter attribute. + :param param_attr: The parameter attribute. See ParameterAttribute for + details. :type param_attr: ParameterAttribute :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: Layer Attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -3881,7 +3994,7 @@ def recurrent_layer(input, class StaticInput(object): """ StaticInput is only used in recurrent_group which defines a read-only memory - that can be a sequence or non-sequence. + and can be a sequence or non-sequence. :param size: DEPRECATED :param is_seq: DEPRECATED """ @@ -3914,8 +4027,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): Recurrent layer group is an extremely flexible recurrent unit in PaddlePaddle. As long as the user defines the calculation done within a time step, PaddlePaddle will iterate such a recurrent calculation over - sequence input. This is extremely usefull for attention based model, or - Neural Turning Machine like models. + sequence input. This is useful for attention-based models, or Neural + Turing Machine like models. 
The basic usage (time steps) is: @@ -3937,18 +4050,17 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): demo/seqToseq/seqToseq_net.py - sequence steps: paddle/gserver/tests/sequence_nest_layer_group.conf - :param step: recurrent one time step function.The input of this function is - input of the group. The return of this function will be - recurrent group's return value. + :param step: A step function which takes the input of recurrent_group as its own + input and returns values as recurrent_group's output every time step. - The recurrent group scatter a sequence into time steps. And - for each time step, will invoke step function, and return - a time step result. Then gather each time step of output into + The recurrent group scatters a sequence into time steps. And + for each time step, it will invoke step function, and return + a time step result. Then gather outputs of each time step into layer group's output. :type step: callable - :param name: recurrent_group's name. + :param name: The recurrent_group's name. It is optional. :type name: basestring :param input: Input links array. @@ -3956,11 +4068,11 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): LayerOutput will be scattered into time steps. SubsequenceInput will be scattered into sequence steps. StaticInput will be imported to each time step, and doesn't change - through time. It's a mechanism to access layer outside step function. + over time. It's a mechanism to access layer outside step function. :type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple - :param reverse: If reverse is set true, the recurrent unit will process the + :param reverse: If reverse is set to True, the recurrent unit will process the input sequence in a reverse order. :type reverse: bool @@ -4095,7 +4207,8 @@ def maxid_layer(input, name=None, layer_attr=None): :type input: LayerOutput :param name: The name of this layer. It is optional. 
:type name: basestring - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -4114,6 +4227,45 @@ def maxid_layer(input, name=None, layer_attr=None): size=l.config.size) +@wrap_name_default() +def dot_prod_layer(input1, input2, name=None, layer_attr=None): + """ + A layer for computing the dot product of two vectors. + + The example usage is: + + .. code-block:: python + + dot_prod = dot_prod_layer(input1=vec1, input2=vec2) + + :param name: The name of this layer. It is optional. + :type name: basestring + :param input1: The first input layer. + :type input1: LayerOutput + :param input2: The second input layer. + :type input2: LayerOutput + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute. + :return: LayerOutput object. + :rtype: LayerOutput + """ + assert isinstance(input1, LayerOutput) + assert isinstance(input2, LayerOutput) + assert input1.size == input2.size, ("Two inputs should have the same size.") + + l = Layer( + name=name, + type=LayerType.DOT_PROD_LAYER, + inputs=[input1.name, input2.name], + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name=name, + layer_type=LayerType.DOT_PROD_LAYER, + parents=[input1, input2], + size=l.config.size) + + @wrap_name_default() def out_prod_layer(input1, input2, name=None, layer_attr=None): """ @@ -4128,11 +4280,12 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None): :param name: The name of this layer. It is optional. :type name: basestring - :param input1: The first input layer name. + :param input1: The first input layer. :type input: LayerOutput - :param input2: The second input layer name. + :param input2: The second input layer. :type input2: LayerOutput - :param layer_attr: extra layer attributes. 
+ :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -4171,9 +4324,10 @@ def eos_layer(input, eos_id, name=None, layer_attr=None): :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param eos_id: end id of sequence + :param eos_id: End id of sequence :type eos_id: int - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -4234,8 +4388,9 @@ def beam_search(step, - machine translation : demo/seqToseq/translation/gen.conf \ demo/seqToseq/seqToseq_net.py - :param name: Name of the recurrent unit that generates sequences. - :type name: base string + :param name: The name of the recurrent unit that is responsible for + generating sequences. It is optional. + :type name: basestring :param step: A callable function that defines the calculation in a time step, and it is applied to sequences with arbitrary length by sharing a same set of weights. @@ -4360,16 +4515,18 @@ def square_error_cost(input, :param name: The name of this layer. It is optional. :type name: basestring - :param input: Network prediction. + :param input: The first input layer. :type input: LayerOutput - :param label: Data label. + :param label: The input label. :type label: LayerOutput - :param weight: The weight affects the cost, namely the scale of cost. - It is an optional argument. + :param weight: The weight layer defines a weight for each sample in the + mini-batch. It is optional. :type weight: LayerOutput - :param coeff: The coefficient affects the gradient in the backward. + :param coeff: The weight of the gradient in the back propagation. + 1.0 is the default value. :type coeff: float - :param layer_attr: layer's extra attribute. 
+ :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -4402,17 +4559,20 @@ def classification_cost(input, :param name: The name of this layer. It is optional. :type name: basestring - :param input: input layer name. network output. + :param input: The first input layer. :type input: LayerOutput - :param label: label layer name. data_layer often. + :param label: The input label. :type label: LayerOutput - :param weight: The weight affects the cost, namely the scale of cost. - It is an optional argument. + :param weight: The weight layer defines a weight for each sample in the + mini-batch. It is optional. :type weight: LayerOutput - :param evaluator: Evaluator method. - :param layer_attr: layer's extra attribute. + :param evaluator: Evaluator method. classification_error_evaluator is the default. + :type evaluator: Evaluator method + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :param coeff: The coefficient affects the gradient in the backward. + :param coeff: The weight of the gradient in the back propagation. + 1.0 is the default value. :type coeff: float :return: LayerOutput object. :rtype: LayerOutput @@ -4465,7 +4625,7 @@ def conv_operator(img, Different from img_conv_layer, conv_op is an Operator, which can be used in mixed_layer. And conv_op takes two inputs to perform convolution. The first input is the image and the second is filter kernel. It only - support GPU mode. + supports GPU mode. The example usage is: @@ -4477,27 +4637,31 @@ def conv_operator(img, num_filters=64, num_channels=64) - :param img: input image + :param img: The input image. :type img: LayerOutput - :param filter: input filter + :param filter: The input filter. :type filter: LayerOutput - :param filter_size: The x dimension of a filter kernel. 
+ :param filter_size: The dimension of the filter kernel on the x axis. :type filter_size: int - :param filter_size_y: The y dimension of a filter kernel. Since - PaddlePaddle now supports rectangular filters, - the filter's shape can be (filter_size, filter_size_y). + :param filter_size_y: The dimension of the filter kernel on the y axis. + If the parameter is not set or set to None, it will + be set to 'filter_size' automatically. :type filter_size_y: int - :param num_filters: channel of output data. + :param num_filters: The number of the output channels. :type num_filters: int - :param num_channels: channel of input data. + :param num_channels: The number of the input channels. If the parameter is not set + or set to None, it will be automatically set to the channel + number of the 'img'. :type num_channels: int - :param stride: The x dimension of the stride. + :param stride: The stride on the x axis. :type stride: int - :param stride_y: The y dimension of the stride. + :param stride_y: The stride on the y axis. If the parameter is not set or + set to None, it will be set to 'stride' automatically. :type stride_y: int - :param padding: The x dimension of padding. + :param padding: The padding size on the x axis. :type padding: int - :param padding_y: The y dimension of padding. + :param padding_y: The padding size on the y axis. If the parameter is not set + or set to None, it will be set to 'padding' automatically. :type padding_y: int :return: A ConvOperator Object. :rtype: ConvOperator @@ -4548,9 +4712,9 @@ def conv_projection(input, param_attr=None, trans=False): """ - Different from img_conv_layer and conv_op, conv_projection is an Projection, - which can be used in mixed_layer and conat_layer. It use cudnn to implement - conv and only support GPU mode. + Different from img_conv_layer and conv_op, conv_projection is a Projection, + which can be used in mixed_layer and concat_layer. It uses cudnn to implement + convolution and only supports GPU mode.
The example usage is: @@ -4563,32 +4727,45 @@ def conv_projection(input, :param input: The input of this layer. :type input: LayerOutput - :param filter_size: The x dimension of a filter kernel. - :type filter_size: int - :param filter_size_y: The y dimension of a filter kernel. Since - PaddlePaddle now supports rectangular filters, - the filter's shape can be (filter_size, filter_size_y). + :param filter_size: The dimensions of the filter kernel. If the parameter is + set to one integer, the two dimensions on x and y axes + will be the same when filter_size_y is not set. If it is set + to a list, the first element indicates the dimension on + the x axis, and the second is used to specify the dimension + on the y axis when filter_size_y is not provided. + :type filter_size: int | tuple | list + :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter + is not set, it will be set automatically according to filter_size. :type filter_size_y: int - :param num_filters: channel of output data. + :param num_filters: The number of filters. :type num_filters: int - :param num_channels: channel of input data. + :param num_channels: The number of the input channels. :type num_channels: int - :param stride: The x dimension of the stride. - :type stride: int - :param stride_y: The y dimension of the stride. + :param stride: The strides. If the parameter is set to one integer, the strides + on x and y axes will be the same when stride_y is not set. If it is + set to a list, the first element indicates the stride on the x axis, + and the second is used to specify the stride on the y axis when + stride_y is not provided. + :type stride: int | tuple | list + :param stride_y: The stride on the y axis. :type stride_y: int - :param padding: The x dimension of padding. - :type padding: int - :param padding_y: The y dimension of padding. + :param padding: The padding sizes.
If the parameter is set to one integer, the padding + sizes on x and y axes will be the same when padding_y is not set. If it + is set to a list, the first element indicates the padding size on the + x axis, and the second is used to specify the padding size on the y axis + when padding_y is not provided. + :type padding: int | tuple | list + :param padding_y: The padding size on the y axis. :type padding_y: int :param groups: The group number. :type groups: int - :param param_attr: Convolution param attribute. None means default attribute + :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for + details. :type param_attr: ParameterAttribute - :param trans: whether it is convTrans or conv + :param trans: Whether it is ConvTransProjection or ConvProjection :type trans: bool - :return: A DotMulProjection Object. - :rtype: DotMulProjection + :return: A Projection Object. + :rtype: ConvTransProjection | ConvProjection """ if num_channels is None: assert input.num_filters is not None @@ -4653,13 +4830,13 @@ def pad_layer(input, layer_attr=None): """ This operation pads zeros to the input data according to pad_c,pad_h - and pad_w. pad_c, pad_h, pad_w specifies the which dimension and size - of padding. And the input data shape is NCHW. + and pad_w. pad_c, pad_h, pad_w specify the size in the corresponding + dimension. And the input data shape is NCHW. - For example, pad_c=[2,3] means padding 2 zeros before the - input data and 3 zeros after the input data in channel dimension. - pad_h means padding zeros in height dimension. pad_w means padding zeros - in width dimension. + For example, pad_c=[2,3] means padding 2 zeros before the input data + and 3 zeros after the input data in the channel dimension. pad_h means + padding zeros in the height dimension. pad_w means padding zeros in the + width dimension. For example, @@ -4696,13 +4873,14 @@ def pad_layer(input, :param input: The input of this layer.
:type input: LayerOutput - :param pad_c: padding size in channel dimension. + :param pad_c: The padding size in the channel dimension. :type pad_c: list | None - :param pad_h: padding size in height dimension. + :param pad_h: The padding size in the height dimension. :type pad_h: list | None - :param pad_w: padding size in width dimension. + :param pad_w: The padding size in the width dimension. :type pad_w: list | None - :param layer_attr: Extra Layer Attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :param name: The name of this layer. It is optional. :type name: basestring @@ -4751,7 +4929,7 @@ def pad_layer(input, @layer_support() def conv_shift_layer(a, b, name=None, layer_attr=None): """ - This layer performs cyclic convolution for two input. For example: + This layer performs cyclic convolution on two inputs. For example: - a[in]: contains M elements. - b[in]: contains N elements (N should be odd). - c[out]: contains M elements. @@ -4760,7 +4938,7 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j} - In this formular: + In this formula: - a's index is computed modulo M. When it is negative, then get item from the right side (which is the end of array) to the left. - b's index is computed modulo N. When it is negative, then get item from @@ -4774,11 +4952,12 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): :param name: The name of this layer. It is optional. :type name: basestring - :param a: Input layer a. + :param a: The first input of this layer. :type a: LayerOutput - :param b: input layer b. + :param b: The second input of this layer. :type b: LayerOutput - :param layer_attr: layer's extra attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. 
:rtype: LayerOutput @@ -4809,8 +4988,8 @@ def tensor_layer(a, bias_attr=None, layer_attr=None): """ - This layer performs tensor operation for two input. - For example, each sample: + This layer performs tensor operation on two inputs. + For example: .. math:: y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1 @@ -4830,22 +5009,24 @@ def tensor_layer(a, :param name: The name of this layer. It is optional. :type name: basestring - :param a: Input layer a. + :param a: The first input of this layer. :type a: LayerOutput - :param b: input layer b. + :param b: The second input of this layer. :type b: LayerOutput - :param size: the layer dimension. - :type size: int. - :param act: Activation type. LinearActivation is the default. + :param size: The dimension of this layer. + :type size: int + :param act: Activation type. LinearActivation is the default activation. :type act: BaseActivation - :param param_attr: The Parameter Attribute. + :param param_attr: The parameter attribute. See ParameterAttribute for + details. :type param_attr: ParameterAttribute :param bias_attr: The parameter attribute for bias. If this parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If this parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: Extra Layer config. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute | None :return: LayerOutput object. :rtype: LayerOutput @@ -4881,7 +5062,7 @@ def selective_fc_layer(input, layer_attr=None): """ Selectived fully connected layer. Different from fc_layer, the output - of this layer maybe sparse. It requires an additional input to indicate + of this layer can be sparse. It requires an additional input to indicate several selected columns for output. If the selected columns is not specified, selective_fc_layer acts exactly like fc_layer. 
@@ -4895,22 +5076,34 @@ def selective_fc_layer(input, :type name: basestring :param input: The input of this layer. :type input: LayerOutput | list | tuple - :param select: The select layer. The output of select layer should be a - sparse binary matrix, and treat as the mask of selective fc. - If is None, acts exactly like fc_layer. + :param select: The layer to select columns to output. It should be a sparse + binary matrix, and is treated as the mask of selective fc. If + it is not set or set to None, selective_fc_layer acts exactly + like fc_layer. :type select: LayerOutput - :param size: The layer dimension. + :param size: The dimension of this layer, which should be equal to that of + the layer 'select'. :type size: int - :param act: Activation type. TanhActivation is the default. + :param act: Activation type. TanhActivation is the default activation. :type act: BaseActivation - :param param_attr: The Parameter Attribute. + :param pass_generation: The flag which indicates whether it is during generation. + :type pass_generation: bool + :param has_selected_colums: The flag which indicates whether the parameter 'select' + has been set. True is the default. + :type has_selected_colums: bool + :param mul_ratio: A ratio helps to judge how sparse the output is and determine + the computation method for speed consideration. + :type mul_ratio: float + :param param_attr: The parameter attribute. See ParameterAttribute for + details. :type param_attr: ParameterAttribute :param bias_attr: The parameter attribute for bias. If this parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If this parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: Extra Layer config. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute | None :return: LayerOutput object. 
:rtype: LayerOutput @@ -4961,7 +5154,7 @@ def selective_fc_layer(input, @layer_support() def sampling_id_layer(input, name=None, layer_attr=None): """ - A layer for sampling id from multinomial distribution from the input layer. + A layer for sampling id from a multinomial distribution from the input layer. Sampling one id for one sample. The simple usage is: @@ -4974,8 +5167,9 @@ def sampling_id_layer(input, name=None, layer_attr=None): :type input: LayerOutput :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: Extra Layer config. - :type layer_attr: ExtraLayerAttribute | None + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -4996,8 +5190,7 @@ def slope_intercept_layer(input, intercept=0.0, layer_attr=None): """ - This layer for applying a slope and an intercept to the input - element-wise. There is no activation and weight. + This layer for applying a slope and an intercept to the input. .. math:: y = slope * x + intercept @@ -5012,12 +5205,13 @@ def slope_intercept_layer(input, :type input: LayerOutput :param name: The name of this layer. It is optional. :type name: basestring - :param slope: the scale factor. - :type slope: float. - :param intercept: the offset. - :type intercept: float. - :param layer_attr: Extra Layer config. - :type layer_attr: ExtraLayerAttribute | None + :param slope: The scale factor. + :type slope: float + :param intercept: The offset. + :type intercept: float + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -5072,12 +5266,13 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None): :type weights: LayerOutput :param vectors: The vector layer. 
:type vectors: LayerOutput - :param size: the dimension of this layer. + :param size: The dimension of this layer. :type size: int :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: Extra Layer config. - :type layer_attr: ExtraLayerAttribute | None + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -5124,11 +5319,11 @@ def block_expand_layer(input, outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x - The expand method is the same with ExpandConvLayer, but saved the transposed + The expanding method is the same with ExpandConvLayer, but saved the transposed value. After expanding, output.sequenceStartPositions will store timeline. - The number of time steps are outputH * outputW and the dimension of each + The number of time steps is outputH * outputW and the dimension of each time step is block_y * block_x * num_channels. This layer can be used after - convolution neural network, and before recurrent neural network. + convolutional neural network, and before recurrent neural network. The simple usage is: @@ -5143,8 +5338,10 @@ def block_expand_layer(input, :param input: The input of this layer. :type input: LayerOutput - :param num_channels: The channel number of input layer. - :type num_channels: int | None + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. + :type num_channels: int :param block_x: The width of sub block. :type block_x: int :param block_y: The width of sub block. @@ -5158,9 +5355,10 @@ def block_expand_layer(input, :param padding_y: The padding size in vertical direction. :type padding_y: int :param name: The name of this layer. It is optional. - :type name: None | basestring. - :param layer_attr: Extra Layer config. 
- :type layer_attr: ExtraLayerAttribute | None + :type name: basestring. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -5190,12 +5388,19 @@ def block_expand_layer(input, @layer_support() def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): """ - A layer to do max out on conv layer output. - - Input: output of a conv layer. - - Output: feature map size same as input. Channel is (input channel) / groups. + A layer to do max out on convolutional layer output. + - Input: the output of a convolutional layer. + - Output: feature map size same as the input's, and its channel number is + (input channel) / groups. So groups should be larger than 1, and the num of channels should be able - to devided by groups. + to be divided by groups. + + Reference: + Maxout Networks + http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf + Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks + https://arxiv.org/pdf/1312.6082v4.pdf .. math:: y_{si+j} = \max_k x_{gsi + sk + j} @@ -5205,12 +5410,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): 0 \le j < s 0 \le k < groups - Please refer to Paper: - - Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf - - Multi-digit Number Recognition from Street View \ - Imagery using Deep Convolutional Neural Networks: \ - https://arxiv.org/pdf/1312.6082v4.pdf - The simple usage is: .. code-block:: python @@ -5221,14 +5420,16 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): :param input: The input of this layer. :type input: LayerOutput - :param num_channels: The channel number of input layer. If None will be set - automatically from previous output. - :type num_channels: int | None + :param num_channels: The number of input channels.
If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. + :type num_channels: int :param groups: The group number of input layer. :type groups: int :param name: The name of this layer. It is optional. - :type name: None | basestring. - :param layer_attr: Extra Layer attribute. + :type name: basestring + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -5260,20 +5461,20 @@ def ctc_layer(input, layer_attr=None): """ Connectionist Temporal Classification (CTC) is designed for temporal - classication task. That is, for sequence labeling problems where the + classification tasks, e.g. sequence labeling problems where the alignment between the inputs and the target labels is unknown. - More details can be found by referring to `Connectionist Temporal - Classification: Labelling Unsegmented Sequence Data with Recurrent - Neural Networks `_ + Reference: + Connectionist Temporal Classification: Labelling Unsegmented Sequence Data + with Recurrent Neural Networks + http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf Note: - Considering the 'blank' label needed by CTC, you need to use - (num_classes + 1) as the input size. num_classes is the category number. - And the 'blank' is the last category index. So the size of 'input' layer, such as - fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer - should also be num_classes + 1. + Considering the 'blank' label needed by CTC, you need to use (num_classes + 1) + as the size of the input, where num_classes is the category number. + And the 'blank' is the last category index. So the size of 'input' layer (e.g. + fc_layer with softmax activation) should be (num_classes + 1). The size of + ctc_layer should also be (num_classes + 1).
The example usage is: @@ -5286,16 +5487,17 @@ def ctc_layer(input, :param input: The input of this layer. :type input: LayerOutput - :param label: The data layer of label with variable length. + :param label: The input label. :type label: LayerOutput - :param size: category numbers + 1. + :param size: The dimension of this layer, which must be equal to (category number + 1). :type size: int :param name: The name of this layer. It is optional. - :type name: basestring | None - :param norm_by_times: Whether to normalization by times. False by default. + :type name: basestring + :param norm_by_times: Whether to do normalization by times. False is the default. :type norm_by_times: bool - :param layer_attr: Extra Layer config. - :type layer_attr: ExtraLayerAttribute | None + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -5336,20 +5538,19 @@ def warp_ctc_layer(input, building process, PaddlePaddle will clone the source codes, build and install it to :code:`third_party/install/warpctc` directory. - More details of CTC can be found by referring to `Connectionist Temporal - Classification: Labelling Unsegmented Sequence Data with Recurrent - Neural Networks `_. + Reference: + Connectionist Temporal Classification: Labelling Unsegmented Sequence Data + with Recurrent Neural Networks + http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf Note: - - Let num_classes represent the category number. Considering the 'blank' - label needed by CTC, you need to use (num_classes + 1) as the input size. - Thus, the size of both warp_ctc layer and 'input' layer should be set to - num_classes + 1. + - Let num_classes represent the category number. Considering the 'blank' + label needed by CTC, you need to use (num_classes + 1) as the size of + warp_ctc layer.
- You can set 'blank' to any value ranged in [0, num_classes], which - should be consistent as that used in your labels. + should be consistent with those used in your labels. - As a native 'softmax' activation is interated to the warp-ctc library, - 'linear' activation is expected instead in the 'input' layer. + 'linear' activation is expected to be used instead in the 'input' layer. The example usage is: @@ -5363,18 +5564,19 @@ def warp_ctc_layer(input, :param input: The input of this layer. :type input: LayerOutput - :param label: The data layer of label with variable length. + :param label: The input label. :type label: LayerOutput - :param size: category numbers + 1. + :param size: The dimension of this layer, which must be equal to (category number + 1). :type size: int :param name: The name of this layer. It is optional. - :type name: basestring | None - :param blank: the 'blank' label used in ctc + :type name: basestring + :param blank: The 'blank' label used in ctc. :type blank: int - :param norm_by_times: Whether to normalization by times. False by default. + :param norm_by_times: Whether to do normalization by times. False is the default. :type norm_by_times: bool - :param layer_attr: Extra Layer config. - :type layer_attr: ExtraLayerAttribute | None + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -5420,23 +5622,26 @@ def crf_layer(input, label=label, size=label_dim) - :param input: The first input layer is the feature. + :param input: The first input layer. :type input: LayerOutput - :param label: The second input layer is label. + :param label: The input label. :type label: LayerOutput :param size: The category number. :type size: int - :param weight: The third layer is "weight" of each sample, which is an - optional argument. + :param weight: The weight layer defines a weight for each sample in the + mini-batch. 
It is optional. :type weight: LayerOutput - :param param_attr: Parameter attribute. None means default attribute + :param param_attr: The parameter attribute. See ParameterAttribute for + details. :type param_attr: ParameterAttribute :param name: The name of this layer. It is optional. - :type name: None | basestring - :param coeff: The coefficient affects the gradient in the backward. + :type name: basestring + :param coeff: The weight of the gradient in the back propagation. + 1.0 is the default value. :type coeff: float - :param layer_attr: Extra Layer config. - :type layer_attr: ExtraLayerAttribute | None + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -5482,9 +5687,9 @@ def crf_decoding_layer(input, """ A layer for calculating the decoding sequence of sequential conditional random field model. The decoding sequence is stored in output.ids. - If a second input is provided, it is treated as the ground-truth label, and - this layer will also calculate error. output.value[i] is 1 for incorrect - decoding or 0 for correct decoding. + If the input 'label' is provided, it is treated as the ground-truth label, and + this layer will also calculate error. output.value[i] is 1 for an incorrect + decoding and 0 for the correct. The example usage is: @@ -5495,16 +5700,18 @@ def crf_decoding_layer(input, :param input: The first input layer. :type input: LayerOutput - :param size: size of this layer. + :param size: The dimension of this layer. :type size: int - :param label: None or ground-truth label. - :type label: LayerOutput or None - :param param_attr: Parameter attribute. None means default attribute + :param label: The input label. + :type label: LayerOutput | None + :param param_attr: The parameter attribute. See ParameterAttribute for + details. :type param_attr: ParameterAttribute :param name: The name of this layer. It is optional. 
- :type name: None | basestring - :param layer_attr: Extra Layer config. - :type layer_attr: ExtraLayerAttribute | None + :type name: basestring + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -5551,8 +5758,7 @@ def nce_layer(input, bias_attr=None, layer_attr=None): """ - Noise-contrastive estimation. This layer implements the method in the - following paper: + Noise-contrastive estimation. Reference: A fast and simple algorithm for training neural probabilistic language @@ -5568,25 +5774,27 @@ def nce_layer(input, :param name: The name of this layer. It is optional. :type name: basestring - :param input: The input layers. It should be a LayerOutput or a list/tuple - of LayerOutput. + :param input: The first input of this layer. :type input: LayerOutput | list | tuple | collections.Sequence - :param label: The ground truth. + :param label: The input label. :type label: LayerOutput :param weight: The weight layer defines a weight for each sample in the - mini-batch. The default value is None. + mini-batch. It is optional. :type weight: LayerOutput - :param num_classes: The class number. + :param num_classes: The number of classes. :type num_classes: int - :param param_attr: The parameter attributes. - :type param_attr: ParameterAttribute|list - :param num_neg_samples: The number of sampled negative labels. The default - value is 10. + :param act: Activation type. SigmoidActivation is the default activation. + :type act: BaseActivation + :param param_attr: The parameter attribute. See ParameterAttribute for + details. + :type param_attr: ParameterAttribute + :param num_neg_samples: The number of sampled negative labels. 10 is the + default value. :type num_neg_samples: int :param neg_distribution: The discrete noisy distribution over the output space from which num_neg_samples negative labels are sampled. 
If this parameter is not set, a - uniform distribution will be used. A user defined + uniform distribution will be used. A user-defined distribution is a list whose length must be equal to the num_classes. Each member of the list defines the probability of a class given input x. @@ -5596,9 +5804,10 @@ def nce_layer(input, no bias is defined. If this parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: Extra Layer Attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :return: The LayerOutput object. + :return: LayerOutput object. :rtype: LayerOutput """ if isinstance(input, LayerOutput): @@ -5665,11 +5874,11 @@ def rank_cost(left, coeff=1.0, layer_attr=None): """ - A cost Layer for learning to rank using gradient descent. Details can refer - to `papers `_. - This layer contains at least three inputs. The weight is an optional - argument, which affects the cost. + A cost Layer for learning to rank using gradient descent. + + Reference: + Learning to Rank using Gradient Descent + http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf .. math:: @@ -5700,14 +5909,16 @@ def rank_cost(left, :type right: LayerOutput :param label: Label is 1 or 0, means positive order and reverse order. :type label: LayerOutput - :param weight: The weight affects the cost, namely the scale of cost. - It is an optional argument. + :param weight: The weight layer defines a weight for each sample in the + mini-batch. It is optional. :type weight: LayerOutput :param name: The name of this layer. It is optional. - :type name: None | basestring - :param coeff: The coefficient affects the gradient in the backward. + :type name: basestring + :param coeff: The weight of the gradient in the back propagation. + 1.0 is the default value. :type coeff: float - :param layer_attr: Extra Layer Attribute. 
+ :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -5752,25 +5963,25 @@ def lambda_cost(input, NDCG_num=8, max_sort_size=-1) - :param input: Samples of the same query should be loaded as sequence. + :param input: The first input of this layer, which is often a document + samples list of the same query and whose type must be sequence. :type input: LayerOutput - :param score: The 2nd input. Score of each sample. + :param score: The scores of the samples. :type input: LayerOutput :param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain), e.g., 5 for NDCG@5. It must be less than or equal to the - minimum size of lists. + minimum size of the list. :type NDCG_num: int - :param max_sort_size: The size of partial sorting in calculating gradient. - If max_sort_size = -1, then for each list, the - algorithm will sort the entire list to get gradient. - In other cases, max_sort_size must be greater than or - equal to NDCG_num. And if max_sort_size is greater - than the size of a list, the algorithm will sort the - entire list of get gradient. + :param max_sort_size: The size of partial sorting in calculating gradient. If + max_sort_size is equal to -1 or greater than the number + of the samples in the list, then the algorithm will sort + the entire list to compute the gradient. In other cases, + max_sort_size must be greater than or equal to NDCG_num. :type max_sort_size: int :param name: The name of this layer. It is optional. - :type name: None | basestring - :param layer_attr: Extra Layer Attribute. + :type name: basestring + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -5815,11 +6026,10 @@ def cross_entropy(input, :param name: The name of this layer. It is optional. 
:type name: basestring :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default. + 1.0 is the default value. :type coeff: float - :param weight: The cost of each sample is multiplied with each weight. - The weight should be a layer with size=1. Note that gradient - will not be calculated for weight. + :param weight: The weight layer defines a weight for each sample in the + mini-batch. It is optional. :type weight: LayerOutout :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. @@ -5864,7 +6074,7 @@ def cross_entropy_with_selfnorm(input, :param name: The name of this layer. It is optional. :type name: basestring :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default. + 1.0 is the default value. :type coeff: float :param softmax_selfnorm_alpha: The scale factor affects the cost. :type softmax_selfnorm_alpha: float @@ -5954,7 +6164,7 @@ def huber_regression_cost(input, :param delta: The difference between the observed and predicted values. :type delta: float :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default. + 1.0 is the default value. :type coeff: float :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. @@ -6004,7 +6214,7 @@ def huber_classification_cost(input, :param name: The name of this layer. It is optional. :type name: basestring :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default. + 1.0 is the default value. :type coeff: float :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. @@ -6049,7 +6259,7 @@ def multi_binary_label_cross_entropy(input, :param name: The name of this layer. It is optional. :type name: basestring :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default. + 1.0 is the default value. :type coeff: float :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. 
@@ -6220,7 +6430,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): :param name: The name of this layer. It is optional. :type name: basestring :param coeff: The weight of the gradient in the back propagation. - 1.0 is the default. + 1.0 is the default value. :type coeff: float :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. @@ -6372,7 +6582,7 @@ def row_conv_layer(input, :param context_len: The context length equals the lookahead step number plus one. :type context_len: int - :param act: Activation Type. LinearActivation is the default. + :param act: Activation Type. LinearActivation is the default activation. :type act: BaseActivation :param param_attr: The parameter attribute. See ParameterAttribute for details. @@ -6399,10 +6609,11 @@ def row_conv_layer(input, @layer_support() @wrap_name_default() -@wrap_param_attr_default() def prelu_layer(input, name=None, partial_sum=1, + channel_shared=None, + num_channels=None, param_attr=None, layer_attr=None): """ @@ -6433,6 +6644,14 @@ def prelu_layer(input, - partial_sum = number of outputs, indicates all elements share the same weight. :type partial_sum: int + :param channel_shared: whether or not the parameter are shared across channels. + + - channel_shared = True, we set the partial_sum to the number of outputs. + - channel_shared = False, we set the partial_sum to the number of elements in one channel. + + :type channel_shared: bool + :param num_channels: number of input channel. + :type num_channels: int :param param_attr: The parameter attribute. See ParameterAttribute for details. :type param_attr: ParameterAttribute :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for @@ -6443,7 +6662,25 @@ def prelu_layer(input, """ assert isinstance(input, LayerOutput), 'prelu_layer accepts only one input.' 
- assert isinstance(param_attr, ParameterAttribute) + + if not param_attr: + param_attr = ParamAttr(initial_mean=0.25, initial_std=0.0) + else: + assert isinstance(param_attr, ParameterAttribute) + + if num_channels is None: + assert input.num_filters is not None, \ + 'the input channel cannot be detected, please specify the num_channels parameter' + num_channels = input.num_filters + + if channel_shared is not None: + assert isinstance(channel_shared, bool) + assert (input.height != 0 and input.width != 0), \ + 'input height and widht must be setted' + if channel_shared: + partial_sum = input.height * input.width * num_channels + else: + partial_sum = input.height * input.width l = Layer( name=name, @@ -6455,6 +6692,7 @@ def prelu_layer(input, name=name, layer_type=LayerType.PRELU, parents=input, + num_filters=num_channels, size=l.config.size) @@ -6494,7 +6732,8 @@ def gated_unit_layer(input, :type input: LayerOutput :param size: The dimension of this layer's output. :type size: int - :param act: Activation type of the projection. LinearActivation is the default. + :param act: Activation type of the projection. LinearActivation is the default + activation. :type act: BaseActivation :param name: The name of this layer. It is optional. :type name: basestring @@ -6875,7 +7114,7 @@ def img_conv3d_layer(input, :type filter_size: int | tuple | list :param num_filters: The number of filters in each group. :type num_filters: int - :param act: Activation type. ReluActivation is the default. + :param act: Activation type. ReluActivation is the default activation. :type act: BaseActivation :param groups: The number of the filter groups. :type groups: int @@ -6890,8 +7129,8 @@ def img_conv3d_layer(input, parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any :param num_channels: The number of input channels. 
If the parameter is not set or - set to None, its actual value will be automatically set to - the channels number of the input . + set to None, its actual value will be automatically set to + the channels number of the input. :type num_channels: int :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for details. @@ -6903,7 +7142,7 @@ def img_conv3d_layer(input, :type layer_attr: ExtraLayerAttribute :param trans: True if it is a convTransLayer, False if it is a convLayer :type trans: bool - :param layer_type: Specify the layer_type. If the parameter is set, it must be "deconv3d" + :param layer_type: Specify the layer type. If the parameter is set, it must be "deconv3d" when trans=True. If not set, it will be automatically set to "deconv3d" when trans=True and "conv3d" when trans=False. :type layer_type: basestring @@ -7067,7 +7306,7 @@ def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None): :type offsets: LayerOutput :param sizes: The sizes of the sub-sequences, which should be sequence type. :type sizes: LayerOutput - :param act: Activation type, LinearActivation is the default. + :param act: Activation type, LinearActivation is the default activation. :type act: BaseActivation. :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index d323d34c3ff47614342934c2a02492f66d27dc10..9776ae18057d57dd994fac8b62090258252922c6 100644 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import math from activations import LinearActivation, ReluActivation, SoftmaxActivation, \ IdentityActivation, TanhActivation, SequenceSoftmaxActivation @@ -26,9 +26,9 @@ __all__ = [ 'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool", "img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg', 'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group', 'simple_gru', - 'simple_attention', 'dot_product_attention', 'simple_gru2', - 'bidirectional_gru', 'text_conv_pool', 'bidirectional_lstm', 'inputs', - 'outputs' + 'simple_attention', 'dot_product_attention', 'multi_head_attention', + 'simple_gru2', 'bidirectional_gru', 'text_conv_pool', 'bidirectional_lstm', + 'inputs', 'outputs' ] ###################################################### @@ -1476,10 +1476,8 @@ def dot_product_attention(encoded_sequence, expand_as=encoded_sequence, name='%s_expand' % name) - m = linear_comb_layer( - weights=expanded, - vectors=encoded_sequence, - name='%s_dot-product' % name) + m = dot_prod_layer( + input1=expanded, input2=encoded_sequence, name='%s_dot-product' % name) attention_weight = fc_layer( input=m, @@ -1498,6 +1496,134 @@ def dot_product_attention(encoded_sequence, input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name) +@wrap_name_default() +def multi_head_attention(query, + key, + value, + key_proj_size, + value_proj_size, + head_num, + attention_type, + softmax_param_attr=None, + name=None): + """ + Calculate and return a context vector with dot-product attention mechanism. + The dimension of the context vector equals to value_proj_size * head_num. + + Please refer to **Attention Is All You Need** for more details. The link is + as follows: + https://arxiv.org/abs/1706.03762. + + The example usage is: + + .. 
code-block:: python + + context = multi_head_attention(query=decoder_state, + key=enc_seq, + value=enc_seq, + key_proj_size=64, + value_pro_size=64, + head_num=8, + attention_type='dot-product attention') + + :param name: A prefix attached to the name of each layer that defined inside + the multi_head_attention. + :type name: basestring + :param softmax_param_attr: The parameter attribute of sequence softmax + that is used to produce attention weight. + :type softmax_param_attr: ParameterAttribute + :param query: query is used to calculate attention weights over values at current step. + :type query: LayerOutput + :param key: key is used to calculate the attention weight of the corresponding value. + :type key: LayerOutput + :param value: value is the sequence to be attended. + :type value: LayerOutput + :param key_proj_size: The dimension of the linear projection performed on key and query. + :type key_proj_size: int + :param value_proj_size: The dimension of the linear projection performed on value. + :type value_proj_size: int + :param head_num: The number of attention heads. + :type head_num: int + :param attention_type: The type of the attention mechanism used in each attention + heads. Now, we only support scaled dot-product attention and + additive attention. + :type attention_type: basestring + :return: The context vector. 
+ :rtype: LayerOutput + """ + assert attention_type in ['dot-product attention', 'additive attention'] + + with mixed_layer( + size=key_proj_size * head_num, + name='%s_query_proj' % name) as query_proj: + query_proj += full_matrix_projection(query) + query_proj = expand_layer(input=query_proj, expand_as=key) + + with mixed_layer( + size=key_proj_size * head_num, + name='%s_key_proj' % name) as key_proj: + key_proj += full_matrix_projection(key) + + with mixed_layer( + size=value_proj_size * head_num, + name='%s_value_proj' % name) as value_proj: + value_proj += full_matrix_projection(value) + + head_list = [] + for i in range(head_num): + with mixed_layer(size=key_proj_size) as sub_query_proj: + sub_query_proj += identity_projection( + query_proj, offset=key_proj_size * i, size=key_proj_size) + + with mixed_layer(size=key_proj_size) as sub_key_proj: + sub_key_proj += identity_projection( + key_proj, offset=key_proj_size * i, size=key_proj_size) + + with mixed_layer(size=value_proj_size) as sub_value_proj: + sub_value_proj += identity_projection( + value_proj, offset=value_proj_size * i, size=value_proj_size) + + if attention_type == 'dot-product attention': + m = dot_prod_layer( + input1=sub_query_proj, + input2=sub_key_proj, + name='%s_dot-product_%d' % (name, i)) + m = slope_intercept_layer( + input=m, + slope=math.sqrt(1.0 / key_proj_size), + name='%s_dot-product_scaling_%d' % (name, i)) + else: + with mixed_layer( + size=key_proj_size, + act=TanhActivation(), + name='%s_combine_%d' % (name, i)) as m: + m += identity_projection(sub_query_proj) + m += identity_projection(sub_key_proj) + + attention_weight = fc_layer( + input=m, + size=1, + act=SequenceSoftmaxActivation(), + param_attr=softmax_param_attr, + name="%s_softmax_%d" % (name, i), + bias_attr=False) + + scaled = scaling_layer( + weight=attention_weight, + input=sub_value_proj, + name='%s_scaling_%d' % (name, i)) + head = pooling_layer( + input=scaled, + pooling_type=SumPooling(), + name="%s_pooling_%d" 
% (name, i)) + + head_list.append(head) + + attended = concat_layer(head_list) + + return attended + + def inputs(layers, *args): """ Declare the inputs of network. The order of input should be as same as diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 1c7451e0abf5dc1b99671f292e2ffc2d2282abe9..a21f67a2d99e7eab39708e2a571d30d7e9f20ce6 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -10,6 +10,7 @@ test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_la test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer test_seq_slice_layer test_cross_entropy_over_beam test_roi_pool_layer test_pooling3D_layer -test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer test_scale_sub_region_layer) +test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer +test_scale_sub_region_layer test_dot_prod_layer test_l2_distance_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr index b14121e82cb7d9516c4771fc896b9b3b9e01d1c8..3e0f957648879d4350d662b336c953273bac1378 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr @@ -65,6 +65,7 @@ layers { height: 227 width: 227 depth: 1 + epsilon: 1e-05 } layers { name: "__crmnorm_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr index 
c7a487a11231cba6182b654108773037bdb0ec35..a18a4652e14c0cfc4dbca87e67d31aa663ee756b 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -65,6 +65,7 @@ layers { height: 256 width: 256 depth: 1 + epsilon: 1e-05 } layers { name: "__crmnorm_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr index 832ed24a31dd2bedba9a4fce77d7a088d1796fdb..9b69ae4a3b3cbcc7c0c69a2d5b3728e2f0204f33 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr @@ -36,6 +36,7 @@ layers { height: 6 width: 20 depth: 3 + epsilon: 1e-05 } parameters { name: "___batch_norm_0__.w0" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..f1530c382c3d81a82592af2c43c06eb4278e2b4a --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_dot_prod_layer.protostr @@ -0,0 +1,38 @@ +type: "nn" +layers { + name: "vector1" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "vector2" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__dot_prod_layer_0__" + type: "dot_prod" + size: 1 + active_type: "" + inputs { + input_layer_name: "vector1" + } + inputs { + input_layer_name: "vector2" + } +} +input_layer_names: "vector1" +input_layer_names: "vector2" +output_layer_names: "__dot_prod_layer_0__" +sub_models { + name: "root" + layer_names: "vector1" + layer_names: "vector2" + layer_names: "__dot_prod_layer_0__" + input_layer_names: "vector1" + 
input_layer_names: "vector2" + output_layer_names: "__dot_prod_layer_0__" + is_recurrent_layer_group: false +} diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..9ba33689edc893c2169a73679a04a6f51cfc83a8 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_l2_distance_layer.protostr @@ -0,0 +1,39 @@ +type: "nn" +layers { + name: "x" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "y" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "__l2_distance_layer_0__" + type: "l2_distance" + size: 1 + active_type: "" + inputs { + input_layer_name: "x" + } + inputs { + input_layer_name: "y" + } +} +input_layer_names: "x" +input_layer_names: "y" +output_layer_names: "__l2_distance_layer_0__" +sub_models { + name: "root" + layer_names: "x" + layer_names: "y" + layer_names: "__l2_distance_layer_0__" + input_layer_names: "x" + input_layer_names: "y" + output_layer_names: "__l2_distance_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr index 94ad56cab063df9e6a11bb1c293727fb9dec810f..63fb38c6508675d379f577b965ea17ad4c3b4942 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr @@ -4,6 +4,8 @@ layers { type: "data" size: 300 active_type: "" + height: 10 + width: 10 } layers { name: "__prelu_layer_0__" @@ -15,6 +17,9 @@ layers { input_parameter_name: "___prelu_layer_0__.w0" } partial_sum: 1 + height: 10 + width: 10 + depth: 1 } layers { name: "__prelu_layer_1__" @@ -26,6 +31,9 @@ layers { 
input_parameter_name: "___prelu_layer_1__.w0" } partial_sum: 1 + height: 10 + width: 10 + depth: 1 } layers { name: "__prelu_layer_2__" @@ -37,41 +45,100 @@ layers { input_parameter_name: "___prelu_layer_2__.w0" } partial_sum: 5 + height: 10 + width: 10 + depth: 1 +} +layers { + name: "__prelu_layer_3__" + type: "prelu" + size: 300 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "___prelu_layer_3__.w0" + } + partial_sum: 300 + height: 10 + width: 10 + depth: 1 +} +layers { + name: "__prelu_layer_4__" + type: "prelu" + size: 300 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "___prelu_layer_4__.w0" + } + partial_sum: 100 + height: 10 + width: 10 + depth: 1 } parameters { name: "___prelu_layer_0__.w0" size: 300 - initial_mean: 0.0 - initial_std: 0.057735026919 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 300 initial_strategy: 0 - initial_smart: true + initial_smart: false } parameters { name: "___prelu_layer_1__.w0" size: 300 - initial_mean: 0.0 - initial_std: 0.057735026919 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 300 initial_strategy: 0 - initial_smart: true + initial_smart: false } parameters { name: "___prelu_layer_2__.w0" size: 60 - initial_mean: 0.0 - initial_std: 0.129099444874 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 60 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___prelu_layer_3__.w0" + size: 1 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___prelu_layer_4__.w0" + size: 3 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 3 initial_strategy: 0 - initial_smart: true + initial_smart: false } input_layer_names: "input" -output_layer_names: "__prelu_layer_2__" +output_layer_names: "__prelu_layer_4__" sub_models { name: "root" layer_names: "input" layer_names: "__prelu_layer_0__" layer_names: "__prelu_layer_1__" layer_names: 
"__prelu_layer_2__" + layer_names: "__prelu_layer_3__" + layer_names: "__prelu_layer_4__" input_layer_names: "input" - output_layer_names: "__prelu_layer_2__" + output_layer_names: "__prelu_layer_4__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..e52d48dde0084aacd3f7874cc384d59287a0c7d5 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py @@ -0,0 +1,7 @@ +from paddle.trainer_config_helpers import * + +vec1 = data_layer(name='vector1', size=10) +vec2 = data_layer(name='vector2', size=10) +dot_product = dot_prod_layer(input1=vec1, input2=vec2) + +outputs(dot_product) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..b36a5c6d1222860ee4b77f89ad4b6148ccd89589 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py @@ -0,0 +1,7 @@ +from paddle.trainer_config_helpers import * + +outputs( + l2_distance_layer( + x=data_layer( + name='x', size=128), y=data_layer( + name='y', size=128))) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py index aae90fab32db78a70c2169ed8fafb930433f4136..45b02fbf325bb63b057bbbf64d59af8debf0bc9d 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py @@ -1,8 +1,10 @@ from paddle.trainer_config_helpers import * -data = data_layer(name='input', size=300) -prelu = prelu_layer(input=data) -prelu = prelu_layer(input=data, partial_sum=1) -prelu = 
prelu_layer(input=data, partial_sum=5) +data = data_layer(name='input', size=300, height=10, width=10) +prelu = prelu_layer(input=data, num_channels=3) +prelu = prelu_layer(input=data, partial_sum=1, num_channels=3) +prelu = prelu_layer(input=data, partial_sum=5, num_channels=3) +prelu = prelu_layer(input=data, channel_shared=True, num_channels=3) +prelu = prelu_layer(input=data, channel_shared=False, num_channels=3) outputs(prelu) diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 1c8d8f4b2f626bea5d9a44d01de7c2c9c45dc2fb..33a0829ba8d635ebd68b50f3da07da958fb79dcb 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -33,10 +33,11 @@ import networks import minibatch import plot import image -import model import paddle.trainer.config_parser as cp __all__ = [ + 'default_startup_program', + 'default_main_program', 'optimizer', 'layer', 'activation', @@ -56,12 +57,65 @@ __all__ = [ 'evaluator', 'image', 'master', - 'model', ] cp.begin_parse() +def set_omp_mkl_env_vars(trainer_count): + '''Auto set CPU environment if have not set before. + export KMP_AFFINITY, OMP_DYNAMIC according to the Hyper Threading status. + export OMP_NUM_THREADS, MKL_NUM_THREADS according to trainer_count. 
+ ''' + import platform + if not platform.system() in ['Linux', 'Darwin']: + return + + def set_env(key, value): + '''If the key has not been set in the environment, set it with value.''' + assert isinstance(key, str) + assert isinstance(value, str) + envset = os.environ.get(key) + if envset is None: + os.environ[key] = value + + def num_physical_cores(): + '''Get the number of physical cores''' + if platform.system() == "Linux": + num_sockets = int( + os.popen("lscpu |grep \"Socket\" |awk -F':' '{print $2}'|xargs") + .read()) + num_cores_per_socket = int( + os.popen( + "lscpu |grep \"per socket\" |awk -F':' '{print $2}'|xargs") + .read()) + return num_sockets * num_cores_per_socket + else: + cmds = {"Darwin": "sysctl -n hw.physicalcpu"} + return int(os.popen(cmds.get(platform.system(), "expr 1")).read()) + + def num_logical_processors(): + '''Get the number of logical processors''' + cmds = { + "Linux": "grep \"processor\" /proc/cpuinfo|sort -u|wc -l", + "Darwin": "sysctl -n hw.logicalcpu" + } + return int(os.popen(cmds.get(platform.system(), "expr 1")).read()) + + num_cores = num_physical_cores() + num_processors = num_logical_processors() + if num_processors > num_cores: # Hyper Threading is enabled + set_env("OMP_DYNAMIC", "true") + set_env("KMP_AFFINITY", "granularity=fine,compact,1,0") + else: + set_env("OMP_DYNAMIC", "false") + set_env("KMP_AFFINITY", "granularity=fine,compact,0,0") + threads = num_processors / trainer_count + threads = '1' if threads < 1 else str(threads) + set_env("OMP_NUM_THREADS", threads) + set_env("MKL_NUM_THREADS", threads) + + def init(**kwargs): import py_paddle.swig_paddle as api args = [] @@ -76,6 +130,8 @@ def init(**kwargs): for key in args_dict.keys(): args.append('--%s=%s' % (key, str(args_dict[key]))) + set_omp_mkl_env_vars(kwargs.get('trainer_count', 1)) + if 'use_gpu' in kwargs: cp.g_command_config_args['use_gpu'] = kwargs['use_gpu'] if 'use_mkldnn' in kwargs: diff --git a/python/paddle/v2/fluid/evaluator.py 
b/python/paddle/v2/fluid/evaluator.py index 180d0135ffe8fa8982cfcde242033b5a69eed1cf..3a8f1831cf2c44c81aee62c6ee172942db188217 100644 --- a/python/paddle/v2/fluid/evaluator.py +++ b/python/paddle/v2/fluid/evaluator.py @@ -1,59 +1,187 @@ -import paddle.v2.fluid.op as op import numpy as np +from paddle.v2.fluid.framework import Program, g_main_program, unique_name, Variable import paddle.v2.fluid.core as core -def avg_accumulate(accumulated_var, per_eval, num_batches, place): - t = np.array(accumulated_var.get_tensor()) - t[0] += per_eval[0] - accumulated_var.get_tensor().set([t[0] / float(num_batches)], place) +def _clone_var_in_block_(block, var): + assert isinstance(var, Variable) + return block.create_var( + name=var.name, + shape=var.shape, + dtype=var.data_type, + type=var.type, + lod_level=var.lod_level, + persistable=True) class Evaluator(object): - def __init__(self, - scope, - operator='accuracy', - input='Inference', - label='Label', - output='Output', - place=core.CPUPlace()): - """ - create an evaluator for evaluating the inference. - NOTE: default run on CPUPlace(), running on GPUPlace doesn't improve performance much. - - :param scope: the scope instance contains the input. - :type scope: paddle.v2.fluid.core.scope - :param operator: operator name for caculating the evaluation for each mini-batch. - :type operator: string - :param input: output variable name of forward network. 
- :type input: string - :param label: variable name of label - :type label: string - """ - self.scope = scope - self.place = place - self.output_name = output - self.num_batches = 0 - # create variable to store accumulated evaluator output - eval_name = ''.join([operator, "@Eval"]) - if scope.find_var(eval_name): - raise Exception("evaluator already exist in scope: %s" % eval_name) - self.accumulated_var = scope.var(eval_name) - t = self.accumulated_var.get_tensor() - t.set_dims((1, )) - t.set([0.0], place) - # self.accumulated_var = block.create_var(block, name=eval_name, shape=(1,)) - # self.accumulated_var.get_tensor().set([0.0]) - # create operator of evaluation - var_map = dict() # var name -> variable - var_map[input] = [input] - var_map[label] = [label] - var_map[output] = [output] - self.op = op.Operator(operator, **var_map) - - def evaluate(self, ctx, accumulator=avg_accumulate): - self.op.run(self.scope, ctx) - per_eval = np.array(self.scope.find_var(self.output_name).get_tensor()) - self.num_batches += 1 - accumulator(self.accumulated_var, per_eval, self.num_batches, - self.place) + """ + Evalutor Base class. 
+ + create metric states + add mini-batch evaluator caculate operator + add increment operator to accumulate the metric states + """ + + def __init__(self, name, **kwargs): + """ + init the global states + """ + self._states = {} + if kwargs.has_key("main_program"): + self._main_program = kwargs.get("main_program") + else: + self._main_program = g_main_program + + def _update_ops(self, *args, **kwargs): + """ + append update ops to the global states + """ + raise NotImplementedError() + + def reset(self, executor, reset_program=None): + """ + Clear metric states at the begin of each pass/user specified batch + """ + if reset_program == None: + reset_program = Program() + else: + reset_program = program + block = reset_program.global_block() + for k, var in self._states.iteritems(): + g_var = _clone_var_in_block_(block, var) + zeros = block.create_var(dtype="float32", persistable=True) + block.append_op( + type="fill_constant", + outputs={"Out": [zeros]}, + attrs={ + "shape": g_var.shape, + "value": .0, + "data_type": 5, + }) + block.append_op( + type="scale", inputs={"X": zeros}, outputs={"Out": g_var}) + executor.run(reset_program, fetch_list=self._states.values()) + + def eval(self, executor, eval_program=None): + """ + Merge the mini-batch statistics to form the evaluation result for multiple mini-batches. 
+ """ + raise NotImplementedError() + + +class Accuracy(Evaluator): + """ + Accuracy need two state variable Total, Correct + """ + + def __init__(self, *args, **kwargs): + super(Accuracy, self).__init__("accuracy", **kwargs) + block = self._main_program.global_block() + g_total = block.create_var( + name=unique_name("Total"), + persistable=True, + dtype="int64", + shape=[1]) + g_correct = block.create_var( + name=unique_name("Correct"), + persistable=True, + dtype="int64", + shape=[1]) + self._states["Total"] = g_total + self._states["Correct"] = g_correct + + def _update_ops(self, input, label, k=1, **kwargs): + block = self._main_program.global_block() + topk_out = block.create_var(dtype=input.data_type) + topk_indices = block.create_var(dtype="int64") + block.append_op( + type="top_k", + inputs={"X": [input]}, + outputs={"Out": [topk_out], + "Indices": [topk_indices]}, + attrs={"k": k}) + acc_out = block.create_var(dtype=kwargs.get("out_dtype", "float32")) + correct = block.create_var(dtype="int64", persistable=True) + total = block.create_var(dtype="int64", persistable=True) + block.append_op( + type="accuracy", + inputs={ + "Out": [topk_out], + "Indices": [topk_indices], + "Label": [label] + }, + outputs={ + "Accuracy": [acc_out], + "Correct": [correct], + "Total": [total], + }) + + block.append_op( + type="cast", + inputs={"X": [self._states["Total"]]}, + outputs={"Out": [self._states["Total"]]}, + attrs={ + "in_data_type": 5, # float32 + "out_data_type": 2, #int32 + }) + block.append_op( + type="cast", + inputs={"X": [self._states["Correct"]]}, + outputs={"Out": [self._states["Correct"]]}, + attrs={ + "in_data_type": 5, + "out_data_type": 2, + }) + + block.append_op( + type="elementwise_add", + inputs={"X": [self._states["Total"]], + "Y": [total]}, + outputs={"Out": [self._states["Total"]]}) + block.append_op( + type="elementwise_add", + inputs={"X": [self._states["Correct"]], + "Y": [correct]}, + outputs={"Out": [self._states["Correct"]]}) + + return 
acc_out + + def eval(self, executor, eval_program=None): + if eval_program != None: + eval_program = eval_program + else: + eval_program = Program() + block = eval_program.global_block() + eval_out = block.create_var(dtype=self._states["Total"].data_type) + e_total = _clone_var_in_block_(block, self._states["Total"]) + e_correct = _clone_var_in_block_(block, self._states["Correct"]) + block.append_op( + type="cast", + inputs={"X": [e_total]}, + outputs={"Out": [e_total]}, + attrs={ + "in_data_type": 2, #int32 + "out_data_type": 5, #float32 + }) + block.append_op( + type="cast", + inputs={"X": [e_correct]}, + outputs={"Out": [e_correct]}, + attrs={ + "in_data_type": 2, + "out_data_type": 5, + }) + block.append_op( + type="elementwise_div", + inputs={"X": e_correct, + "Y": e_total}, + outputs={"Out": eval_out}) + out = executor.run(eval_program, fetch_list=[eval_out]) + return np.array(out[0]) + + +def accuracy(*args, **kwargs): + cls = Accuracy(*args, **kwargs) + out = cls._update_ops(*args, **kwargs) + return cls, out diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index e2587b4f74506c6eb2b253fa9b24db4838bfedbc..7f7c310ad87f64e5d047ecfc2876d516914c75c8 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -4,7 +4,10 @@ import collections import numpy as np import copy -__all__ = ['Block', 'Variable', 'Program', 'Operator'] +__all__ = [ + 'Block', 'Variable', 'Program', 'Operator', 'default_startup_program', + 'default_main_program' +] def unique_name(prefix): @@ -12,9 +15,40 @@ def unique_name(prefix): return "_".join([prefix, str(uid)]) -def _debug_string_(proto): +def convert_np_dtype_to_dtype_(np_dtype): + dtype = np.dtype(np_dtype) + if dtype == np.float32: + return core.DataType.FP32 + elif dtype == np.float64: + return core.DataType.FP64 + elif dtype == np.float16: + return core.DataType.FP16 + elif dtype == np.int32: + return core.DataType.INT32 + elif dtype == np.int16: + return 
core.DataType.INT16 + elif dtype == np.int64: + return core.DataType.INT64 + elif dtype == np.bool: + return core.DataType.BOOL + else: + raise ValueError("Not supported numpy dtype " + str(dtype)) + + +def dtype_is_floating(dtype): + if not isinstance(dtype, core.DataType): + dtype = convert_np_dtype_to_dtype_(dtype) + + if (dtype == core.DataType.FP16 or dtype == core.DataType.FP32 or + dtype == core.DataType.FP64): + return True + else: + return False + + +def _debug_string_(proto, throw_on_error=True): error_fields = list() - if not proto.IsInitialized(error_fields): + if not proto.IsInitialized(error_fields) and throw_on_error: raise ValueError("{0} are not initialized\nThe message is {1}".format( error_fields, proto)) return proto.__str__() @@ -63,7 +97,7 @@ class Variable(object): "matched.".format(self.name, old_shape, shape)) if dtype is not None: if not isinstance(dtype, core.DataType): - dtype = Variable._convert_np_dtype_to_dtype_(dtype) + dtype = convert_np_dtype_to_dtype_(dtype) if is_new_var: self.desc.set_data_type(dtype) else: @@ -101,9 +135,12 @@ class Variable(object): self.stop_gradient = stop_gradient def __str__(self): + return self.to_string(True) + + def to_string(self, throw_on_error): protostr = self.desc.serialize_to_string() proto = framework_pb2.VarDesc.FromString(str(protostr)) - return _debug_string_(proto) + return _debug_string_(proto, throw_on_error) __repr__ = __str__ @@ -142,26 +179,6 @@ class Variable(object): uid = core.unique_integer(prefix) # unique during whole process. 
return "_".join([prefix, str(uid)]) - @staticmethod - def _convert_np_dtype_to_dtype_(np_dtype): - dtype = np.dtype(np_dtype) - if dtype == np.float32: - return core.DataType.FP32 - elif dtype == np.float64: - return core.DataType.FP64 - elif dtype == np.float16: - return core.DataType.FP16 - elif dtype == np.int32: - return core.DataType.INT32 - elif dtype == np.int16: - return core.DataType.INT16 - elif dtype == np.int64: - return core.DataType.INT64 - elif dtype == np.bool: - return core.DataType.BOOL - else: - raise ValueError("Not supported numpy dtype " + str(dtype)) - def get_all_op_protos(): """ @@ -229,17 +246,17 @@ class Operator(object): in_proto.name) if found: - in_argus = inputs[in_proto.name] - if not isinstance(in_argus, list): - in_argus = [in_argus] - if not in_proto.duplicable and len(in_argus) > 1: + in_args = inputs[in_proto.name] + if not isinstance(in_args, list): + in_args = [in_args] + if not in_proto.duplicable and len(in_args) > 1: raise ValueError( "Input %s expects only one input, but %d are given." - % (in_proto.name, len(in_argus))) - in_argu_names = [] - for argu in in_argus: - in_argu_names.append(argu.name) - self.desc.set_input(in_proto.name, in_argu_names) + % (in_proto.name, len(in_args))) + in_arg_names = [] + for arg in in_args: + in_arg_names.append(arg.name) + self.desc.set_input(in_proto.name, in_arg_names) else: self.desc.set_input(in_proto.name, []) @@ -257,18 +274,18 @@ class Operator(object): str(e) for e in given))) for out_proto in proto.outputs: - out_argus = outputs[out_proto.name] - if not isinstance(out_argus, list): - out_argus = [out_argus] - if not out_proto.duplicable and len(out_argus) > 1: + out_args = outputs[out_proto.name] + if not isinstance(out_args, list): + out_args = [out_args] + if not out_proto.duplicable and len(out_args) > 1: raise ValueError( "Output %s expects only one output, but %d are given." 
% - (out_proto.name, len(out_argus))) - out_argu_names = [] - for argu in out_argus: - out_argu_names.append(argu.name) - argu.op = self - self.desc.set_output(out_proto.name, out_argu_names) + (out_proto.name, len(out_args))) + out_arg_names = [] + for arg in out_args: + out_arg_names.append(arg.name) + arg.op = self + self.desc.set_output(out_proto.name, out_arg_names) if attrs is not None: if not isinstance(attrs, dict): @@ -291,10 +308,13 @@ class Operator(object): self.desc.infer_var_type(self.block.desc) self.desc.infer_shape(self.block.desc) - def __str__(self): + def to_string(self, throw_on_error): protostr = self.desc.serialize_to_string() proto = framework_pb2.OpDesc.FromString(str(protostr)) - return _debug_string_(proto) + return _debug_string_(proto, throw_on_error) + + def __str__(self): + return self.to_string(True) __repr__ = __str__ @@ -349,9 +369,12 @@ class Block(object): self.program = program def __str__(self): + return self.to_string(True) + + def to_string(self, throw_on_error): protostr = self.desc.serialize_to_string() proto = framework_pb2.BlockDesc.FromString(str(protostr)) - return _debug_string_(proto) + return _debug_string_(proto, throw_on_error) __repr__ = __str__ @@ -454,9 +477,12 @@ class Program(object): self.current_block_idx = 0 def __str__(self): + return self.to_string(True) + + def to_string(self, throw_on_error): protostr = self.desc.serialize_to_string() proto = framework_pb2.ProgramDesc.FromString(str(protostr)) - return _debug_string_(proto) + return _debug_string_(proto, throw_on_error) def clone(self): p = Program() @@ -512,7 +538,14 @@ class Program(object): assert isinstance(target, Variable) if no_grad_set is None: no_grad_set = set() - param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set) + try: + param_to_grad_info = self.desc.append_backward(target.desc, + no_grad_set) + except Exception as e: + raise core.EnforceNotMet( + str(e) + "\nCurrent protobuf is\n{0}".format( + self.to_string(False))) 
+ self.sync_with_cpp() return param_to_grad_info @@ -562,3 +595,11 @@ class Parameter(Variable): # program is a global instance. g_main_program = Program() g_startup_program = Program() + + +def default_startup_program(): + return g_startup_program + + +def default_main_program(): + return g_main_program diff --git a/python/paddle/v2/fluid/initializer.py b/python/paddle/v2/fluid/initializer.py index ded144ecd5db83ce50ca0dc6243fdc52ac0b7a2f..1a9d804ee7ee8e6463d42fefb809fb45888fd064 100644 --- a/python/paddle/v2/fluid/initializer.py +++ b/python/paddle/v2/fluid/initializer.py @@ -285,3 +285,86 @@ class XavierInitializer(Initializer): }) var.op = op return op + + +class MSRAInitializer(Initializer): + """Implements the MSRA initializer a.k.a. Kaiming Initializer + + This class implements the weight initialization from the paper + Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren + and Jian Sun. This is a robust initialization method that particularly + considers the rectifier nonlinearities. In case of Uniform distribution, + the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal + distribution, the mean is 0 and the standard deviation + is sqrt(2/ fan_in). + + References: + [1] Delving Deep into Rectifiers: Surpassing Human-Level Performance + on ImageNet Classification + (https://arxiv.org/abs/1502.01852) + """ + + def __init__(self, uniform=True, fan_in=None, seed=0): + """Constructor for MSRAInitializer + + Args: + uniform: whether to use uniform or normal distribution + fan_in: fan_in for MSRAInitializer. If None, it is + inferred from the variable. + seed: random seed + + Note: It is recommended to set fan_in to None for most cases. 
+ """ + assert uniform is not None + assert seed is not None + super(MSRAInitializer, self).__init__() + self._uniform = uniform + self._fan_in = fan_in + self._seed = seed + + def __call__(self, var, block): + """Add MSRA initialization ops for a variable + + Args: + var: Variable that needs to be initialized + block: The block in which initialization ops + should be added + + Returns: + the initialization op + """ + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + f_in, f_out = self._compute_fans(var) + + # If fan_in is passed, use it + fan_in = f_in if self._fan_in is None else self._fan_in + + if self._uniform: + limit = np.sqrt(6.0 / float(fan_in)) + op = block.prepend_op( + type="uniform_random", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "data_type": int(var.data_type), + "min": -limit, + "max": limit, + "seed": self._seed + }) + + else: + std = np.sqrt(2.0 / float(fan_in)) + op = block.prepend_op( + type="gaussian_random", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "data_type": int(var.data_type), + "mean": 0.0, + "std": std, + "seed": self._seed + }) + var.op = op + return op diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index 394a171c67a99ffb0c7caaf71e850fe541f8286e..2d070814eef0b099ba71bef223596e30388ac48a 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -35,7 +35,7 @@ def save_vars(executor, dirname, main_program=None, vars=None, predicate=None): :param executor: executor that save variable :param dirname: directory path - :param main_program: program. If vars is None, then filter all variables in this + :param main_program: program. If vars is None, then filter all variables in this program which fit `predicate`. Default g_program. :param predicate: The Predicate describes a callable that returns a variable as a bool. If it returns true, the variables will be saved. 
@@ -96,11 +96,11 @@ def load_vars(executor, dirname, main_program=None, vars=None, predicate=None): :param executor: executor that save variable :param dirname: directory path - :param main_program: program. If vars is None, then filter all variables in this + :param main_program: program. If vars is None, then filter all variables in this program which fit `predicate`. Default g_program. :param predicate: The Predicate describes a callable that returns a variable as a bool. If it returns true, the variables will be loaded. - :param vars: variables need to be loaded. If specify vars, program & + :param vars: variables need to be loaded. If specify vars, program & predicate will be ignored :return: None """ @@ -157,15 +157,15 @@ def save_inference_model(dirname, executor, main_program=None): """ - Build a model especially for inference, + Build a model especially for inference, and save it to directory by the executor. :param dirname: directory path :param feeded_var_names: Names of variables that need to be feeded data during inference :param target_vars: Variables from which we can get inference results. :param executor: executor that save inference model - :param main_program: original program, which will be pruned to build the inference model. - Default g_program. + :param main_program: original program, which will be pruned to build the inference model. + Default g_main_program. 
:return: None """ @@ -234,3 +234,35 @@ def load_inference_model(dirname, executor): fetch_vars = [program.global_block().var(name) for name in fetch_var_names] return [program, feed_var_names, fetch_vars] + + +def get_parameter_value(para, executor): + """ + Get the LoDTensor for the parameter + + :param executor: executor for retrieving the value + :param para: the given parameter + :return: the LoDTensor for the parameter + """ + assert is_parameter(para) + + get_program = Program() + block = get_program.global_block() + new_var = _clone_var_in_block_(block, para) + return executor.run(get_program, feed={}, fetch_list=[new_var])[0] + + +def get_parameter_value_by_name(name, executor, program=None): + """ + Get the LoDTensor for parameter with the given name + + :param executor: executor for retrieving the value + :param name: the name of the parameter + :param program: the program where the variable is found + Default g_main_program. + :return: the LoDTensor for the variable + """ + if program is None: + program = g_main_program + var = program.global_block().var(name) + return get_parameter_value(var, executor) diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index 9dc3c119ea47ca11956d85119ce1ec6d3d6bb7e8..e40551ca73e991edd8e1d1df5b103c36367b7050 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -2,7 +2,7 @@ import copy import itertools from paddle.v2.fluid.framework import Variable, g_main_program, \ - g_startup_program, unique_name, Program + g_startup_program, unique_name, Program, dtype_is_floating from paddle.v2.fluid.initializer import ConstantInitializer, \ UniformInitializer, XavierInitializer @@ -61,7 +61,7 @@ class LayerHelper(object): @property def param_attr(self): - default = {'name': None, 'initializer': XavierInitializer()} + default = {'name': None} actual = self.kwargs.get('param_attr', None) if actual is None: actual = default @@ -72,7 +72,7 @@ class
LayerHelper(object): @property def bias_attr(self): - default = {'name': None, 'initializer': XavierInitializer()} + default = {'name': None} bias_attr = self.kwargs.get('bias_attr', None) if bias_attr is None: bias_attr = default @@ -119,12 +119,17 @@ class LayerHelper(object): attr_copy = copy.deepcopy(attr) if initializer is not None: attr_copy['initializer'] = initializer + else: + attr_copy['initializer'] = self._get_default_initializer(dtype) if attr_copy['name'] is None: attr_copy['name'] = unique_name(".".join([self.name, suffix])) self.startup_program.global_block().create_parameter( dtype=dtype, shape=shape, **attr_copy) return self.main_program.global_block().create_parameter( - name=attr_copy['name'], dtype=dtype, shape=shape) + name=attr_copy['name'], + dtype=dtype, + shape=shape, + trainable=attr_copy.get('trainable', True)) def create_tmp_variable(self, dtype): return self.main_program.current_block().create_var( @@ -149,36 +154,42 @@ class LayerHelper(object): persistable=True, initializer=initializer) - def append_bias_op(self, input_var, num_flatten_dims=None): + def append_bias_op(self, + input_var, + bias_initializer, + dim_start=1, + dim_end=None): """ - Append bias operator and return its output. If the user does not set + Append bias operator and return its output. If the user does not set bias_attr, append_bias_op will return input_var - - :param input_var: the input variable. The len(input_var.shape) is larger - or equal than 2. - :param num_flatten_dims: The input tensor will be flatten as a matrix - when adding bias. - `matrix.shape = product(input_var.shape[0:num_flatten_dims]), product( - input_var.shape[num_flatten_dims:])` - """ - if num_flatten_dims is None: - num_flatten_dims = self.kwargs.get('num_flatten_dims', None) - if num_flatten_dims is None: - num_flatten_dims = 1 - size = list(input_var.shape[num_flatten_dims:]) + :param input_var: the input variable. The len(input_var.shape) is + larger or equal than 2. 
+ :bias_initializer: an instance of a subclass of Initializer used to + initialize the bias + :param dim_start: + :param dim_end: the shape of the bias will be + input_var.shape[dim_start:dim_end]. The bias is broadcasted to other + dimensions and added to input_var to get the output + """ + size = list(input_var.shape[dim_start:dim_end]) bias_attr = self.bias_attr if not bias_attr: return input_var b = self.create_parameter( - attr=bias_attr, shape=size, dtype=input_var.data_type, suffix='b') + attr=bias_attr, + shape=size, + dtype=input_var.data_type, + suffix='b', + initializer=bias_initializer) tmp = self.create_tmp_variable(dtype=input_var.data_type) self.append_op( type='elementwise_add', inputs={'X': [input_var], 'Y': [b]}, - outputs={'Out': [tmp]}) + outputs={'Out': [tmp]}, + attrs={'axis': dim_start}) return tmp def append_activation(self, input_var): @@ -195,3 +206,10 @@ class LayerHelper(object): outputs={"Y": [tmp]}, attrs=act) return tmp + + def _get_default_initializer(self, dtype): + if dtype is None or dtype_is_floating(dtype) is True: + return XavierInitializer() + else: + # For integer and boolean types, initialize with all zeros + return ConstantInitializer() diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index 8a1aa1c42d5a006539d221f96e3535434c9a4c43..fac91aac97267b1ecc867bb9b0b1f8fd40f2f299 100644 --- a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -3,7 +3,7 @@ import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 from paddle.v2.fluid.framework import OpProtoHolder, Variable, Program, \ Operator from paddle.v2.fluid.initializer import ConstantInitializer, \ - NormalInitializer + NormalInitializer, XavierInitializer from paddle.v2.fluid.layer_helper import LayerHelper, unique_name import re import cStringIO @@ -17,11 +17,13 @@ __all__ = [ def fc(input, size, + num_flatten_dims=1, param_attr=None, + param_initializer=None, bias_attr=None, - name=None, + bias_initializer=None, 
act=None, - num_flatten_dims=1, + name=None, main_program=None, startup_program=None): """ @@ -30,11 +32,15 @@ def fc(input, Args: input: The input tensor to the function size: The size of the layer + num_flatten_dims: Number of columns in input param_attr: The parameters/weights to the FC Layer + param_initializer: Initializer used for the weight/parameter. + If None, XavierInitializer() is used bias_attr: The bias parameter for the FC layer - name: Name/alias of the function + bias_initializer: Initializer used for the bias. + If None, then ConstantInitializer() is used act: Activation to be applied to the output of FC layer - num_flatten_dims: Number of columns in input + name: Name/alias of the function main_program: Name of the main program that calls this startup_program: Name of the startup program @@ -50,10 +56,23 @@ def fc(input, to the LayerHelper constructor. """ + + def _get_default_param_initializer(): + return XavierInitializer() + + def _get_default_bias_initializer(): + return ConstantInitializer() + helper = LayerHelper('fc', **locals()) dtype = helper.input_dtype() + if param_initializer is None: + param_initializer = _get_default_param_initializer() + + if bias_initializer is None: + bias_initializer = _get_default_bias_initializer() + mul_results = [] for input_var, param_attr in helper.iter_inputs_and_params(): input_shape = input_var.shape @@ -61,7 +80,10 @@ def fc(input, reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1) ] + [size] w = helper.create_parameter( - attr=param_attr, shape=param_shape, dtype=dtype) + attr=param_attr, + initializer=param_initializer, + shape=param_shape, + dtype=dtype) tmp = helper.create_tmp_variable(dtype) helper.append_op( type="mul", @@ -82,16 +104,17 @@ def fc(input, helper.append_op( type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias}) # add bias - pre_activation = helper.append_bias_op(pre_bias) + pre_activation = helper.append_bias_op(pre_bias, bias_initializer) # add activation 
return helper.append_activation(pre_activation) def embedding(input, size, - data_type='float32', is_sparse=False, + param_initializer=None, param_attr=None, + data_type='float32', main_program=None, startup_program=None): """ @@ -100,9 +123,9 @@ def embedding(input, Args: input: The input to the function size: The size of the layer - data_type: The type of data : float32, float_16, int etc is_sparse: A flag that decleares whether the input is sparse param_attr: Parameters for this layer + data_type: The type of data : float32, float_16, int etc main_program: Name of the main program that calls this startup_program: Name of the startup program @@ -114,9 +137,16 @@ def embedding(input, to the LayerHelper constructor. """ + + def _get_default_param_initializer(): + return XavierInitializer() + helper = LayerHelper('embedding', **locals()) w = helper.create_parameter( - attr=helper.param_attr, shape=size, dtype=data_type) + attr=helper.param_attr, + shape=size, + dtype=data_type, + initializer=param_initializer or _get_default_param_initializer()) tmp = helper.create_tmp_variable(data_type) helper.append_op( type='lookup_table', @@ -130,7 +160,6 @@ def embedding(input, # TODO(qijun): expose H0 and C0 def dynamic_lstm(input, size, - data_type='float32', param_attr=None, bias_attr=None, use_peepholes=True, @@ -138,6 +167,7 @@ def dynamic_lstm(input, gate_activation='sigmoid', cell_activation='tanh', candidate_activation='tanh', + data_type='float32', main_program=None, startup_program=None): helper = LayerHelper('lstm', **locals()) @@ -178,9 +208,9 @@ def dynamic_lstm(input, def data(name, shape, + append_batch_size=True, data_type='float32', type=core.VarDesc.VarType.LOD_TENSOR, - append_batch_size=True, main_program=None, startup_program=None, stop_gradient=True): @@ -190,9 +220,9 @@ def data(name, Args: name: The name/alias of the function shape: Tuple declaring the shape. + append_batch_size: Whether or not to append the data as a batch. 
data_type: The type of data : float32, float_16, int etc type: The output type. By default it is LOD_TENSOR. - append_batch_size: Whether or not to append the data as a batch. main_program: Name of the main program that calls this startup_program: Name of the startup program stop_gradient: A boolean that mentions whether gradient should flow. @@ -226,7 +256,7 @@ def data(name, stop_gradient=stop_gradient) -def create_tensor(dtype, name=None, main_program=None): +def create_tensor(dtype, name=None, main_program=None, startup_program=None): helper = LayerHelper("create_tensor", **locals()) return helper.create_variable(name=helper.name, dtype=dtype) @@ -250,7 +280,7 @@ def _convert_(name): def _generate_doc_string_(op_proto): """ Generate docstring by OpProto - + Args: op_proto (framework_pb2.OpProto): a protobuf message typed OpProto @@ -390,30 +420,12 @@ _create_op_func_('mul') _create_op_func_('elementwise_add') _create_op_func_('dropout') _create_op_func_('reshape') -_create_op_func_('elementwise_add') _create_op_func_('sigmoid') _create_op_func_('scale') _create_op_func_('reshape') _create_op_func_('transpose') -def fill_constant(data_type, shape, value=None, program=None): - """ - This function creates a tensor , with shape as mentioned in the input and - specified data_type and fills this up with a constant value that - comes in the input. 
- """ - helper = LayerHelper('fill_constant', **locals()) - out = helper.create_tmp_variable(dtype=data_type) - helper.append_op( - type='fill_constant', - outputs={'Out': [out]}, - attrs={'data_type': data_type, - 'shape': shape, - 'value': value}) - return out - - def cast(x, data_type, main_program=None): """ This function takes in the input with input_data_type @@ -456,7 +468,42 @@ def sums(input, main_program=None, startup_program=None): return out -def assign(input, output, main_program=None): +def linear_chain_crf(input, + label, + param_attr=None, + param_initializer=None, + main_program=None, + startup_program=None): + def _get_default_param_initializer(): + return XavierInitializer() + + helper = LayerHelper('linear_chain_crf', **locals()) + size = input.shape[1] + transition = helper.create_parameter( + attr=helper.param_attr, + shape=[size + 2, size], + dtype=helper.input_dtype(), + initializer=param_initializer or _get_default_param_initializer()) + alpha = helper.create_tmp_variable(dtype=helper.input_dtype()) + emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) + transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) + log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype()) + helper.append_op( + type='linear_chain_crf', + inputs={"Emission": [input], + "Transition": transition, + "Label": label}, + outputs={ + "Alpha": [alpha], + "EmissionExps": [emission_exps], + "TransitionExps": transition_exps, + "LogLikelihood": log_likelihood + }) + + return log_likelihood + + +def assign(input, output, main_program=None, startup_program=None): helper = LayerHelper('assign', **locals()) helper.append_op( type='scale', @@ -468,7 +515,7 @@ def assign(input, output, main_program=None): def split_lod_tensor(input, mask, - level, + level=0, main_program=None, startup_program=None): helper = LayerHelper('split_lod_tensor', **locals()) @@ -490,11 +537,11 @@ def merge_lod_tensor(in_true, in_false, x, mask, - level, 
+ level=0, main_program=None, startup_program=None): helper = LayerHelper('merge_lod_tensor', **locals()) - out = helper.create_tmp_variable(dtype=x.data_type) + out = helper.create_tmp_variable(dtype=in_true.data_type) helper.append_op( type='merge_lod_tensor', inputs={'X': x, @@ -574,7 +621,9 @@ def accuracy(input, label, k=1, **kwargs): "Indices": [topk_indices]}, attrs={"k": k}) acc_out_dtype = kwargs.get("out_dtype", "float32") - acc_out = helper.create_tmp_variable(dtype=acc_out_dtype) + acc_out = helper.create_tmp_variable(dtype="float32") + correct = helper.create_tmp_variable(dtype="int64") + total = helper.create_tmp_variable(dtype="int64") helper.append_op( type="accuracy", inputs={ @@ -582,7 +631,11 @@ def accuracy(input, label, k=1, **kwargs): "Indices": [topk_indices], "Label": [label] }, - outputs={"Accuracy": [acc_out]}) + outputs={ + "Accuracy": [acc_out], + "Correct": [correct], + "Total": [total], + }) return acc_out @@ -590,10 +643,12 @@ def sequence_conv(input, num_filters, filter_size=3, filter_stride=1, - act=None, padding=None, bias_attr=None, + bias_initializer=None, param_attr=None, + param_initializer=None, + act=None, main_program=None, startup_program=None): """ @@ -601,6 +656,13 @@ def sequence_conv(input, other convolutional configurations for the filters and stride as given in the input parameters to the function. """ + + def _get_default_bias_initializer(): + return ConstantInitializer() + + def _get_default_param_initializer(): + return XavierInitializer() + # FIXME(dzh) : want to unify the argument of python layer # function. So we ignore some unecessary attributes. # such as, padding_trainable, context_start. 
@@ -608,9 +670,17 @@ def sequence_conv(input, helper = LayerHelper('sequence_conv', **locals()) dtype = helper.input_dtype() + if param_initializer is None: + param_initializer = _get_default_param_initializer() + if bias_initializer is None: + bias_initializer = _get_default_bias_initializer() + filter_shape = [filter_size * input.shape[1], num_filters] filter = helper.create_parameter( - attr=helper.param_attr, shape=filter_shape, dtype=dtype) + attr=helper.param_attr, + shape=filter_shape, + dtype=dtype, + initializer=param_initializer) pre_bias = helper.create_tmp_variable(dtype) helper.append_op( @@ -625,20 +695,22 @@ def sequence_conv(input, 'contextStart': -int(filter_size / 2), 'contextLength': filter_size }) - pre_act = helper.append_bias_op(pre_bias) + pre_act = helper.append_bias_op(pre_bias, bias_initializer) return helper.append_activation(pre_act) def conv2d(input, num_filters, - name=None, - filter_size=[1, 1], - act=None, - groups=None, + filter_size, stride=[1, 1], padding=None, - bias_attr=None, + groups=None, param_attr=None, + param_initializer=None, + bias_attr=None, + bias_initializer=None, + act=None, + name=None, main_program=None, startup_program=None): """ @@ -648,6 +720,14 @@ def conv2d(input, This funciton can also append an activation on top of the conv-2d output, if mentioned in the input parameters. 
""" + + def _get_default_bias_initializer(): + return ConstantInitializer() + + def _get_default_param_initializer(filter_size, num_channels): + std = (2.0 / (filter_size[0]**2 * num_channels))**0.5 + return NormalInitializer(0.0, std, 0) + helper = LayerHelper('conv2d', **locals()) dtype = helper.input_dtype() @@ -655,7 +735,7 @@ def conv2d(input, if groups is None: num_filter_channels = num_channels else: - if num_channels % groups is not 0: + if num_channels % groups != 0: raise ValueError("num_channels must be divisible by groups.") num_filter_channels = num_channels / groups @@ -669,12 +749,17 @@ def conv2d(input, input_shape = input.shape filter_shape = [num_filters, num_filter_channels] + filter_size - std = (2.0 / (filter_size[0]**2 * num_channels))**0.5 + if param_initializer is None: + param_initializer = _get_default_param_initializer(filter_size, + num_channels) + if bias_initializer is None: + bias_initializer = _get_default_bias_initializer() + filter = helper.create_parameter( attr=helper.param_attr, shape=filter_shape, dtype=dtype, - initializer=NormalInitializer(0.0, std, 0)) + initializer=param_initializer) pre_bias = helper.create_tmp_variable(dtype) helper.append_op( @@ -688,7 +773,8 @@ def conv2d(input, 'paddings': padding, 'groups': groups}) - pre_act = helper.append_bias_op(pre_bias, 1) + pre_act = helper.append_bias_op( + pre_bias, bias_initializer, dim_start=1, dim_end=2) return helper.append_activation(pre_act) @@ -1305,7 +1391,7 @@ def array_to_lod_tensor(x, table, main_program=None): return tmp -def fill_constant(shape, dtype, value, main_program=None): +def fill_constant(shape, dtype, value, main_program=None, startup_program=None): """ This function creates a tensor , with shape as mentioned in the input and specified data_type and fills this up with a constant value that @@ -1326,6 +1412,31 @@ def fill_constant(shape, dtype, value, main_program=None): return out +def fill_constant_batch_size_like(input, + shape, + dtype, + value, + 
input_dim_idx=0, + output_dim_idx=0, + main_program=None, + startup_program=None): + helper = LayerHelper("fill_constant_batch_size_like", **locals()) + out = helper.create_tmp_variable(dtype=dtype) + helper.append_op( + type='fill_constant_batch_size_like', + inputs={'Input': input}, + outputs={'Out': [out]}, + attrs={ + 'shape': shape, + 'data_type': out.data_type, + 'value': float(value), + 'input_dim_idx': input_dim_idx, + 'output_dim_idx': output_dim_idx + }) + out.stop_gradient = True + return out + + def ones(shape, dtype, main_program=None): """ This function performs the same function as fill_constant() declared above @@ -1388,7 +1499,7 @@ def create_array(dtype, main_program=None): dtype=dtype) -def less_than(x, y, cond=None, main_program=None): +def less_than(x, y, cond=None, main_program=None, **ignored): helper = LayerHelper("less_than", **locals()) if cond is None: cond = helper.create_tmp_variable(dtype='bool') @@ -1466,13 +1577,20 @@ class ConditionalBlockGuard(BlockGuard): class ConditionalBlock(object): - def __init__(self, inputs, name=None, main_program=None): + def __init__(self, + inputs, + name=None, + main_program=None, + startup_program=None): for each_input in inputs: if not isinstance(each_input, Variable): raise TypeError("Each input should be variable") self.inputs = inputs self.helper = LayerHelper( - 'conditional_block', name=name, main_program=main_program) + 'conditional_block', + name=name, + main_program=main_program, + startup_program=startup_program) def block(self): return ConditionalBlockGuard(self) @@ -1517,3 +1635,148 @@ class ConditionalBlock(object): outputs={'Out': out_list, 'Scope': [step_scope]}, attrs={'block': inside_block}) + + +class IfElseBlockGuard(object): + def __init__(self, is_true, ifelse): + if not isinstance(ifelse, IfElse): + raise TypeError("ifelse must be an instance of IfElse class") + + if ifelse.status != IfElse.OUT_IF_ELSE_BLOCKS: + raise ValueError("You cannot invoke IfElse.block() inside a block") 
+ + self.is_true = is_true + self.ie = ifelse + if is_true: + self.cond_block = ifelse.conditional_true_block + else: + self.cond_block = ifelse.conditional_false_block + + if not isinstance(self.cond_block, ConditionalBlock): + raise TypeError("Unexpected situation") + + self.cond_block = self.cond_block.block() + + def __enter__(self): + self.ie.status = IfElse.IN_IF_ELSE_TRUE_BLOCKS if self.is_true else IfElse.IN_IF_ELSE_FALSE_BLOCKS + self.cond_block.__enter__() + + def __exit__(self, exc_type, exc_val, exc_tb): + if not self.cond_block.__exit__(exc_type, exc_val, exc_tb): + # re-raise inside exception + return False + if len(self.ie.output_table[1 if self.is_true else 0]) == 0: + raise ValueError("Must set output inside block") + self.ie.status = IfElse.OUT_IF_ELSE_BLOCKS + + +class IfElse(object): + OUT_IF_ELSE_BLOCKS = 0 + IN_IF_ELSE_TRUE_BLOCKS = 1 + IN_IF_ELSE_FALSE_BLOCKS = 2 + + def __init__(self, cond, name=None, main_program=None, + startup_program=None): + if not isinstance(cond, Variable): + raise TypeError("cond must be a Variable") + self.helper = LayerHelper( + 'ifelse', + name=name, + main_program=main_program, + startup_program=startup_program) + self.cond = cond + self.input_table = {} + self.status = IfElse.OUT_IF_ELSE_BLOCKS + self.conditional_true_block = ConditionalBlock(inputs=[self.cond]) + self.conditional_false_block = ConditionalBlock(inputs=[self.cond]) + self.output_table = ([], []) # (false_outs, true_outs) + + def input(self, x): + if self.status == IfElse.OUT_IF_ELSE_BLOCKS: + raise ValueError("input must in true/false blocks") + if id(x) not in self.input_table: + parent_block = self.parent_block() + out_true = parent_block.create_var( + name=unique_name('ifelse_input' + self.helper.name), + dtype=x.data_type) + + out_false = parent_block.create_var( + name=unique_name('ifelse_input' + self.helper.name), + dtype=x.data_type) + parent_block.append_op( + type='split_lod_tensor', + inputs={ + 'X': x, + 'Mask': self.cond, + }, +
outputs={'OutTrue': out_true, + 'OutFalse': out_false}, + attrs={'level': 0}) + self.input_table[id(x)] = (out_true, out_false) + else: + out_true, out_false = self.input_table[id(x)] + + if self.status == IfElse.IN_IF_ELSE_TRUE_BLOCKS: + return out_true + else: + return out_false + + def parent_block(self): + current_block = self.helper.main_program.current_block() + return self.helper.main_program.block(current_block.parent_idx) + + def true_block(self): + return IfElseBlockGuard(True, self) + + def false_block(self): + return IfElseBlockGuard(False, self) + + def output(self, *outs): + if self.status == self.OUT_IF_ELSE_BLOCKS: + raise ValueError("output can only be invoked in the sub-block") + + out_table = self.output_table[1 if self.status == + self.IN_IF_ELSE_TRUE_BLOCKS else 0] + parent_block = self.parent_block() + for each_out in outs: + if not isinstance(each_out, Variable): + raise TypeError("Each output should be a variable") + # create outside tensor + outside_out = parent_block.create_var( + name=unique_name("_".join([self.helper.name, 'output'])), + dtype=each_out.data_type) + out_table.append(outside_out) + + # assign local var to outside + assign( + input=each_out, + output=outside_out, + main_program=self.helper.main_program, + startup_program=self.helper.startup_program) + + def __call__(self): + if self.status != self.OUT_IF_ELSE_BLOCKS: + raise ValueError("IfElse::__call__ must be out of sub-block") + false_len, true_len = map(len, self.output_table) + if false_len == 0 and true_len == 0: + raise ValueError("Must invoke true_block/false_block before " + "__call__") + elif false_len != true_len and false_len != 0 and true_len != 0: + raise ValueError("The output side must be same") + elif false_len == 0 or true_len == 0: + return self.output_table[0 if false_len != 0 else 1] + + # else none of false_len/true_len is zero + # merge together + rlist = [] + for false_var, true_var in zip(*self.output_table): + rlist.append( + merge_lod_tensor( + 
in_true=true_var, + in_false=false_var, + mask=self.cond, + x=self.cond, + level=0, + main_program=self.helper.main_program, + startup_program=self.helper.startup_program)) + return rlist diff --git a/python/paddle/v2/fluid/net_drawer.py b/python/paddle/v2/fluid/net_drawer.py index 17ad547c2bb5b79ef8225dd1a8f1ef49a6572508..94fdd5e38970b309580de6fc934b158a3c46e464 100644 --- a/python/paddle/v2/fluid/net_drawer.py +++ b/python/paddle/v2/fluid/net_drawer.py @@ -66,10 +66,13 @@ def parse_graph(program, graph, var_dict, **kwargs): if not var_dict.has_key(var): var_dict[var] = "Feed" + temp_id = 0 proto = framework_pb2.ProgramDesc.FromString( program.desc.serialize_to_string()) for block in proto.blocks: for op in block.ops: + op.type = op.type + "_" + str(temp_id) + temp_id += 1 graph.node(**draw_node(op)) for o in op.outputs: for arg in o.arguments: @@ -78,6 +81,7 @@ def parse_graph(program, graph, var_dict, **kwargs): for arg in e.arguments: if var_dict.has_key(arg): graph.edge(**draw_edge(var_dict, op, e, arg)) + break # only plot the first block def draw_graph(startup_program, main_program, **kwargs): diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index 4252a6f08509fec92ac5c45d32169232e1dd190f..87a478c2903b77d955ebde49a4a0e507c9e9ffd3 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -9,7 +9,7 @@ from paddle.v2.fluid.layer_helper import LayerHelper __all__ = [ 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', - 'AdamaxOptimizer' + 'AdamaxOptimizer', 'DecayedAdagradOptimizer' ] @@ -85,7 +85,7 @@ class Optimizer(object): """ if (name in self._accumulators and param.name in self._accumulators[name]): - raise Exception("Accumulator {} already exists for parmeter {}". + raise Exception("Accumulator {} already exists for parameter {}". 
format(name, param.name)) assert isinstance(self.helper, LayerHelper) @@ -170,7 +170,8 @@ class Optimizer(object): optimize_ops = [] for param_and_grad in parameters_and_grads: - if param_and_grad[1] is not None: + if param_and_grad[0].trainable is True and param_and_grad[ + 1] is not None: optimize_op = self._append_optimize_op(loss.block, param_and_grad) optimize_ops.append(optimize_op) @@ -307,7 +308,7 @@ class AdagradOptimizer(Optimizer): moment_acc = self._get_accumulator(self._moment_acc_str, param_and_grad[0]) - # create the adagrad optimizer op + # Create the adagrad optimizer op adagrad_op = block.append_op( type=self.type, inputs={ @@ -510,3 +511,51 @@ class AdamaxOptimizer(Optimizer): attrs={"scale": self._beta1}) return [scale_beta1] + + +class DecayedAdagradOptimizer(Optimizer): + """Simple Decayed Adagrad optimizer with moment state + """ + _moment_acc_str = "moment" + + def __init__(self, + learning_rate, + decay=0.95, + epsilon=1.0e-6, + global_step=None): + assert learning_rate is not None + assert decay is not None + assert epsilon is not None + + super(DecayedAdagradOptimizer, self).__init__(global_step) + self.type = "decayed_adagrad" + self._learning_rate = learning_rate + self._decay = decay + self._epsilon = epsilon + + def _create_accumulators(self, block, parameters): + assert isinstance(block, framework.Block) + + for p in parameters: + self._add_accumulator(self._moment_acc_str, p) + + def _append_optimize_op(self, block, param_and_grad): + assert isinstance(block, framework.Block) + + moment_acc = self._get_accumulator(self._moment_acc_str, + param_and_grad[0]) + + # Create the decayed adagrad optimizer op + decayed_adagrad_op = block.append_op( + type=self.type, + inputs={ + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "Moment": moment_acc, + "LearningRate": self._create_param_lr(param_and_grad) + }, + outputs={"ParamOut": param_and_grad[0], + "MomentOut": moment_acc}, + attrs={"epsilon": self._epsilon}) + + return 
decayed_adagrad_op diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py index 5ef963bffa4e4fa3992e1f811d7f514662809410..a7f3bfc0caf76302674a00c80c2bd9ebf834f872 100644 --- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py @@ -1,46 +1,23 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer - -from paddle.v2.fluid.framework import Program -from paddle.v2.fluid.io import save_persistables, load_persistables +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.io import save_persistables, load_persistables +from paddle.v2.fluid.optimizer import SGDOptimizer -import numpy as np - -startup_program = Program() -main_program = Program() -x = layers.data( - name='x', - shape=[13], - data_type='float32', - main_program=main_program, - startup_program=startup_program) +x = layers.data(name='x', shape=[13], data_type='float32') -y_predict = layers.fc(input=x, - size=1, - act=None, - main_program=main_program, - startup_program=startup_program) +y_predict = layers.fc(input=x, size=1, act=None) -y = layers.data( - name='y', - shape=[1], - data_type='float32', - main_program=main_program, - startup_program=startup_program) +y = layers.data(name='y', shape=[1], data_type='float32') -cost = layers.square_error_cost( - input=y_predict, - label=y, - main_program=main_program, - startup_program=startup_program) -avg_cost = layers.mean( - x=cost, main_program=main_program, startup_program=startup_program) +cost = layers.square_error_cost(input=y_predict, label=y) +avg_cost = layers.mean(x=cost) -sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) -opts = sgd_optimizer.minimize(avg_cost, startup_program) +sgd_optimizer = 
SGDOptimizer(learning_rate=0.001) +opts = sgd_optimizer.minimize(avg_cost) BATCH_SIZE = 20 @@ -52,12 +29,12 @@ train_reader = paddle.batch( place = core.CPUPlace() exe = Executor(place) -exe.run(startup_program, feed={}, fetch_list=[]) +exe.run(framework.default_startup_program()) PASS_NUM = 100 for pass_id in range(PASS_NUM): - save_persistables(exe, "./fit_a_line.model/", main_program=main_program) - load_persistables(exe, "./fit_a_line.model/", main_program=main_program) + save_persistables(exe, "./fit_a_line.model/") + load_persistables(exe, "./fit_a_line.model/") for data in train_reader(): x_data = np.array(map(lambda x: x[0], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("float32") @@ -69,7 +46,7 @@ for pass_id in range(PASS_NUM): tensor_y = core.LoDTensor() tensor_y.set(y_data, place) # print tensor_y.get_dims() - outs = exe.run(main_program, + outs = exe.run(framework.default_main_program(), feed={'x': tensor_x, 'y': tensor_y}, fetch_list=[avg_cost]) diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index e253b8d27fd29746b41d82a63b11485032e77ebb..efe63a68f0745eb728b569a03d0344877c1484f7 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -1,23 +1,17 @@ import numpy as np import paddle.v2 as paddle import paddle.v2.fluid.core as core +import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets -import paddle.v2.fluid.optimizer as optimizer +import paddle.v2.fluid.evaluator as evaluator from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.framework import g_startup_program, g_main_program from paddle.v2.fluid.initializer import XavierInitializer +from paddle.v2.fluid.optimizer import AdamOptimizer -def resnet_cifar10(input, depth=32, 
main_program=None, startup_program=None): - def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - act='relu', - main_program=None, - startup_program=None): +def resnet_cifar10(input, depth=32): + def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): tmp = layers.conv2d( input=input, filter_size=filter_size, @@ -25,14 +19,8 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): stride=stride, padding=padding, act=None, - bias_attr=False, - main_program=main_program, - startup_program=startup_program) - return layers.batch_norm( - input=tmp, - act=act, - main_program=main_program, - startup_program=startup_program) + bias_attr=False) + return layers.batch_norm(input=tmp, act=act) def shortcut(input, ch_in, ch_out, stride, program, init_program): if ch_in != ch_out: @@ -41,99 +29,32 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): else: return input - def basicblock(input, - ch_in, - ch_out, - stride, - main_program=main_program, - startup_program=startup_program): - tmp = conv_bn_layer( - input, - ch_out, - 3, - stride, - 1, - main_program=main_program, - startup_program=startup_program) - tmp = conv_bn_layer( - tmp, - ch_out, - 3, - 1, - 1, - act=None, - main_program=main_program, - startup_program=startup_program) - short = shortcut(input, ch_in, ch_out, stride, main_program, - startup_program) - return layers.elementwise_add( - x=tmp, - y=short, - act='relu', - main_program=main_program, - startup_program=startup_program) + def basicblock(input, ch_in, ch_out, stride): + tmp = conv_bn_layer(input, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None) + short = shortcut(input, ch_in, ch_out, stride) + return layers.elementwise_add(x=tmp, y=short, act='relu') - def layer_warp(block_func, input, ch_in, ch_out, count, stride, program, - startup_program): - tmp = block_func(input, ch_in, ch_out, stride, program, startup_program) + def 
layer_warp(block_func, input, ch_in, ch_out, count, stride): + tmp = block_func(input, ch_in, ch_out, stride) for i in range(1, count): - tmp = block_func(tmp, ch_out, ch_out, 1, program, startup_program) + tmp = block_func(tmp, ch_out, ch_out, 1) return tmp assert (depth - 2) % 6 == 0 n = (depth - 2) / 6 conv1 = conv_bn_layer( - input=input, - ch_out=16, - filter_size=3, - stride=1, - padding=1, - main_program=main_program, - startup_program=startup_program) - res1 = layer_warp( - basicblock, - conv1, - 16, - 16, - n, - 1, - main_program=main_program, - startup_program=startup_program) - res2 = layer_warp( - basicblock, - res1, - 16, - 32, - n, - 2, - main_program=main_program, - startup_program=startup_program) - res3 = layer_warp( - basicblock, - res2, - 32, - 64, - n, - 2, - main_program=main_program, - startup_program=startup_program) + input=input, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) + res2 = layer_warp(basicblock, res1, 16, 32, n, 2) + res3 = layer_warp(basicblock, res2, 32, 64, n, 2) pool = layers.pool2d( - input=res3, - pool_size=8, - pool_type='avg', - pool_stride=1, - main_program=main_program, - startup_program=startup_program) + input=res3, pool_size=8, pool_type='avg', pool_stride=1) return pool -def vgg16_bn_drop(input, main_program=None, startup_program=None): - def conv_block(input, - num_filter, - groups, - dropouts, - main_program=None, - startup_program=None): +def vgg16_bn_drop(input): + def conv_block(input, num_filter, groups, dropouts): return nets.img_conv_group( input=input, pool_size=2, @@ -143,51 +64,26 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None): conv_act='relu', conv_with_batchnorm=True, conv_batchnorm_drop_rate=dropouts, - pool_type='max', - main_program=main_program, - startup_program=startup_program) + pool_type='max') - conv1 = conv_block(input, 64, 2, [0.3, 0], main_program, startup_program) - conv2 = conv_block(conv1, 128, 2, [0.4, 0], 
main_program, startup_program) - conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0], main_program, - startup_program) - conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0], main_program, - startup_program) - conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0], main_program, - startup_program) + conv1 = conv_block(input, 64, 2, [0.3, 0]) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - drop = layers.dropout( - x=conv5, - dropout_prob=0.5, - main_program=main_program, - startup_program=startup_program) + drop = layers.dropout(x=conv5, dropout_prob=0.5) fc1 = layers.fc(input=drop, size=512, act=None, - param_attr={"initializer": XavierInitializer()}, - main_program=main_program, - startup_program=startup_program) - reshape1 = layers.reshape( - x=fc1, - shape=list(fc1.shape + (1, 1)), - main_program=main_program, - startup_program=startup_program) - bn = layers.batch_norm( - input=reshape1, - act='relu', - main_program=main_program, - startup_program=startup_program) - drop2 = layers.dropout( - x=bn, - dropout_prob=0.5, - main_program=main_program, - startup_program=startup_program) + param_attr={"initializer": XavierInitializer()}) + reshape1 = layers.reshape(x=fc1, shape=list(fc1.shape + (1, 1))) + bn = layers.batch_norm(input=reshape1, act='relu') + drop2 = layers.dropout(x=bn, dropout_prob=0.5) fc2 = layers.fc(input=drop2, size=512, act=None, - param_attr={"initializer": XavierInitializer()}, - main_program=main_program, - startup_program=startup_program) + param_attr={"initializer": XavierInitializer()}) return fc2 @@ -208,12 +104,13 @@ net = vgg16_bn_drop(images) predict = layers.fc(input=net, size=classdim, act='softmax') cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) -accuracy = layers.accuracy(input=predict, label=label) -# optimizer = optimizer.SGDOptimizer(learning_rate=0.001) 
-optimizer = optimizer.AdamOptimizer(learning_rate=0.001) +# optimizer = SGDOptimizer(learning_rate=0.001) +optimizer = AdamOptimizer(learning_rate=0.001) opts = optimizer.minimize(avg_cost) +accuracy, acc_out = evaluator.accuracy(input=predict, label=label) + BATCH_SIZE = 128 PASS_NUM = 1 @@ -225,10 +122,11 @@ train_reader = paddle.batch( place = core.CPUPlace() exe = Executor(place) -exe.run(g_startup_program, feed={}, fetch_list=[]) +exe.run(framework.default_startup_program()) for pass_id in range(PASS_NUM): batch_id = 0 + accuracy.reset(exe) for data in train_reader(): img_data = np.array(map(lambda x: x[0].reshape(data_shape), data)).astype("float32") @@ -243,15 +141,17 @@ for pass_id in range(PASS_NUM): tensor_img.set(img_data, place) tensor_y.set(y_data, place) - outs = exe.run(g_main_program, + outs = exe.run(framework.default_main_program(), feed={"pixel": tensor_img, "label": tensor_y}, - fetch_list=[avg_cost, accuracy]) + fetch_list=[avg_cost, acc_out]) loss = np.array(outs[0]) acc = np.array(outs[1]) + pass_acc = accuracy.eval(exe) print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) + - " loss:" + str(loss) + " acc:" + str(acc)) + " loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( + pass_acc)) batch_id = batch_id + 1 if batch_id > 1: diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py new file mode 100644 index 0000000000000000000000000000000000000000..f66e6e748b76dec53a9e24b5b352d31395ce6bde --- /dev/null +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -0,0 +1,192 @@ +import numpy as np +import paddle.v2 as paddle +import paddle.v2.dataset.conll05 as conll05 +import paddle.v2.fluid.core as core +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.executor import Executor, g_scope +from paddle.v2.fluid.optimizer import SGDOptimizer + +word_dict, verb_dict, 
label_dict = conll05.get_dict() +word_dict_len = len(word_dict) +label_dict_len = len(label_dict) +pred_len = len(verb_dict) + +mark_dict_len = 2 +word_dim = 32 +mark_dim = 5 +hidden_dim = 512 +depth = 8 +mix_hidden_lr = 1e-3 + +IS_SPARSE = True +PASS_NUM = 10 +BATCH_SIZE = 20 + +embedding_name = 'emb' + + +def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. + return np.fromfile(f, dtype=np.float32).reshape(h, w) + + +def db_lstm(): + # 8 features + word = layers.data(name='word_data', shape=[1], data_type='int64') + predicate = layers.data(name='verb_data', shape=[1], data_type='int64') + ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64') + ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64') + ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64') + ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64') + ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64') + mark = layers.data(name='mark_data', shape=[1], data_type='int64') + + predicate_embedding = layers.embedding( + input=predicate, + size=[pred_len, word_dim], + data_type='float32', + is_sparse=IS_SPARSE, + param_attr={'name': 'vemb'}) + + mark_embedding = layers.embedding( + input=mark, + size=[mark_dict_len, mark_dim], + data_type='float32', + is_sparse=IS_SPARSE) + + word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] + emb_layers = [ + layers.embedding( + size=[word_dict_len, word_dim], + input=x, + param_attr={'name': embedding_name, + 'trainable': False}) for x in word_input + ] + emb_layers.append(predicate_embedding) + emb_layers.append(mark_embedding) + + hidden_0_layers = [ + layers.fc(input=emb, size=hidden_dim) for emb in emb_layers + ] + + hidden_0 = layers.sums(input=hidden_0_layers) + + lstm_0 = layers.dynamic_lstm( + input=hidden_0, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid') + + # stack 
L-LSTM and R-LSTM with direct edges + input_tmp = [hidden_0, lstm_0] + + for i in range(1, depth): + mix_hidden = layers.sums(input=[ + layers.fc(input=input_tmp[0], size=hidden_dim), + layers.fc(input=input_tmp[1], size=hidden_dim) + ]) + + lstm = layers.dynamic_lstm( + input=mix_hidden, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid', + is_reverse=((i % 2) == 1)) + + input_tmp = [mix_hidden, lstm] + + feature_out = layers.sums(input=[ + layers.fc(input=input_tmp[0], size=label_dict_len), + layers.fc(input=input_tmp[1], size=label_dict_len) + ]) + + return feature_out + + +def to_lodtensor(data, place): + seq_lens = [len(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + res = core.LoDTensor() + res.set(flattened_data, place) + res.set_lod([lod]) + return res + + +def main(): + # define network topology + feature_out = db_lstm() + target = layers.data(name='target', shape=[1], data_type='int64') + crf_cost = layers.linear_chain_crf( + input=feature_out, + label=target, + param_attr={"name": 'crfw', + "learning_rate": mix_hidden_lr}) + avg_cost = layers.mean(x=crf_cost) + # TODO(qiao) + # 1. add crf_decode_layer and evaluator + # 2. 
use other optimizer and check why out will be NAN + sgd_optimizer = SGDOptimizer(learning_rate=0.0001) + opts = sgd_optimizer.minimize(avg_cost) + + train_data = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.conll05.test(), buf_size=8192), + batch_size=BATCH_SIZE) + place = core.CPUPlace() + exe = Executor(place) + + exe.run(framework.default_startup_program()) + + embedding_param = g_scope.find_var(embedding_name).get_tensor() + embedding_param.set( + load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place) + + batch_id = 0 + for pass_id in xrange(PASS_NUM): + for data in train_data(): + word_data = to_lodtensor(map(lambda x: x[0], data), place) + ctx_n2_data = to_lodtensor(map(lambda x: x[1], data), place) + ctx_n1_data = to_lodtensor(map(lambda x: x[2], data), place) + ctx_0_data = to_lodtensor(map(lambda x: x[3], data), place) + ctx_p1_data = to_lodtensor(map(lambda x: x[4], data), place) + ctx_p2_data = to_lodtensor(map(lambda x: x[5], data), place) + verb_data = to_lodtensor(map(lambda x: x[6], data), place) + mark_data = to_lodtensor(map(lambda x: x[7], data), place) + target = to_lodtensor(map(lambda x: x[8], data), place) + + outs = exe.run(framework.default_main_program(), + feed={ + 'word_data': word_data, + 'ctx_n2_data': ctx_n2_data, + 'ctx_n1_data': ctx_n1_data, + 'ctx_0_data': ctx_0_data, + 'ctx_p1_data': ctx_p1_data, + 'ctx_p2_data': ctx_p2_data, + 'verb_data': verb_data, + 'mark_data': mark_data, + 'target': target + }, + fetch_list=[avg_cost]) + avg_cost_val = np.array(outs[0]) + + if batch_id % 10 == 0: + print("avg_cost=" + str(avg_cost_val)) + + # exit early for CI + exit(0) + + batch_id = batch_id + 1 + + +if __name__ == '__main__': + main() diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py index 2b723125412c17f3805ee3cae046b0788aa34997..8f737689609fec4d1819ae58b9665298547a3716 100644 --- 
a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py @@ -1,69 +1,37 @@ +import numpy as np import paddle.v2 as paddle +import paddle.v2.fluid.core as core +import paddle.v2.fluid.evaluator as evaluator +import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets -import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer - -from paddle.v2.fluid.framework import Program from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.optimizer import AdamOptimizer -import numpy as np - -startup_program = Program() -main_program = Program() - -images = layers.data( - name='pixel', - shape=[1, 28, 28], - data_type='float32', - main_program=main_program, - startup_program=startup_program) -label = layers.data( - name='label', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) +images = layers.data(name='pixel', shape=[1, 28, 28], data_type='float32') +label = layers.data(name='label', shape=[1], data_type='int64') conv_pool_1 = nets.simple_img_conv_pool( input=images, filter_size=5, num_filters=20, pool_size=2, pool_stride=2, - act="relu", - main_program=main_program, - startup_program=startup_program) + act="relu") conv_pool_2 = nets.simple_img_conv_pool( input=conv_pool_1, filter_size=5, num_filters=50, pool_size=2, pool_stride=2, - act="relu", - main_program=main_program, - startup_program=startup_program) + act="relu") -predict = layers.fc(input=conv_pool_2, - size=10, - act="softmax", - main_program=main_program, - startup_program=startup_program) -cost = layers.cross_entropy( - input=predict, - label=label, - main_program=main_program, - startup_program=startup_program) -avg_cost = layers.mean(x=cost, main_program=main_program) -accuracy = layers.accuracy( - input=predict, - label=label, - main_program=main_program, - 
startup_program=startup_program) +predict = layers.fc(input=conv_pool_2, size=10, act="softmax") +cost = layers.cross_entropy(input=predict, label=label) +avg_cost = layers.mean(x=cost) +optimizer = AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) +opts = optimizer.minimize(avg_cost) -# optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0, -# momentum=0.9) -optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) -opts = optimizer.minimize(avg_cost, startup_program) +accuracy, acc_out = evaluator.accuracy(input=predict, label=label) BATCH_SIZE = 50 PASS_NUM = 3 @@ -75,10 +43,10 @@ train_reader = paddle.batch( place = core.CPUPlace() exe = Executor(place) -exe.run(startup_program, feed={}, fetch_list=[]) +exe.run(framework.default_startup_program()) for pass_id in range(PASS_NUM): - count = 0 + accuracy.reset(exe) for data in train_reader(): img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]), data)).astype("float32") @@ -90,14 +58,21 @@ for pass_id in range(PASS_NUM): tensor_img.set(img_data, place) tensor_y.set(y_data, place) - outs = exe.run(main_program, + outs = exe.run(framework.default_main_program(), feed={"pixel": tensor_img, "label": tensor_y}, - fetch_list=[avg_cost, accuracy]) + fetch_list=[avg_cost, acc_out]) loss = np.array(outs[0]) acc = np.array(outs[1]) - - if loss < 10.0 and acc > 0.9: + pass_acc = accuracy.eval(exe) + print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc=" + + str(pass_acc)) + # print loss, acc + if loss < 10.0 and pass_acc > 0.9: # if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good. 
exit(0) + + pass_acc = accuracy.eval(exe) + print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc)) + exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index 2e1a9f236b6621c7334a9eb04272a6eb69c86fab..e42e4c9cc0024e193b0732df6d9ca3200df5f0b9 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -1,24 +1,16 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer - -from paddle.v2.fluid.framework import Program +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.evaluator as evaluator from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.regularizer import L2DecayRegularizer from paddle.v2.fluid.initializer import UniformInitializer - -import numpy as np +from paddle.v2.fluid.optimizer import MomentumOptimizer +from paddle.v2.fluid.regularizer import L2DecayRegularizer BATCH_SIZE = 128 -startup_program = Program() -main_program = Program() -image = layers.data( - name='x', - shape=[784], - data_type='float32', - main_program=main_program, - startup_program=startup_program) +image = layers.data(name='x', shape=[784], data_type='float32') param_attr = { 'name': None, @@ -27,48 +19,23 @@ param_attr = { 'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE) } -hidden1 = layers.fc(input=image, - size=128, - act='relu', - main_program=main_program, - startup_program=startup_program, - param_attr=param_attr) -hidden2 = layers.fc(input=hidden1, - size=64, - act='relu', - main_program=main_program, - startup_program=startup_program, - param_attr=param_attr) +hidden1 = layers.fc(input=image, size=128, act='relu', param_attr=param_attr) +hidden2 = layers.fc(input=hidden1, size=64, 
act='relu', param_attr=param_attr) predict = layers.fc(input=hidden2, size=10, act='softmax', - main_program=main_program, - startup_program=startup_program, param_attr=param_attr) -label = layers.data( - name='y', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) - -cost = layers.cross_entropy( - input=predict, - label=label, - main_program=main_program, - startup_program=startup_program) -avg_cost = layers.mean( - x=cost, main_program=main_program, startup_program=startup_program) -accuracy = layers.accuracy( - input=predict, - label=label, - main_program=main_program, - startup_program=startup_program) - -optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) -opts = optimizer.minimize(avg_cost, startup_program) +label = layers.data(name='y', shape=[1], data_type='int64') + +cost = layers.cross_entropy(input=predict, label=label) +avg_cost = layers.mean(x=cost) + +optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) +opts = optimizer.minimize(avg_cost) + +accuracy, acc_out = evaluator.accuracy(input=predict, label=label) train_reader = paddle.batch( paddle.reader.shuffle( @@ -78,10 +45,11 @@ train_reader = paddle.batch( place = core.CPUPlace() exe = Executor(place) -exe.run(startup_program, feed={}, fetch_list=[]) +exe.run(framework.default_startup_program()) PASS_NUM = 100 for pass_id in range(PASS_NUM): + accuracy.reset(exe) for data in train_reader(): x_data = np.array(map(lambda x: x[0], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") @@ -93,12 +61,16 @@ for pass_id in range(PASS_NUM): tensor_y = core.LoDTensor() tensor_y.set(y_data, place) - outs = exe.run(main_program, + outs = exe.run(framework.default_main_program(), feed={'x': tensor_x, 'y': tensor_y}, - fetch_list=[avg_cost, accuracy]) + fetch_list=[avg_cost, acc_out]) out = np.array(outs[0]) acc = np.array(outs[1]) - if out[0] < 5.0: - exit(0) # if avg cost less than 5.0, we think our 
code is good. + pass_acc = accuracy.eval(exe) + + if pass_acc > 0.7: + exit(0) + # print("pass_id=" + str(pass_id) + " auc=" + + # str(acc) + " pass_acc=" + str(pass_acc)) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index 4708dfe3e9209a3254a9e1903cbedf07ebc5d2d0..55ded3aed3a23c8cd7795f915dc1cbd512c6d945 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -1,18 +1,14 @@ +import numpy as np import paddle.v2 as paddle +import paddle.v2.fluid.core as core +import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets -import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer - -from paddle.v2.fluid.framework import Program from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.optimizer import SGDOptimizer -import numpy as np - -startup_program = Program() -main_program = Program() -is_sparse = True -use_gpu = False +IS_SPARSE = True +USE_GPU = False BATCH_SIZE = 256 @@ -22,102 +18,55 @@ def get_usr_combined_features(): USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 - uid = layers.data( - name='user_id', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) + uid = layers.data(name='user_id', shape=[1], data_type='int64') usr_emb = layers.embedding( input=uid, data_type='float32', size=[USR_DICT_SIZE, 32], param_attr={'name': 'user_table'}, - is_sparse=is_sparse, - main_program=main_program, - startup_program=startup_program) + is_sparse=IS_SPARSE) - usr_fc = layers.fc(input=usr_emb, - size=32, - main_program=main_program, - startup_program=startup_program) + usr_fc = layers.fc(input=usr_emb, size=32) USR_GENDER_DICT_SIZE = 2 - usr_gender_id = layers.data( - name='gender_id', - shape=[1], - data_type='int64', - 
main_program=main_program, - startup_program=startup_program) + usr_gender_id = layers.data(name='gender_id', shape=[1], data_type='int64') usr_gender_emb = layers.embedding( input=usr_gender_id, size=[USR_GENDER_DICT_SIZE, 16], param_attr={'name': 'gender_table'}, - is_sparse=is_sparse, - main_program=main_program, - startup_program=startup_program) + is_sparse=IS_SPARSE) - usr_gender_fc = layers.fc(input=usr_gender_emb, - size=16, - main_program=main_program, - startup_program=startup_program) + usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) - usr_age_id = layers.data( - name='age_id', - shape=[1], - data_type="int64", - main_program=main_program, - startup_program=startup_program) + usr_age_id = layers.data(name='age_id', shape=[1], data_type="int64") usr_age_emb = layers.embedding( input=usr_age_id, size=[USR_AGE_DICT_SIZE, 16], - is_sparse=is_sparse, - param_attr={'name': 'age_table'}, - main_program=main_program, - startup_program=startup_program) + is_sparse=IS_SPARSE, + param_attr={'name': 'age_table'}) - usr_age_fc = layers.fc(input=usr_age_emb, - size=16, - main_program=main_program, - startup_program=startup_program) + usr_age_fc = layers.fc(input=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 - usr_job_id = layers.data( - name='job_id', - shape=[1], - data_type="int64", - main_program=main_program, - startup_program=startup_program) + usr_job_id = layers.data(name='job_id', shape=[1], data_type="int64") usr_job_emb = layers.embedding( input=usr_job_id, size=[USR_JOB_DICT_SIZE, 16], param_attr={'name': 'job_table'}, - is_sparse=is_sparse, - main_program=main_program, - startup_program=startup_program) + is_sparse=IS_SPARSE) - usr_job_fc = layers.fc(input=usr_job_emb, - size=16, - main_program=main_program, - startup_program=startup_program) + usr_job_fc = layers.fc(input=usr_job_emb, size=16) concat_embed = layers.concat( - input=[usr_fc, 
usr_gender_fc, usr_age_fc, usr_job_fc], - axis=1, - main_program=main_program, - startup_program=startup_program) + input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1) - usr_combined_features = layers.fc(input=concat_embed, - size=200, - act="tanh", - main_program=main_program, - startup_program=startup_program) + usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") return usr_combined_features @@ -126,86 +75,46 @@ def get_mov_combined_features(): MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 - mov_id = layers.data( - name='movie_id', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) + mov_id = layers.data(name='movie_id', shape=[1], data_type='int64') mov_emb = layers.embedding( input=mov_id, data_type='float32', size=[MOV_DICT_SIZE, 32], param_attr={'name': 'movie_table'}, - is_sparse=is_sparse, - main_program=main_program, - startup_program=startup_program) + is_sparse=IS_SPARSE) - mov_fc = layers.fc(input=mov_emb, - size=32, - main_program=main_program, - startup_program=startup_program) + mov_fc = layers.fc(input=mov_emb, size=32) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) - category_id = layers.data( - name='category_id', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) + category_id = layers.data(name='category_id', shape=[1], data_type='int64') mov_categories_emb = layers.embedding( - input=category_id, - size=[CATEGORY_DICT_SIZE, 32], - is_sparse=is_sparse, - main_program=main_program, - startup_program=startup_program) + input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) mov_categories_hidden = layers.sequence_pool( - input=mov_categories_emb, - pool_type="sum", - main_program=main_program, - startup_program=startup_program) + input=mov_categories_emb, pool_type="sum") MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) - mov_title_id = 
layers.data( - name='movie_title', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) + mov_title_id = layers.data(name='movie_title', shape=[1], data_type='int64') mov_title_emb = layers.embedding( - input=mov_title_id, - size=[MOV_TITLE_DICT_SIZE, 32], - is_sparse=is_sparse, - main_program=main_program, - startup_program=startup_program) + input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) mov_title_conv = nets.sequence_conv_pool( input=mov_title_emb, num_filters=32, filter_size=3, act="tanh", - pool_type="sum", - main_program=main_program, - startup_program=startup_program) + pool_type="sum") concat_embed = layers.concat( - input=[mov_fc, mov_categories_hidden, mov_title_conv], - axis=1, - main_program=main_program, - startup_program=startup_program) + input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1) # FIXME(dzh) : need tanh operator - mov_combined_features = layers.fc(input=concat_embed, - size=200, - act="tanh", - main_program=main_program, - startup_program=startup_program) + mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") return mov_combined_features @@ -215,46 +124,29 @@ def model(): mov_combined_features = get_mov_combined_features() # need cos sim - inference = layers.cos_sim( - X=usr_combined_features, - Y=mov_combined_features, - main_program=main_program, - startup_program=startup_program) - - label = layers.data( - name='score', - shape=[1], - data_type='float32', - main_program=main_program, - startup_program=startup_program) + inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) + + label = layers.data(name='score', shape=[1], data_type='float32') - square_cost = layers.square_error_cost( - input=inference, - label=label, - main_program=main_program, - startup_program=startup_program) + square_cost = layers.square_error_cost(input=inference, label=label) - avg_cost = layers.mean( - x=square_cost, - 
main_program=main_program, - startup_program=startup_program) + avg_cost = layers.mean(x=square_cost) return avg_cost def main(): cost = model() - sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2) - opts = sgd_optimizer.minimize(cost, startup_program=startup_program) - block = main_program.block(0) + sgd_optimizer = SGDOptimizer(learning_rate=0.2) + opts = sgd_optimizer.minimize(cost) - if use_gpu: + if USE_GPU: place = core.GPUPlace(0) else: place = core.CPUPlace() exe = Executor(place) - exe.run(startup_program, feed={}, fetch_list=[]) + exe.run(framework.default_startup_program()) train_reader = paddle.batch( paddle.reader.shuffle( @@ -303,7 +195,7 @@ def main(): PASS_NUM = 100 for pass_id in range(PASS_NUM): for data in train_reader(): - outs = exe.run(main_program, + outs = exe.run(framework.default_main_program(), feed=func_feed(feeding, data), fetch_list=[cost]) out = np.array(outs[0]) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py index dc4b63da9b37aff55fc6362f239e3e61004a3866..4929f7cf615e61de5c4f61ef44c5340e9ac4492a 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py @@ -1,13 +1,12 @@ +import numpy as np import paddle.v2 as paddle +import paddle.v2.fluid.core as core +import paddle.v2.fluid.evaluator as evaluator +import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets -import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer - -from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import AdamOptimizer def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32): @@ -32,10 +31,10 @@ def convolution_net(input_dim, 
class_dim=2, emb_dim=32, hid_dim=32): act="softmax") cost = layers.cross_entropy(input=prediction, label=label) avg_cost = layers.mean(x=cost) - adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) - acc = layers.accuracy(input=prediction, label=label) - return avg_cost, acc + accuracy, acc_out = evaluator.accuracy(input=prediction, label=label) + return avg_cost, accuracy, acc_out def to_lodtensor(data, place): @@ -61,7 +60,8 @@ def main(): dict_dim = len(word_dict) class_dim = 2 - cost, acc = convolution_net(input_dim=dict_dim, class_dim=class_dim) + cost, accuracy, acc_out = convolution_net( + input_dim=dict_dim, class_dim=class_dim) train_data = paddle.batch( paddle.reader.shuffle( @@ -70,9 +70,10 @@ def main(): place = core.CPUPlace() exe = Executor(place) - exe.run(g_startup_program) + exe.run(framework.default_startup_program()) for pass_id in xrange(PASS_NUM): + accuracy.reset(exe) for data in train_data(): tensor_words = to_lodtensor(map(lambda x: x[0], data), place) @@ -82,15 +83,16 @@ def main(): tensor_label = core.LoDTensor() tensor_label.set(label, place) - outs = exe.run(g_main_program, + outs = exe.run(framework.default_main_program(), feed={"words": tensor_words, "label": tensor_label}, - fetch_list=[cost, acc]) + fetch_list=[cost, acc_out]) cost_val = np.array(outs[0]) acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if cost_val < 1.0 and acc_val > 0.7: + pass_acc = accuracy.eval(exe) + print("cost=" + str(cost_val) + " acc=" + str(acc_val) + + " pass_acc=" + str(pass_acc)) + if cost_val < 1.0 and pass_acc > 0.8: exit(0) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py index 6d507f4c8e39ba039603a5b7618e7a82d1dcb21b..b3ee91938865afb929670a388a561b156aec1fe9 100644 --- 
a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py @@ -1,13 +1,11 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer - -from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program +import paddle.v2.fluid.evaluator as evaluator +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import AdamOptimizer def stacked_lstm_net(input_dim, @@ -42,10 +40,10 @@ def stacked_lstm_net(input_dim, act='softmax') cost = layers.cross_entropy(input=prediction, label=label) avg_cost = layers.mean(x=cost) - adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) - acc = layers.accuracy(input=prediction, label=label) - return avg_cost, acc + accuracy, acc_out = evaluator.accuracy(input=prediction, label=label) + return avg_cost, accuracy, acc_out def to_lodtensor(data, place): @@ -72,7 +70,8 @@ def main(): dict_dim = len(word_dict) class_dim = 2 - cost, acc = stacked_lstm_net(input_dim=dict_dim, class_dim=class_dim) + cost, accuracy, acc_out = stacked_lstm_net( + input_dim=dict_dim, class_dim=class_dim) train_data = paddle.batch( paddle.reader.shuffle( @@ -81,9 +80,10 @@ def main(): place = core.CPUPlace() exe = Executor(place) - exe.run(g_startup_program) + exe.run(framework.default_startup_program()) for pass_id in xrange(PASS_NUM): + accuracy.reset(exe) for data in train_data(): tensor_words = to_lodtensor(map(lambda x: x[0], data), place) @@ -93,15 +93,16 @@ def main(): tensor_label = core.LoDTensor() tensor_label.set(label, place) - outs = exe.run(g_main_program, + 
outs = exe.run(framework.default_main_program(), feed={"words": tensor_words, "label": tensor_label}, - fetch_list=[cost, acc]) + fetch_list=[cost, acc_out]) cost_val = np.array(outs[0]) acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if cost_val < 1.0 and acc_val > 0.7: + pass_acc = accuracy.eval(exe) + print("cost=" + str(cost_val) + " acc=" + str(acc_val) + + " pass_acc=" + str(pass_acc)) + if cost_val < 1.0 and acc_val > 0.8: exit(0) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py index 848dcce974a107402c33013e9f84211fd4979e21..9a51a2f207ebed340b8e5c60e7ebeb82a611dbc5 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py @@ -1,12 +1,10 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer - -from paddle.v2.fluid.framework import g_main_program, g_startup_program +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import AdamOptimizer def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): @@ -34,7 +32,7 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): cost = layers.cross_entropy(input=prediction, label=label) avg_cost = layers.mean(x=cost) - adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) acc = layers.accuracy(input=prediction, label=label) @@ -56,17 +54,17 @@ def to_lodtensor(data, place): return res -def chop_data(data, chop_len=80, batch_len=50): +def chop_data(data, chop_len=80, batch_size=50): data = 
[(x[0][:chop_len], x[1]) for x in data if len(x[0]) >= chop_len] - return data[:batch_len] + return data[:batch_size] def prepare_feed_data(data, place): tensor_words = to_lodtensor(map(lambda x: x[0], data), place) label = np.array(map(lambda x: x[1], data)).astype("int64") - label = label.reshape([50, 1]) + label = label.reshape([len(label), 1]) tensor_label = core.LoDTensor() tensor_label.set(label, place) @@ -74,33 +72,41 @@ def prepare_feed_data(data, place): def main(): - word_dict = paddle.dataset.imdb.word_dict() - cost, acc = lstm_net(dict_dim=len(word_dict), class_dim=2) + BATCH_SIZE = 100 + PASS_NUM = 5 - batch_size = 100 - train_data = paddle.batch( - paddle.reader.buffered( - paddle.dataset.imdb.train(word_dict), size=batch_size * 10), - batch_size=batch_size) + word_dict = paddle.dataset.imdb.word_dict() + print "load word dict successfully" + dict_dim = len(word_dict) + class_dim = 2 - data = chop_data(next(train_data())) + cost, acc = lstm_net(dict_dim=dict_dim, class_dim=class_dim) + train_data = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=BATCH_SIZE * 10), + batch_size=BATCH_SIZE) place = core.CPUPlace() - tensor_words, tensor_label = prepare_feed_data(data, place) exe = Executor(place) - exe.run(g_startup_program) - - while True: - outs = exe.run(g_main_program, - feed={"words": tensor_words, - "label": tensor_label}, - fetch_list=[cost, acc]) - cost_val = np.array(outs[0]) - acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if acc_val > 0.9: - break + + exe.run(framework.default_startup_program()) + + for pass_id in xrange(PASS_NUM): + for data in train_data(): + chopped_data = chop_data(data) + tensor_words, tensor_label = prepare_feed_data(chopped_data, place) + + outs = exe.run(framework.default_main_program(), + feed={"words": tensor_words, + "label": tensor_label}, + fetch_list=[cost, acc]) + cost_val = np.array(outs[0]) + acc_val = np.array(outs[1]) + + 
print("cost=" + str(cost_val) + " acc=" + str(acc_val)) + if acc_val > 0.7: + exit(0) + exit(1) if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index 054dbd5a3d090ba8a08e8f101de11c69ddd36d8a..afa7b285198e0349317e123e4bd98e8336217afa 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -1,119 +1,63 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer - -from paddle.v2.fluid.framework import Program +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.optimizer import SGDOptimizer -import numpy as np - -startup_program = Program() -main_program = Program() - -embed_size = 32 -hidden_size = 256 +PASS_NUM = 100 +EMBED_SIZE = 32 +HIDDEN_SIZE = 256 N = 5 -batch_size = 32 -is_sparse = True +BATCH_SIZE = 32 +IS_SPARSE = True word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) -first_word = layers.data( - name='firstw', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) -second_word = layers.data( - name='secondw', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) -third_word = layers.data( - name='thirdw', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) -forth_word = layers.data( - name='forthw', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) -next_word = layers.data( - name='nextw', - shape=[1], - data_type='int64', - main_program=main_program, - startup_program=startup_program) +first_word = layers.data(name='firstw', shape=[1], data_type='int64') +second_word = 
layers.data(name='secondw', shape=[1], data_type='int64') +third_word = layers.data(name='thirdw', shape=[1], data_type='int64') +forth_word = layers.data(name='forthw', shape=[1], data_type='int64') +next_word = layers.data(name='nextw', shape=[1], data_type='int64') embed_first = layers.embedding( input=first_word, - size=[dict_size, embed_size], + size=[dict_size, EMBED_SIZE], data_type='float32', - is_sparse=is_sparse, - param_attr={'name': 'shared_w'}, - main_program=main_program, - startup_program=startup_program) + is_sparse=IS_SPARSE, + param_attr={'name': 'shared_w'}) embed_second = layers.embedding( input=second_word, - size=[dict_size, embed_size], + size=[dict_size, EMBED_SIZE], data_type='float32', - is_sparse=is_sparse, - param_attr={'name': 'shared_w'}, - main_program=main_program, - startup_program=startup_program) - + is_sparse=IS_SPARSE, + param_attr={'name': 'shared_w'}) embed_third = layers.embedding( input=third_word, - size=[dict_size, embed_size], + size=[dict_size, EMBED_SIZE], data_type='float32', - is_sparse=is_sparse, - param_attr={'name': 'shared_w'}, - main_program=main_program, - startup_program=startup_program) + is_sparse=IS_SPARSE, + param_attr={'name': 'shared_w'}) embed_forth = layers.embedding( input=forth_word, - size=[dict_size, embed_size], + size=[dict_size, EMBED_SIZE], data_type='float32', - is_sparse=is_sparse, - param_attr={'name': 'shared_w'}, - main_program=main_program, - startup_program=startup_program) + is_sparse=IS_SPARSE, + param_attr={'name': 'shared_w'}) concat_embed = layers.concat( - input=[embed_first, embed_second, embed_third, embed_forth], - axis=1, - main_program=main_program, - startup_program=startup_program) - -hidden1 = layers.fc(input=concat_embed, - size=hidden_size, - act='sigmoid', - main_program=main_program, - startup_program=startup_program) -predict_word = layers.fc(input=hidden1, - size=dict_size, - act='softmax', - main_program=main_program, - startup_program=startup_program) -cost = 
layers.cross_entropy( - input=predict_word, - label=next_word, - main_program=main_program, - startup_program=startup_program) -avg_cost = layers.mean( - x=cost, main_program=main_program, startup_program=startup_program) - -sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) -opts = sgd_optimizer.minimize(avg_cost, startup_program) + input=[embed_first, embed_second, embed_third, embed_forth], axis=1) +hidden1 = layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid') +predict_word = layers.fc(input=hidden1, size=dict_size, act='softmax') +cost = layers.cross_entropy(input=predict_word, label=next_word) +avg_cost = layers.mean(x=cost) +sgd_optimizer = SGDOptimizer(learning_rate=0.001) +opts = sgd_optimizer.minimize(avg_cost) train_reader = paddle.batch( - paddle.dataset.imikolov.train(word_dict, N), batch_size) + paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) place = core.CPUPlace() exe = Executor(place) @@ -122,8 +66,8 @@ exe = Executor(place) # below exit line. 
exit(0) -exe.run(startup_program, feed={}, fetch_list=[]) -PASS_NUM = 100 +exe.run(framework.default_startup_program()) + for pass_id in range(PASS_NUM): for data in train_reader(): input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)] @@ -150,7 +94,7 @@ for pass_id in range(PASS_NUM): next_tensor = core.LoDTensor() next_tensor.set(next_data, place) - outs = exe.run(main_program, + outs = exe.run(framework.default_main_program(), feed={ 'firstw': first_tensor, 'secondw': second_tensor, diff --git a/python/paddle/v2/fluid/tests/test_accuracy_op.py b/python/paddle/v2/fluid/tests/test_accuracy_op.py index 6536c297e8e559bf04fe6ef3b0e2dadd1914eb87..6f72918b7178bc1f856010f1111f18842f6cc34a 100644 --- a/python/paddle/v2/fluid/tests/test_accuracy_op.py +++ b/python/paddle/v2/fluid/tests/test_accuracy_op.py @@ -18,7 +18,9 @@ class TestAccuracyOp(OpTest): num_correct += 1 break self.outputs = { - 'Accuracy': np.array([num_correct / float(n)]).astype("float32") + 'Accuracy': np.array([num_correct / float(n)]).astype("float32"), + 'Correct': np.array([num_correct]).astype("int32"), + 'Total': np.array([n]).astype("int32") } def test_check_output(self): diff --git a/python/paddle/v2/fluid/tests/test_adagrad_op.py b/python/paddle/v2/fluid/tests/test_adagrad_op.py index 66bad349e59b608cb3cc965401c81ef4c716b318..903e84c32887100bbeef6ebf81f66f06f084fab5 100644 --- a/python/paddle/v2/fluid/tests/test_adagrad_op.py +++ b/python/paddle/v2/fluid/tests/test_adagrad_op.py @@ -1,6 +1,9 @@ import unittest import numpy as np +import paddle.v2.fluid.core as core +from paddle.v2.fluid.op import Operator from op_test import OpTest +import math class TestAdagradOp1(OpTest): @@ -65,5 +68,110 @@ class TestAdagradOp2(OpTest): self.check_output() +class TestSparseAdagradOp(unittest.TestCase): + def check_with_place(self, place): + scope = core.Scope() + + # create and initialize Grad Variable + height = 10 + rows = [0, 4, 7, 4] + row_numel = 12 + + grad_selected_rows = 
scope.var('Grad').get_selected_rows() + grad_selected_rows.set_height(height) + grad_selected_rows.set_rows(rows) + np_array = np.ones((len(rows), row_numel)).astype("float32") + np_array[0, 0] = 2.0 + np_array[2, 8] = 4.0 + + grad_tensor = grad_selected_rows.get_tensor() + grad_tensor.set(np_array, place) + + # create and initialize Param Variable + param = scope.var('Param').get_tensor() + param_array = np.full((height, row_numel), 5.0).astype("float32") + param.set(param_array, place) + + # create and initialize LeraningRate Variable + lr = scope.var('LearningRate').get_tensor() + lr_array = np.full((1), 2.0).astype("float32") + lr.set(lr_array, place) + + # create and initialize moment Variable + moment = scope.var('Moment').get_tensor() + moment_np_array = np.full((height, row_numel), 2.0).astype("float32") + moment.set(moment_np_array, place) + + # create and run sgd operator + adagrad_op = Operator( + "adagrad", + Param='Param', + Grad='Grad', + ParamOut='Param', + Moment='Moment', + MomentOut='Moment', + LearningRate='LearningRate', + epsilon=2.0) + + ctx = core.DeviceContext.create(place) + adagrad_op.run(scope, ctx) + + # get and compare moment result + moment_result_array = np.array(moment) + + self.assertAlmostEqual(6.0, moment_result_array[rows[0], 0]) + self.assertAlmostEqual(3.0, moment_result_array[rows[0], 2]) + self.assertAlmostEqual(2.0, moment_result_array[1, 0]) + # 2.0 + (1.0 + 1.0)^2 + self.assertAlmostEqual(6.0, moment_result_array[rows[1], 10]) + self.assertAlmostEqual(6.0, moment_result_array[rows[3], 4]) + + self.assertAlmostEqual(2.0, moment_result_array[5, 8]) + self.assertAlmostEqual(3.0, moment_result_array[rows[2], 1]) + self.assertAlmostEqual(18.0, moment_result_array[rows[2], 8]) + + # get and compare param result + result_array = np.array(param) + + def get_out(param, lr, grad, m, epsilon): + return param - lr * grad / (math.sqrt(m) + epsilon) + + self.assertAlmostEqual( + get_out(5.0, 2.0, 2.0, 6.0, 2.0), + result_array[rows[0], 
0], + places=5) + self.assertAlmostEqual( + get_out(5.0, 2.0, 1.0, 3.0, 2.0), + result_array[rows[0], 2], + places=5) + self.assertAlmostEqual( + get_out(5.0, 2.0, 0.0, 2.0, 2.0), result_array[1, 0], places=5) + + # grad_merge = 1.0 + 1.0 + # m = 6.0 + self.assertAlmostEqual( + get_out(5.0, 2.0, 2.0, 6.0, 2.0), + result_array[rows[1], 10], + places=5) + + self.assertAlmostEqual( + get_out(5.0, 2.0, 0.0, 2.0, 2.0), result_array[5, 8], places=5) + self.assertAlmostEqual( + get_out(5.0, 2.0, 1.0, 3.0, 2.0), + result_array[rows[2], 1], + places=5) + self.assertAlmostEqual( + get_out(5.0, 2.0, 4.0, 18.0, 2.0), + result_array[rows[2], 8], + places=5) + + def test_sparse_adagrad(self): + places = [core.CPUPlace()] + if core.is_compile_gpu(): + places.append(core.GPUPlace(0)) + for place in places: + self.check_with_place(place) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_beam_search_decode_op.py b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py similarity index 96% rename from python/paddle/v2/framework/tests/test_beam_search_decode_op.py rename to python/paddle/v2/fluid/tests/test_beam_search_decode_op.py index e9f180bbaea0f5922bee0a3e2a8c715d683c0d16..8a11820d2aba2dd4d17d925f0e0fe9f324100418 100644 --- a/python/paddle/v2/framework/tests/test_beam_search_decode_op.py +++ b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py @@ -1,8 +1,8 @@ import unittest import numpy as np -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +import paddle.v2.fluid.core as core +from paddle.v2.fluid.op import Operator class TestBeamSearchDecodeOp(unittest.TestCase): diff --git a/python/paddle/v2/fluid/tests/test_beam_search_op.py b/python/paddle/v2/fluid/tests/test_beam_search_op.py new file mode 100644 index 0000000000000000000000000000000000000000..cc7c09bb59de3f83e47b4d95c1203f7f050c5132 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_beam_search_op.py @@ -0,0 +1,65 @@ 
+import logging +from paddle.v2.fluid.op import Operator, DynamicRecurrentOp +import paddle.v2.fluid.core as core +import unittest +import numpy as np + + +def create_tensor(scope, name, np_data): + tensor = scope.var(name).get_tensor() + tensor.set(np_data, core.CPUPlace()) + return tensor + + +class BeamSearchOpTester(unittest.TestCase): + def setUp(self): + self.scope = core.Scope() + self.ctx = core.DeviceContext.create(core.CPUPlace()) + self._create_ids() + self._create_scores() + self._create_pre_ids() + self.scope.var('selected_ids') + self.scope.var('selected_scores') + + def test_run(self): + op = Operator( + 'beam_search', + pre_ids="pre_ids", + ids='ids', + scores='scores', + selected_ids='selected_ids', + selected_scores='selected_scores', + level=0, + beam_size=2, + end_id=0, ) + op.run(self.scope, self.ctx) + selected_ids = self.scope.find_var("selected_ids").get_tensor() + print 'selected_ids', np.array(selected_ids) + print 'lod', selected_ids.lod() + + def _create_pre_ids(self): + np_data = np.array([[1, 2, 3, 4]], dtype='int32') + tensor = create_tensor(self.scope, "pre_ids", np_data) + + def _create_ids(self): + self.lod = [[0, 1, 4], [0, 1, 2, 3, 4]] + np_data = np.array( + [[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int32') + tensor = create_tensor(self.scope, "ids", np_data) + tensor.set_lod(self.lod) + + def _create_scores(self): + np_data = np.array( + [ + [0.5, 0.3, 0.2], + [0.6, 0.3, 0.1], + [0.9, 0.5, 0.1], + [0.7, 0.5, 0.1], + ], + dtype='float32') + tensor = create_tensor(self.scope, "scores", np_data) + tensor.set_lod(self.lod) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py index 04ae7f294c27fdceaaff2e9a7ed854213e643945..2240dc73cdd31f320fed174dd811e93c6640137f 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_op.py @@ -10,23 +10,33 @@ def 
conv2d_forward_naive(input, filter, group, conv_param): assert np.mod(out_c, group) == 0 sub_out_c = out_c / group - stride, pad = conv_param['stride'], conv_param['pad'] - out_h = 1 + (in_h + 2 * pad[0] - f_h) / stride[0] - out_w = 1 + (in_w + 2 * pad[1] - f_w) / stride[1] + stride, pad, dilation = conv_param['stride'], conv_param['pad'], conv_param[ + 'dilation'] + out_h = 1 + (in_h + 2 * pad[0] - (dilation[0] * (f_h - 1) + 1)) / stride[0] + out_w = 1 + (in_w + 2 * pad[1] - (dilation[1] * (f_w - 1) + 1)) / stride[1] out = np.zeros((in_n, out_c, out_h, out_w)) + d_bolck_w = (dilation[0] * (f_h - 1) + 1) + d_bolck_h = (dilation[1] * (f_w - 1) + 1) + input_pad = np.pad(input, ((0, ), (0, ), (pad[0], ), (pad[1], )), mode='constant', constant_values=0) + + filter_dilation = np.zeros((out_c, f_c, d_bolck_h, d_bolck_w)) + filter_dilation[:, :, 0:d_bolck_h:dilation[0], 0:d_bolck_w:dilation[ + 1]] = filter + for i in range(out_h): for j in range(out_w): for g in range(group): input_pad_masked = \ input_pad[:, g * f_c:(g + 1) * f_c, - i * stride[0]:i * stride[0] + f_h, - j * stride[1]:j * stride[1] + f_w] + i * stride[0]:i * stride[0] + d_bolck_h, + j * stride[1]:j * stride[1] + d_bolck_w] - f_sub = filter[g * sub_out_c:(g + 1) * sub_out_c, :, :, :] + f_sub = filter_dilation[g * sub_out_c:(g + 1) * + sub_out_c, :, :, :] for k in range(sub_out_c): out[:, g * sub_out_c + k, i, j] = \ np.sum(input_pad_masked * f_sub[k, :, :, :], @@ -39,9 +49,14 @@ class TestConv2dOp(OpTest): def setUp(self): self.init_op_type() self.init_group() + self.init_dilation() self.init_test_case() - conv2d_param = {'stride': self.stride, 'pad': self.pad} + conv2d_param = { + 'stride': self.stride, + 'pad': self.pad, + 'dilation': self.dilations + } input = np.random.random(self.input_size).astype("float32") filter = np.random.random(self.filter_size).astype("float32") output = conv2d_forward_naive(input, filter, self.groups, @@ -80,12 +95,14 @@ class TestConv2dOp(OpTest): def init_test_case(self): 
self.pad = [0, 0] self.stride = [1, 1] - self.dilations = [1, 1] self.input_size = [2, 3, 5, 5] # NCHW assert np.mod(self.input_size[1], self.groups) == 0 f_c = self.input_size[1] / self.groups self.filter_size = [6, f_c, 3, 3] + def init_dilation(self): + self.dilations = [1, 1] + def init_group(self): self.groups = 1 @@ -93,32 +110,90 @@ class TestConv2dOp(OpTest): self.op_type = "conv2d" +class TestWithPad(TestConv2dOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3] + + +class TestWithStride(TestConv2dOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [2, 2] + self.input_size = [2, 3, 6, 6] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3] + + class TestWithGroup(TestConv2dOp): def init_group(self): self.groups = 3 - def init_op_type(self): - self.op_type = "conv2d" +class TestWith1x1(TestConv2dOp): + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 1, 1] -#----------------Conv2dCudnn---------------- + def init_group(self): + self.groups = 3 -class TestCudnn(TestConv2dOp): +class TestWithDilation(TestConv2dOp): + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_dilation(self): + self.dilations = [2, 2] + def init_group(self): - self.groups = 1 + self.groups = 3 + + +#----------------Conv2dCudnn---------------- +class TestCudnn(TestConv2dOp): + def init_op_type(self): + self.op_type 
= "conv_cudnn" + +class TestCudnnWithPad(TestWithPad): def init_op_type(self): self.op_type = "conv_cudnn" -class TestCudnnWithGroup(TestConv2dOp): - def init_group(self): - self.groups = 3 +class TestCudnnWithStride(TestWithStride): + def init_op_type(self): + self.op_type = "conv_cudnn" + +class TestCudnnWithGroup(TestWithGroup): def init_op_type(self): self.op_type = "conv_cudnn" +class TestCudnnWith1x1(TestWith1x1): + def init_op_type(self): + self.op_type = "conv_cudnn" + + +# cudnn v5 does not support dilation conv. +# class TestCudnnWithDilation(TestWithDilation): +# def init_op_type(self): +# self.op_type = "conv_cudnn" + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py index 54349c018c4a53b8767d6cd4f94d99c719dc0237..d7b1f2f2a3abf6335998742dbbef8e17794170fa 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py @@ -4,9 +4,7 @@ from op_test import OpTest def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param): - # [2, 3, 5, 5] in_n, in_c, in_h, in_w = input_.shape - # [3, 6, 3, 3] f_c, out_c, f_h, f_w = filter_.shape assert in_c == f_c @@ -29,6 +27,7 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param): j1, j2 = j * stride[0], j * stride[0] + f_w out[n, k, i1:i2, j1:j2] += tmp_out + out = out[:, :, pad[0]:out_h - pad[0], pad[1]:out_w - pad[1]] return out @@ -36,8 +35,6 @@ class TestConv2dTransposeOp(OpTest): def setUp(self): # init as conv transpose self.init_op_type() - - # [2, 3, 5, 5] -> kernel [3, 6, 3, 3] -> output [2, 6, 7, 7] self.init_test_case() conv2dtranspose_param = {'stride': self.stride, 'pad': self.pad} @@ -55,7 +52,6 @@ class TestConv2dTransposeOp(OpTest): self.outputs = {'Output': output} def test_check_output(self): - print 'check output here for', self.op_type self.check_output() def 
test_check_grad_no_input(self): @@ -88,6 +84,26 @@ class TestConv2dTransposeOp(OpTest): self.op_type = "conv2d_transpose" +class TestWithPad(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3] + + +class TestWithStride(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [2, 2] + self.dilations = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3] + + # ------------ test_cudnn ------------ class TestCudnn(TestConv2dTransposeOp): def init_op_type(self): diff --git a/python/paddle/v2/fluid/tests/test_conv3d_op.py b/python/paddle/v2/fluid/tests/test_conv3d_op.py index 44c192f58d25f8ddaa38d2ba7c7c19b9a5bd7dc1..934ea46437d67b78309a86a2779e0c6577399136 100644 --- a/python/paddle/v2/fluid/tests/test_conv3d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv3d_op.py @@ -10,27 +10,40 @@ def conv3d_forward_naive(input, filter, group, conv_param): assert np.mod(out_c, group) == 0 sub_out_c = out_c / group - stride, pad = conv_param['stride'], conv_param['pad'] - out_d = 1 + (in_d + 2 * pad[0] - f_h) / stride[0] - out_h = 1 + (in_h + 2 * pad[1] - f_h) / stride[1] - out_w = 1 + (in_w + 2 * pad[2] - f_w) / stride[2] + stride, pad, dilation = conv_param['stride'], conv_param['pad'], conv_param[ + 'dilations'] + + out_d = 1 + (in_d + 2 * pad[0] - (dilation[0] * (f_d - 1) + 1)) / stride[0] + out_h = 1 + (in_h + 2 * pad[1] - (dilation[1] * (f_h - 1) + 1)) / stride[1] + out_w = 1 + (in_w + 2 * pad[2] - (dilation[2] * (f_w - 1) + 1)) / stride[2] + out = np.zeros((in_n, out_c, out_d, out_h, out_w)) + d_bolck_d = (dilation[0] * (f_d - 1) + 1) + d_bolck_h = (dilation[1] * (f_h - 1) + 1) + d_bolck_w = (dilation[2] * (f_w - 1) + 1) + input_pad = np.pad(input, ((0, ), (0, ), (pad[0], ), (pad[1], ), (pad[2], )), mode='constant', 
constant_values=0) + + filter_dilation = np.zeros((out_c, f_c, d_bolck_d, d_bolck_h, d_bolck_w)) + filter_dilation[:, :, 0:d_bolck_d:dilation[0], 0:d_bolck_h:dilation[1], 0: + d_bolck_w:dilation[2]] = filter + for d in range(out_d): for i in range(out_h): for j in range(out_w): for g in range(group): input_pad_masked = \ input_pad[:, g * f_c:(g + 1) * f_c, - d * stride[0]:d * stride[0] + f_d, - i * stride[1]:i * stride[1] + f_h, - j * stride[2]:j * stride[2] + f_w] - f_sub = filter[g * sub_out_c:(g + 1) * - sub_out_c, :, :, :, :] + d * stride[0]:d * stride[0] + d_bolck_d, + i * stride[1]:i * stride[1] + d_bolck_h, + j * stride[2]:j * stride[2] + d_bolck_w] + + f_sub = filter_dilation[g * sub_out_c:(g + 1) * + sub_out_c, :, :, :, :] for k in range(sub_out_c): out[:, g * sub_out_c + k, d, i, j] = \ np.sum(input_pad_masked * f_sub[k, :, :, :, :], @@ -43,9 +56,14 @@ class TestConv3dOp(OpTest): def setUp(self): self.init_group() self.init_op_type() + self.init_dilation() self.init_test_case() - conv3d_param = {'stride': self.stride, 'pad': self.pad} + conv3d_param = { + 'stride': self.stride, + 'pad': self.pad, + 'dilations': self.dilations + } input = np.random.random(self.input_size).astype("float32") filter = np.random.random(self.filter_size).astype("float32") output = conv3d_forward_naive(input, filter, self.groups, @@ -55,7 +73,8 @@ class TestConv3dOp(OpTest): self.attrs = { 'strides': self.stride, 'paddings': self.pad, - 'groups': self.groups + 'groups': self.groups, + 'dilations': self.dilations } self.outputs = {'Output': output} @@ -88,6 +107,9 @@ class TestConv3dOp(OpTest): f_c = self.input_size[1] / self.groups self.filter_size = [6, f_c, 3, 3, 3] + def init_dilation(self): + self.dilations = [1, 1, 1] + def init_group(self): self.groups = 1 @@ -104,27 +126,47 @@ class TestCase1(TestConv3dOp): f_c = self.input_size[1] / self.groups self.filter_size = [6, f_c, 3, 3, 3] - def init_group(self): - self.groups = 1 - def init_op_type(self): - self.op_type = 
"conv3d" +class TestWithGroup1(TestConv3dOp): + def init_group(self): + self.groups = 3 -class TestWithGroup1(TestConv3dOp): +class TestWithGroup2(TestCase1): def init_group(self): self.groups = 3 - def init_op_type(self): - self.op_type = "conv3d" +class TestWith1x1(TestConv3dOp): + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.input_size = [2, 3, 4, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 1, 1, 1] + + def init_dilation(self): + self.dilations = [1, 1, 1] -class TestWithGroup2(TestCase1): def init_group(self): self.groups = 3 - def init_op_type(self): - self.op_type = "conv3d" + +class TestWithDilation(TestConv3dOp): + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.input_size = [2, 3, 6, 6, 6] # NCDHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 2, 2, 2] + + def init_dilation(self): + self.dilations = [2, 2, 2] + + def init_group(self): + self.groups = 3 if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py index 132fe7931438a30cf02e4ad2894c0838e48ffc9f..8fd34b87bfea91307f52fdcbb9f71f2e1a9c6c56 100644 --- a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py +++ b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py @@ -4,9 +4,7 @@ from op_test import OpTest def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): - # [2, 3, 5, 5, 5] in_n, in_c, in_d, in_h, in_w = input_.shape - # [3, 6, 3, 3, 3] f_c, out_c, f_d, f_h, f_w = filter_.shape assert in_c == f_c @@ -14,7 +12,6 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): out_d = (in_d - 1) * stride[0] + f_d out_h = (in_h - 1) * stride[1] + f_h out_w = (in_w - 1) * stride[2] + f_w - out = np.zeros((in_n, out_c, out_d, 
out_h, out_w)) for n in range(in_n): @@ -33,6 +30,8 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): j1, j2 = j * stride[2], j * stride[2] + f_w out[n, k, d1:d2, i1:i2, j1:j2] += tmp_out + out = out[:, :, pad[0]:out_d - pad[0], pad[1]:out_h - pad[1], pad[2]:out_w - + pad[2]] return out @@ -40,8 +39,6 @@ class TestConv3dTransposeOp(OpTest): def setUp(self): # init as conv transpose self.init_op_type() - - # [2, 3, 5, 5, 5] -> kernel [3, 6, 3, 3, 3] -> output [2, 6, 7, 7, 7] self.init_test_case() conv3dtranspose_param = {'stride': self.stride, 'pad': self.pad} @@ -49,7 +46,6 @@ class TestConv3dTransposeOp(OpTest): filter_ = np.random.random(self.filter_size).astype("float32") output = conv3dtranspose_forward_naive( input_, filter_, conv3dtranspose_param).astype("float32") - # print 'deconv output py', output, output.shape self.inputs = {'Input': input_, 'Filter': filter_} self.attrs = { @@ -60,7 +56,6 @@ class TestConv3dTransposeOp(OpTest): self.outputs = {'Output': output} def test_check_output(self): - print 'check output here' self.check_output() def test_check_grad(self): @@ -85,7 +80,7 @@ class TestConv3dTransposeOp(OpTest): self.pad = [0, 0, 0] self.stride = [1, 1, 1] self.dilations = [1, 1, 1] - self.input_size = [2, 3, 5, 5, 5] # NCHW + self.input_size = [2, 3, 5, 5, 5] # NCDHW f_c = self.input_size[1] self.filter_size = [f_c, 6, 3, 3, 3] @@ -93,5 +88,31 @@ class TestConv3dTransposeOp(OpTest): self.op_type = "conv3d_transpose" +class TestWithPad(TestConv3dTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.input_size = [2, 3, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + +class TestWithStride(TestConv3dTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [2, 2, 2] + self.dilations = [1, 1, 1] + self.input_size = [2, 3, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 
6, 3, 3, 3] + + +# ------------ test_cudnn ------------ +class TestCudnn(TestConv3dTransposeOp): + def init_op_type(self): + self.op_type = "conv3d_transpose_cudnn" + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_evaluator.py b/python/paddle/v2/fluid/tests/test_evaluator.py deleted file mode 100644 index 1d51205b703f83ec32c2e948394e5d3f5c87d1d9..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/test_evaluator.py +++ /dev/null @@ -1,64 +0,0 @@ -from paddle.v2.fluid.evaluator import Evaluator -from paddle.v2.fluid.op import Operator -import paddle.v2.fluid.core as core -import unittest -import op_test -import numpy as np - - -class TestEvaluator(unittest.TestCase): - def setup(self, scope, inputs, outputs): - def __create_var__(var_name, arr): - np_arr = np.array(arr) - scope.var(var_name) - # tensor = var.get_tensor() - # tensor.set_dims(np_arr.shape) - - for var_name, arr in inputs.iteritems(): - __create_var__(var_name, arr) - - for var_name, arr in outputs.iteritems(): - __create_var__(var_name, arr) - - def test_evaluator(self): - - inputs = { - 'Inference': np.array([[1, 1, 1, 1, 1, 0, 0, 0, 0, 1]]).T, - 'Label': np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0]) - } - outputs = {'Accuracy': np.array([0.9])} - out_name = 'Accuracy' - - places = [core.CPUPlace()] - if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) - - for place in places: - scope = core.Scope() - self.setup(scope, inputs, outputs) - - evaluator = Evaluator( - scope, - operator='accuracy', - input='Inference', - label='Label', - output=out_name, - place=place) - op_test.set_input(scope, evaluator.op, inputs, place) - ctx = core.DeviceContext.create(place) - - for i in range(10): # simulate 10 mini-batches - evaluator.evaluate(ctx) - - actual = np.array(scope.find_var(out_name).get_tensor()) - print actual - - self.assertTrue( - np.allclose( - actual, outputs[out_name], atol=1e-5), - "output name: " + out_name + " has diff.") 
- - -if __name__ == '__main__': - exit(0) - unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_gru_op.py b/python/paddle/v2/fluid/tests/test_gru_op.py index b2474cff94c6c71cc62bc8e69a5d83e38d51c511..fa2c5a53ec4a01b6545e25f773c11277a4d24706 100644 --- a/python/paddle/v2/fluid/tests/test_gru_op.py +++ b/python/paddle/v2/fluid/tests/test_gru_op.py @@ -6,7 +6,8 @@ from test_lstm_op import identity, sigmoid, tanh, relu class TestGRUOp(OpTest): - batch_size = 9 + lod = [[0, 2, 6, 9]] + batch_size = lod[0][-1] frame_size = 5 activate = { 'identity': identity, @@ -35,7 +36,7 @@ class TestGRUOp(OpTest): seq_starts[sorted_seqs[i]] + batch_idx) idx_in_seq.append(idx) idx_in_seq_list.append(idx_in_seq) - return idx_in_seq_list + return idx_in_seq_list, sorted_seqs def gru_step(self, x, h_p, w, b): batch_size = x.shape[0] @@ -66,8 +67,8 @@ class TestGRUOp(OpTest): batch_hidden = self.outputs['BatchHidden'] hidden = self.outputs['Hidden'] idx_in_seq_list = self.idx_in_seq_list - h_p = self.inputs['H0'] if self.inputs.has_key('H0') else np.zeros( - (len(idx_in_seq_list[0]), self.frame_size)) + h_p = self.inputs['H0'][self.sorted_seqs] if self.inputs.has_key( + 'H0') else np.zeros((len(idx_in_seq_list[0]), self.frame_size)) num_batch = len(idx_in_seq_list) end_idx = 0 for batch_idx in range(num_batch): @@ -84,8 +85,9 @@ class TestGRUOp(OpTest): return batch_gate, batch_reset_hidden_prev, hidden def set_data(self): - lod = [[0, 2, 6, self.batch_size]] - self.idx_in_seq_list = self.seq_to_batch(lod, self.is_reverse) + lod = self.lod + self.idx_in_seq_list, self.sorted_seqs = self.seq_to_batch( + lod, self.is_reverse) batch_size = self.batch_size frame_size = self.frame_size input = np.random.rand(batch_size, frame_size * 3).astype('float64') @@ -146,7 +148,7 @@ class TestGRUOpReverse(TestGRUOp): def set_confs(self): self.is_reverse = True self.attrs = { - 'activation': 'identity', + 'activation': 'tanh', 'gate_activation': 'sigmoid', 'is_reverse': self.is_reverse } diff 
--git a/python/paddle/v2/fluid/tests/test_gru_unit_op.py b/python/paddle/v2/fluid/tests/test_gru_unit_op.py index f356f6e9ec0da2d3e1fb67638d81e8d54c544f53..501d5aa5797d6def708338692f0861657f951ef7 100644 --- a/python/paddle/v2/fluid/tests/test_gru_unit_op.py +++ b/python/paddle/v2/fluid/tests/test_gru_unit_op.py @@ -28,8 +28,8 @@ def relu(x): class TestGRUUnitOp(OpTest): - batch_size = 3 - frame_size = 5 + batch_size = 5 + frame_size = 10 activate = { GRUActivationType.identity: identity, GRUActivationType.sigmoid: sigmoid, @@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest): c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) + g[:, frame_size * 2:]) g = np.hstack((u_r, c)) - h = u * h_p + (1 - u) * c + h = u * c + (1 - u) * h_p self.outputs = { 'Gate': g.astype('float64'), 'ResetHiddenPrev': r_h_p.astype('float64'), @@ -92,10 +92,7 @@ class TestGRUUnitOp(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['Input', 'HiddenPrev', 'Weight'], - ['Hidden', 'ResetHiddenPrev', 'Gate'], - max_relative_error=0.007) + self.check_grad(['Input', 'HiddenPrev', 'Weight'], ['Hidden']) class TestGRUUnitOpWithBias(TestGRUUnitOp): @@ -104,18 +101,20 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp): frame_size = self.frame_size super(TestGRUUnitOpWithBias, self).set_inputs() self.inputs['Bias'] = np.random.uniform( - -0.1, 0.1, (1, frame_size * 3)).astype('float32') + -0.1, 0.1, (1, frame_size * 3)).astype('float64') self.attrs = { 'activation': GRUActivationType.identity, 'gate_activation': GRUActivationType.sigmoid } def test_check_grad(self): + self.check_grad(['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden']) + + def test_check_grad_ingore_input(self): self.check_grad( - ['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'], - max_relative_error=0.007) + ['HiddenPrev', 'Weight', 'Bias'], ['Hidden'], + no_grad_set=set('Input')) if __name__ == '__main__': - exit(0) # FIXME(yuyang18): This unittest is not pass. 
Fix it later unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_initializer.py b/python/paddle/v2/fluid/tests/test_initializer.py index f2eb79b209627f5814847db6d96c0a17300d9b5a..6c20203f8eca02b3f68ed2aa8664bed29551c070 100644 --- a/python/paddle/v2/fluid/tests/test_initializer.py +++ b/python/paddle/v2/fluid/tests/test_initializer.py @@ -223,5 +223,109 @@ class TestXavierInitializer(unittest.TestCase): self.assertEqual(init_op.attr('seed'), 134) +class TestMSRAInitializer(unittest.TestCase): + def test_uniform_msra_initializer(self): + """Test MSRA initializer with uniform distribution on + for matrix multiply. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + limit = np.sqrt(6.0 / param.shape[0]) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_uniform_msra_initializer_conv(self): + """Test MSRA initializer with uniform distribution on + for convolutions. 
+ """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10, 15, 20], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + receptive_field_size = float(15 * 20) + limit = np.sqrt(6.0 / (param.shape[1] * receptive_field_size)) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_normal_msra_initializer(self): + """Test MSRA initializer with normal distribution on + for matrix multiply. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer(uniform=False)) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'gaussian_random') + std = np.sqrt(2.0 / param.shape[0]) + self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_normal_msra_initializer_conv(self): + """Test MSRA initializer with normal distribution on + for convolutions. 
+ """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10, 15, 20], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer(uniform=False)) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'gaussian_random') + receptive_field_size = float(15 * 20) + std = np.sqrt(2.0 / (param.shape[1] * receptive_field_size)) + self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_msra_initializer_supplied_arguments(self): + """Test the MSRA initializer with supplied arguments + """ + program = framework.Program() + block = program.global_block() + block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer( + fan_in=12, seed=134)) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + limit = np.sqrt(6.0 / 12) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 134) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_is_empty_op.py b/python/paddle/v2/fluid/tests/test_is_empty_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ed6e3fe24f6333c9c90d760787eb13241a7e1868 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_is_empty_op.py @@ -0,0 +1,43 @@ +import unittest +import numpy as np +from paddle.v2.fluid.op import Operator +import paddle.v2.fluid.core as core + + +def create_tensor(scope, name, np_data): + tensor = scope.var(name).get_tensor() + tensor.set_dims(np_data.shape) + tensor.set(np_data, core.CPUPlace()) + return tensor + + +class TestIsEmptyOp(unittest.TestCase): + def 
setUp(self): + self.scope = core.Scope() + # create input variables + np_data0 = np.array([0, 1, 2]) + create_tensor(self.scope, "X0", np_data0) + + np_data1 = np.array([1]) + t = create_tensor(self.scope, "X1", np_data1) + t.set_dims([0]) + + # create output variables + self.scope.var("out") + + def test_no_empty(self): + self.one_case("X0", False) + + def test_empty(self): + self.one_case("X1", True) + + def one_case(self, input, target): + op = Operator(type="is_empty", X=input, Out="out") + ctx = core.DeviceContext.create(core.CPUPlace()) + op.run(self.scope, ctx) + out = self.scope.var("out").get_tensor() + self.assertEqual(np.array(out)[0], target) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index 3d18e7ce3a4dc6c6b917a1000de39fca71f6ac18..d3dc45742d92dc61b81d9cdc04056c5d5bdc2b63 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -1,8 +1,8 @@ +import unittest + import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets from paddle.v2.fluid.framework import Program -import paddle.v2.fluid.core as core -import unittest class TestBook(unittest.TestCase): @@ -20,7 +20,8 @@ class TestBook(unittest.TestCase): avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) program.append_backward(avg_cost) - print str(program) + + # print str(program) def test_recognize_digits_mlp(self): program = Program() @@ -49,7 +50,7 @@ class TestBook(unittest.TestCase): input=predict, label=label, main_program=program) avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) - print str(program) + # print str(program) def test_simple_conv2d(self): program = Program() @@ -64,7 +65,7 @@ class TestBook(unittest.TestCase): filter_size=[4, 4], main_program=program) - print str(program) + # print str(program) def test_recognize_digits_conv(self): program = 
Program() @@ -103,7 +104,7 @@ class TestBook(unittest.TestCase): program.append_backward(avg_cost) - print str(program) + # print str(program) def test_word_embedding(self): program = Program() @@ -164,7 +165,24 @@ class TestBook(unittest.TestCase): avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) - print str(program) + # print str(program) + + def test_linear_chain_crf(self): + program = Program() + + # Change g_program, so the rest layers use `g_program` + images = layers.data( + name='pixel', + shape=[784], + data_type='float32', + main_program=program) + label = layers.data( + name='label', shape=[1], data_type='int32', main_program=program) + hidden = layers.fc(input=images, size=128, main_program=program) + crf = layers.linear_chain_crf( + input=hidden, label=label, main_program=program) + + # print str(program) if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py index 6f06a66c825b37ee91214efc0a29a58f0b9057f9..c26634ff20c46e484d600c758be386ec8327d1c1 100644 --- a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py +++ b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py @@ -104,7 +104,7 @@ class TestLinearChainCrfOp(OpTest): transition_exps = np.exp(transition) labels = np.random.randint( - low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32") + low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64") self.inputs = { "Emission": (emission, lod), diff --git a/python/paddle/v2/fluid/tests/test_logical_op.py b/python/paddle/v2/fluid/tests/test_logical_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ac90bf839cb96053387bb82c112692136707744c --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_logical_op.py @@ -0,0 +1,35 @@ +import op_test +import unittest +import numpy as np + + +def create_test_class(op_type, callback, binary_op=True): + class Cls(op_test.OpTest): + def 
setUp(self): + a = np.random.choice(a=[True, False], size=(10, 7)).astype(bool) + if binary_op: + b = np.random.choice(a=[True, False], size=(10, 7)).astype(bool) + c = callback(a, b) + else: + c = callback(a) + self.outputs = {'Out': c} + self.op_type = op_type + if binary_op: + self.inputs = {'X': a, 'Y': b} + else: + self.inputs = {'X': a} + + def test_output(self): + self.check_output() + + Cls.__name__ = op_type + globals()[op_type] = Cls + + +create_test_class('logical_and', lambda _a, _b: np.logical_and(_a, _b)) +create_test_class('logical_or', lambda _a, _b: np.logical_or(_a, _b)) +create_test_class('logical_not', lambda _a: np.logical_not(_a), False) +create_test_class('logical_xor', lambda _a, _b: np.logical_xor(_a, _b)) + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_maxout_op.py b/python/paddle/v2/fluid/tests/test_maxout_op.py new file mode 100644 index 0000000000000000000000000000000000000000..05e42f315833cab5bc5272cbd2173ea8012ff7f5 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_maxout_op.py @@ -0,0 +1,39 @@ +import unittest +import numpy as np +from op_test import OpTest + + +def maxout_forward_naive(input, groups): + s0, s1, s2, s3 = input.shape + return np.ndarray([s0, s1 / groups, groups, s2, s3], \ + buffer = input, dtype=input.dtype).max(axis=(2)) + + +class TestMaxOutOp(OpTest): + def setUp(self): + self.op_type = "maxout" + self.init_test_case() + input = np.random.random(self.shape).astype("float32") + output = self.MaxOut_forward_naive(input, self.groups).astype("float32") + + self.inputs = {'X': input} + self.attrs = {'groups': self.groups} + + self.outputs = {'Out': output.astype('float32')} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + def init_test_case(self): + self.MaxOut_forward_naive = maxout_forward_naive + self.shape = [100, 6, 2, 2] + self.groups=2 + + + + +if __name__ == '__main__': + unittest.main() 
diff --git a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py new file mode 100644 index 0000000000000000000000000000000000000000..8af99005dc0b5d50de60ca89c2ddf870b1537edb --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py @@ -0,0 +1,154 @@ +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.optimizer import MomentumOptimizer +import paddle.v2.fluid.core as core +import paddle.v2 as paddle +import unittest +import numpy as np + + +class TestMNISTIfElseOp(unittest.TestCase): + def test_raw_api(self): + kwargs = {'startup_program': Program(), 'main_program': Program()} + image = layers.data( + name='x', shape=[784], data_type='float32', **kwargs) + + label = layers.data(name='y', shape=[1], data_type='int64', **kwargs) + + limit = layers.fill_constant_batch_size_like( + input=label, dtype='int64', shape=[1], value=5.0, **kwargs) + + cond = layers.less_than(x=label, y=limit, **kwargs) + true_image, false_image = layers.split_lod_tensor( + input=image, mask=cond, **kwargs) + + true_out = layers.create_tensor(dtype='float32', **kwargs) + true_cond = layers.ConditionalBlock([true_image], **kwargs) + + with true_cond.block(): + hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs) + prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) + layers.assign(input=prob, output=true_out, **kwargs) + + false_out = layers.create_tensor(dtype='float32', **kwargs) + false_cond = layers.ConditionalBlock([false_image], **kwargs) + + with false_cond.block(): + hidden = layers.fc(input=false_image, + size=200, + act='tanh', + **kwargs) + prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) + layers.assign(input=prob, output=false_out, **kwargs) + + prob = layers.merge_lod_tensor( + in_true=true_out, in_false=false_out, mask=cond, x=image, **kwargs) + loss = 
layers.cross_entropy(input=prob, label=label, **kwargs) + avg_loss = layers.mean(x=loss, **kwargs) + + optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) + optimizer.minimize(avg_loss, kwargs['startup_program']) + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=200) + + place = core.CPUPlace() + exe = Executor(place) + + exe.run(kwargs['startup_program']) + PASS_NUM = 100 + for pass_id in range(PASS_NUM): + for data in train_reader(): + x_data = np.array(map(lambda x: x[0], data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = np.expand_dims(y_data, axis=1) + + tensor_x = core.LoDTensor() + tensor_x.set(x_data, place) + + tensor_y = core.LoDTensor() + tensor_y.set(y_data, place) + + outs = map(np.array, + exe.run(kwargs['main_program'], + feed={'x': tensor_x, + 'y': tensor_y}, + fetch_list=[avg_loss])) + print outs[0] + if outs[0] < 1.0: + return + self.assertFalse(True) + + def test_ifelse(self): + kwargs = {'startup_program': Program(), 'main_program': Program()} + image = layers.data( + name='x', shape=[784], data_type='float32', **kwargs) + + label = layers.data(name='y', shape=[1], data_type='int64', **kwargs) + + limit = layers.fill_constant_batch_size_like( + input=label, dtype='int64', shape=[1], value=5.0, **kwargs) + + cond = layers.less_than(x=label, y=limit, **kwargs) + + ie = layers.IfElse(cond, **kwargs) + + with ie.true_block(): + true_image = ie.input(image) + hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs) + prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) + ie.output(prob) + + with ie.false_block(): + false_image = ie.input(image) + hidden = layers.fc(input=false_image, + size=200, + act='tanh', + **kwargs) + prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) + ie.output(prob) + + prob = ie() + loss = layers.cross_entropy(input=prob[0], label=label, **kwargs) + avg_loss = 
layers.mean(x=loss, **kwargs) + + optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) + optimizer.minimize(avg_loss, kwargs['startup_program']) + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=200) + + place = core.CPUPlace() + exe = Executor(place) + + exe.run(kwargs['startup_program']) + PASS_NUM = 100 + for pass_id in range(PASS_NUM): + for data in train_reader(): + x_data = np.array(map(lambda x: x[0], data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = np.expand_dims(y_data, axis=1) + + tensor_x = core.LoDTensor() + tensor_x.set(x_data, place) + + tensor_y = core.LoDTensor() + tensor_y.set(y_data, place) + + outs = map(np.array, + exe.run(kwargs['main_program'], + feed={'x': tensor_x, + 'y': tensor_y}, + fetch_list=[avg_loss])) + print outs[0] + if outs[0] < 1.0: + return + self.assertFalse(True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_optimizer.py b/python/paddle/v2/fluid/tests/test_optimizer.py index 0ebf7cdf208c41eacfdff88f59455584eff4ff8f..2459dfd664300d405edb36c4ca906c1769b5e7d2 100644 --- a/python/paddle/v2/fluid/tests/test_optimizer.py +++ b/python/paddle/v2/fluid/tests/test_optimizer.py @@ -16,14 +16,18 @@ class TestOptimizer(unittest.TestCase): dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") mul_out = block.create_var( dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") block.append_op( type="mul", inputs={"X": mul_x, "Y": mul_y}, outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01) - opts = sgd_optimizer.minimize(mul_out, init_program) + opts = sgd_optimizer.minimize(mean_out, init_program) self.assertEqual(len(opts), 
1) sgd_op = opts[0] self.assertEqual(sgd_op.type, "sgd") @@ -44,12 +48,16 @@ class TestOptimizer(unittest.TestCase): "Y": mul_y}, outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) global_step = block.create_var( dtype="float32", shape=[1], lod_level=0, name="step") learning_rate = 0.01 sgd_optimizer = optimizer.SGDOptimizer( learning_rate=learning_rate, global_step=global_step) - opts = sgd_optimizer.minimize(mul_out, init_program) + opts = sgd_optimizer.minimize(mean_out, init_program) self.assertEqual(len(opts), 2) sgd_op = opts[0] self.assertEqual(sgd_op.type, "sgd") @@ -90,7 +98,11 @@ class TestMomentumOptimizer(unittest.TestCase): learning_rate = 0.01 momentum_optimizer = self.MockMomentum( learning_rate=learning_rate, momentum=0.2) - params_grads = append_backward_ops(mul_out) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + params_grads = append_backward_ops(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) opts = momentum_optimizer.create_optimization_pass( @@ -132,10 +144,14 @@ class TestMomentumOptimizer(unittest.TestCase): "Y": mul_y}, outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) learning_rate = 0.01 momentum_optimizer = self.MockMomentum( learning_rate=learning_rate, momentum=0.2, use_nesterov=True) - params_grads = append_backward_ops(mul_out) + params_grads = append_backward_ops(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) opts = 
momentum_optimizer.create_optimization_pass( @@ -186,10 +202,14 @@ class TestAdagradOptimizer(unittest.TestCase): "Y": mul_y}, outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) learning_rate = 0.01 adagrad_optimizer = self.MockAdagrad( learning_rate=learning_rate, epsilon=1.0e-6) - params_grads = append_backward_ops(mul_out) + params_grads = append_backward_ops(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out, @@ -198,7 +218,7 @@ class TestAdagradOptimizer(unittest.TestCase): adagrad_op = opts[0] self.assertEqual(adagrad_op.type, "adagrad") - # check accumulators + # Check accumulators accumulators = adagrad_optimizer.get_accumulators() self.assertEqual(len(accumulators), 1) self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators) @@ -242,10 +262,14 @@ class TestAdamOptimizer(unittest.TestCase): "Y": mul_y}, outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) learning_rate = 0.01 adam_optimizer = self.MockAdam( learning_rate=learning_rate, beta1=0.9, beta2=0.999) - params_grads = append_backward_ops(mul_out) + params_grads = append_backward_ops(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adam_optimizer.get_accumulators()), 0) opts = adam_optimizer.create_optimization_pass(params_grads, mul_out, @@ -300,10 +324,14 @@ class TestAdamaxOptimizer(unittest.TestCase): "Y": mul_y}, outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + 
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) learning_rate = 0.01 adamax_optimizer = self.MockAdamax( learning_rate=learning_rate, beta1=0.9, beta2=0.999) - params_grads = append_backward_ops(mul_out) + params_grads = append_backward_ops(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out, @@ -331,5 +359,63 @@ class TestAdamaxOptimizer(unittest.TestCase): self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate) +class TestDecayedAdagradOptimizer(unittest.TestCase): + class MockDecayedAdagrad(optimizer.DecayedAdagradOptimizer): + def get_accumulators(self): + return self._accumulators + + def get_moment_str(self): + return self._moment_acc_str + + def test_decayed_adagrad_optimizer(self): + init_program = framework.Program() + program = framework.Program() + block = program.global_block() + mul_x = block.create_parameter( + dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") + mul_y = block.create_var( + dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") + mul_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + block.append_op( + type="mul", + inputs={"X": mul_x, + "Y": mul_y}, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + learning_rate = 0.01 + decayed_adagrad_optimizer = self.MockDecayedAdagrad( + learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6) + params_grads = append_backward_ops(mean_out) + self.assertEqual(len(params_grads), 1) + self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0) + opts = decayed_adagrad_optimizer.create_optimization_pass( + params_grads, mul_out, init_program) + self.assertEqual(len(opts), 1) + decayed_adagrad_op = opts[0] + 
self.assertEqual(decayed_adagrad_op.type, "decayed_adagrad") + + # Check accumulators + accumulators = decayed_adagrad_optimizer.get_accumulators() + self.assertEqual(len(accumulators), 1) + self.assertTrue( + decayed_adagrad_optimizer.get_moment_str() in accumulators) + moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()] + self.assertEqual(len(moment_acc), 1) + self.assertTrue(mul_x.name in moment_acc) + + # Check init_program + init_ops = init_program.global_block().ops + self.assertEqual(len(init_ops), 2) + self.assertEqual(init_ops[0].type, "fill_constant") + self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate) + self.assertEqual(init_ops[1].type, "fill_constant") + self.assertAlmostEqual(init_ops[1].attr('value'), 0.0) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_parameter.py b/python/paddle/v2/fluid/tests/test_parameter.py index 71a1bd2aaf5a9c6362ce0d35c256ed228e942fce..a633d22c2b1db2728b6eb767078ce4aec6cce163 100644 --- a/python/paddle/v2/fluid/tests/test_parameter.py +++ b/python/paddle/v2/fluid/tests/test_parameter.py @@ -1,26 +1,32 @@ import unittest from paddle.v2.fluid.framework import g_main_program import paddle.v2.fluid.core as core +from paddle.v2.fluid.executor import Executor +import paddle.v2.fluid.io as io +from paddle.v2.fluid.initializer import ConstantInitializer +import numpy as np class TestParameter(unittest.TestCase): def test_param(self): - b = g_main_program.create_block() + shape = [784, 100] + val = 1.0625 + b = g_main_program.global_block() param = b.create_parameter( name='fc.w', - shape=[784, 100], + shape=shape, dtype='float32', - initialize_attr={ - 'type': 'uniform_random', - 'seed': 13, - 'min': -5.0, - 'max': 5.0 - }) + initializer=ConstantInitializer(val)) self.assertIsNotNone(param) self.assertEqual('fc.w', param.name) self.assertEqual((784, 100), param.shape) self.assertEqual(core.DataType.FP32, param.data_type) self.assertEqual(0, 
param.block.idx) + exe = Executor(core.CPUPlace()) + p = exe.run(g_main_program, fetch_list=[param])[0] + self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val)) + p = io.get_parameter_value_by_name('fc.w', exe, g_main_program) + self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val)) if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_pool2d_op.py b/python/paddle/v2/fluid/tests/test_pool2d_op.py index ac3fa6aa87835b3cd6fb9bbf6fe66b1d0c577ca2..5dff6270f455395ce6ca8ae2428236f630467095 100644 --- a/python/paddle/v2/fluid/tests/test_pool2d_op.py +++ b/python/paddle/v2/fluid/tests/test_pool2d_op.py @@ -3,8 +3,7 @@ import numpy as np from op_test import OpTest -def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, H, W = x.shape if global_pool == 1: ksize = [H, W] @@ -23,8 +22,7 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): return out -def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def avg_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, H, W = x.shape if global_pool == 1: ksize = [H, W] @@ -47,6 +45,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): class TestPool2d_Op(OpTest): def setUp(self): self.init_test_case() + self.init_global_pool() self.init_op_type() self.init_pool_type() if self.global_pool: @@ -75,8 +74,6 @@ class TestPool2d_Op(OpTest): self.check_grad(set(['X']), 'Out', max_relative_error=0.07) def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = avg_pool2D_forward_naive self.shape = [2, 3, 5, 5] self.ksize = [3, 3] self.strides = [1, 1] @@ -87,12 +84,14 @@ class TestPool2d_Op(OpTest): def init_pool_type(self): self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + + def init_global_pool(self): + self.global_pool = True class 
TestCase1(TestPool2d_Op): def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive self.shape = [2, 3, 7, 7] self.ksize = [3, 3] self.strides = [1, 1] @@ -103,12 +102,14 @@ class TestCase1(TestPool2d_Op): def init_pool_type(self): self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + + def init_global_pool(self): + self.global_pool = False class TestCase2(TestPool2d_Op): def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive self.shape = [2, 3, 7, 7] self.ksize = [3, 3] self.strides = [1, 1] @@ -119,152 +120,69 @@ class TestCase2(TestPool2d_Op): def init_pool_type(self): self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + def init_global_pool(self): + self.global_pool = False -class TestCase3(TestPool2d_Op): - def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCase3(TestPool2d_Op): def init_op_type(self): self.op_type = "pool2d" def init_pool_type(self): self.pool_type = "max" - - -class TestCase4(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] + +class TestCase4(TestCase1): def init_op_type(self): self.op_type = "pool2d" def init_pool_type(self): self.pool_type = "max" - - -class TestCase5(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] + +class TestCase5(TestCase2): def init_op_type(self): self.op_type = "pool2d" def init_pool_type(self): self.pool_type = "max" + self.pool2D_forward_naive = 
max_pool2D_forward_naive #--------------------test pool2d_cudnn-------------------- -class TestCaseCudnn1(TestPool2d_Op): - def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] - +class TestCudnnCase1(TestPool2d_Op): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "avg" - - -class TestCaseCudnn2(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCudnnCase2(TestCase1): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "avg" - - -class TestCaseCudnn3(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] +class TestCudnnCase3(TestCase2): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "avg" - - -class TestCaseCudnn4(TestPool2d_Op): - def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCudnnCase4(TestCase3): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "max" - - -class TestCaseCudnn5(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCudnnCase5(TestCase4): def init_op_type(self): self.op_type = "pool2d_cudnn" - def 
init_pool_type(self): - self.pool_type = "max" - - -class TestCaseCudnn6(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] +class TestCudnnCase6(TestCase5): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "max" - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_pool3d_op.py b/python/paddle/v2/fluid/tests/test_pool3d_op.py index 87483ae5e568c01141ff789f37e84069cb8e827d..2ba86665a7d207e61159c02643fa40daca3be080 100644 --- a/python/paddle/v2/fluid/tests/test_pool3d_op.py +++ b/python/paddle/v2/fluid/tests/test_pool3d_op.py @@ -3,8 +3,7 @@ import numpy as np from op_test import OpTest -def max_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, D, H, W = x.shape if global_pool == 1: ksize = [D, H, W] @@ -27,8 +26,7 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): return out -def avg_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def avg_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, D, H, W = x.shape if global_pool == 1: ksize = [D, H, W] @@ -55,6 +53,10 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): class TestPool3d_Op(OpTest): def setUp(self): self.init_test_case() + self.init_global_pool() + self.init_op_type() + self.init_pool_type() + if self.global_pool: self.paddings = [0 for _ in range(len(self.paddings))] input = np.random.random(self.shape).astype("float32") @@ -81,74 +83,115 @@ class TestPool3d_Op(OpTest): self.check_grad(set(['X']), 'Out', max_relative_error=0.07) def init_test_case(self): - self.global_pool = True - self.op_type = "pool3d" - self.pool_type = "avg" - 
self.pool3D_forward_naive = avg_pool3D_forward_naive self.shape = [2, 3, 5, 5, 5] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [0, 0, 0] + def init_op_type(self): + self.op_type = "pool3d" + + def init_pool_type(self): + self.pool_type = "avg" + self.pool3D_forward_naive = avg_pool3D_forward_naive + + def init_global_pool(self): + self.global_pool = True + class TestCase1(TestPool3d_Op): def init_test_case(self): - self.global_pool = False self.op_type = "pool3d" - self.pool_type = "avg" - self.pool3D_forward_naive = avg_pool3D_forward_naive self.shape = [2, 3, 7, 7, 7] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [0, 0, 0] - -class TestCase2(TestPool3d_Op): - def init_test_case(self): - self.global_pool = False + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "avg" self.pool3D_forward_naive = avg_pool3D_forward_naive + + def init_global_pool(self): + self.global_pool = False + + +class TestCase2(TestPool3d_Op): + def init_test_case(self): self.shape = [2, 3, 7, 7, 7] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [1, 1, 1] + def init_op_type(self): + self.op_type = "pool3d" + + def init_pool_type(self): + self.pool_type = "avg" + self.pool3D_forward_naive = avg_pool3D_forward_naive + + def init_global_pool(self): + self.global_pool = False + class TestCase3(TestPool3d_Op): - def init_test_case(self): - self.global_pool = True + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "max" self.pool3D_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [0, 0, 0] -class TestCase4(TestPool3d_Op): - def init_test_case(self): - self.global_pool = False +class TestCase4(TestCase1): + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "max" self.pool3D_forward_naive = max_pool3D_forward_naive - 
self.shape = [2, 3, 7, 7, 7] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [0, 0, 0] -class TestCase5(TestPool3d_Op): - def init_test_case(self): - self.global_pool = False +class TestCase5(TestCase2): + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "max" self.pool3D_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 7, 7, 7] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] + + +#--------------------test pool3d_cudnn-------------------- +class TestCudnnCase1(TestPool3d_Op): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase2(TestCase1): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase3(TestCase2): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase4(TestCase3): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase5(TestCase4): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase6(TestCase5): + def init_op_type(self): + self.op_type = "pool3d_cudnn" if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_pool_max_op.py b/python/paddle/v2/fluid/tests/test_pool_max_op.py index 04843a28ac19e076e097d1aa1034bcf9378aa495..9d2d61c43868701392e90542f3b7fb2c4ea07548 100644 --- a/python/paddle/v2/fluid/tests/test_pool_max_op.py +++ b/python/paddle/v2/fluid/tests/test_pool_max_op.py @@ -3,11 +3,13 @@ import numpy as np from op_test import OpTest -def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): +def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=False): N, C, D, H, W = x.shape - if global_pool == 1: + if global_pool: ksize = [D, H, W] + paddings = [0, 0, 0] + D_out = (D - ksize[0] + 2 * paddings[0]) / strides[0] + 1 H_out = (H - ksize[1] + 2 * paddings[1]) / strides[1] + 1 W_out = (W - ksize[2] + 2 * paddings[2]) / strides[2] + 1 @@ -40,11 +42,13 @@ 
def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): return out, mask -def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): +def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=False): N, C, H, W = x.shape - if global_pool == 1: + if global_pool: ksize = [H, W] + paddings = [0, 0] + H_out = (H - ksize[0] + 2 * paddings[0]) / strides[0] + 1 W_out = (W - ksize[1] + 2 * paddings[1]) / strides[1] + 1 out = np.zeros((N, C, H_out, W_out)) @@ -74,13 +78,13 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): class TestMaxPoolWithIndex_Op(OpTest): def setUp(self): self.init_test_case() - if self.global_pool: - self.paddings = [0 for _ in range(len(self.paddings))] + self.init_global() + input = np.random.random(self.shape).astype("float32") output, mask = self.pool_forward_naive(input, self.ksize, self.strides, self.paddings, self.global_pool) output = output.astype("float32") - mask = mask.astype("float32") + mask = mask.astype("int32") self.attrs = { 'strides': self.strides, @@ -99,41 +103,24 @@ class TestMaxPoolWithIndex_Op(OpTest): # self.check_grad(set(['X']), ['Out'], max_relative_error=0.07) def init_test_case(self): - self.global_pool = True - self.index = "max_pool3d_with_index" - self.op_type = "%s" % self.index + self.op_type = "max_pool3d_with_index" self.pool_forward_naive = max_pool3D_forward_naive self.shape = [2, 3, 5, 5, 5] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [1, 1, 1] + def init_global(self): + self.global_pool = False + class TestCase1(TestMaxPoolWithIndex_Op): - def init_test_case(self): + def init_global(self): self.global_pool = True - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] class TestCase2(TestMaxPoolWithIndex_Op): def init_test_case(self): - self.global_pool = False - 
self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 7, 7, 7] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] - - -class TestCase3(TestMaxPoolWithIndex_Op): - def init_test_case(self): - self.global_pool = False self.op_type = "max_pool3d_with_index" self.pool_forward_naive = max_pool3D_forward_naive self.shape = [2, 3, 7, 7, 7] @@ -141,32 +128,18 @@ class TestCase3(TestMaxPoolWithIndex_Op): self.strides = [2, 2, 2] self.paddings = [0, 0, 0] - -class TestCase4(TestMaxPoolWithIndex_Op): - def init_test_case(self): + def init_global(self): self.global_pool = True - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] -class TestCase5(TestMaxPoolWithIndex_Op): - def init_test_case(self): - self.global_pool = True - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [2, 2, 2] - self.paddings = [0, 0, 0] +class TestCase3(TestCase2): + def init_global(self): + self.global_pool = False -class TestCase6(TestMaxPoolWithIndex_Op): +#----------------max_pool2d_with_index---------------- +class TestCase4(TestMaxPoolWithIndex_Op): def init_test_case(self): - self.global_pool = False self.op_type = "max_pool2d_with_index" self.pool_forward_naive = max_pool2D_forward_naive self.shape = [2, 3, 7, 7] @@ -174,10 +147,17 @@ class TestCase6(TestMaxPoolWithIndex_Op): self.strides = [1, 1] self.paddings = [1, 1] + def init_global(self): + self.global_pool = True + -class TestCase7(TestMaxPoolWithIndex_Op): - def init_test_case(self): +class TestCase5(TestCase4): + def init_global(self): self.global_pool = False + + +class TestCase6(TestMaxPoolWithIndex_Op): + def init_test_case(self): self.op_type = "max_pool2d_with_index" 
self.pool_forward_naive = max_pool2D_forward_naive self.shape = [2, 3, 7, 7] @@ -185,27 +165,13 @@ class TestCase7(TestMaxPoolWithIndex_Op): self.strides = [2, 2] self.paddings = [0, 0] - -class TestCase8(TestMaxPoolWithIndex_Op): - def init_test_case(self): + def init_global(self): self.global_pool = True - self.op_type = "max_pool2d_with_index" - self.pool_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] -class TestCase9(TestMaxPoolWithIndex_Op): - def init_test_case(self): - self.global_pool = True - self.op_type = "max_pool2d_with_index" - self.pool_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [2, 2] - self.paddings = [0, 0] +class TestCase7(TestCase6): + def init_global(self): + self.global_pool = False if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_program.py b/python/paddle/v2/fluid/tests/test_program.py index ef2daf6916e14c015a39ae0193948e7ff6531449..e9bcefd21569aaa9225c676ea03b5c8e37d00333 100644 --- a/python/paddle/v2/fluid/tests/test_program.py +++ b/python/paddle/v2/fluid/tests/test_program.py @@ -1,6 +1,5 @@ import unittest -import paddle.v2.fluid.core as core from paddle.v2.fluid.framework import Program from paddle.v2.fluid.framework import g_main_program @@ -98,21 +97,26 @@ class TestProgram(unittest.TestCase): "Y": add_y}, outputs={"Out": add_out}, attrs={"x_num_col_dims": 1}) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": add_out}, outputs={"Out": mean_out}) self.assertEqual(mul_op.idx, 0) self.assertEqual(add_op.idx, 1) - param_to_grad = prog.append_backward(add_out, set()) + param_to_grad = prog.append_backward(mean_out, set()) def grad_name(name): return name + "@GRAD" - for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out"): + for var_name in ("mul.x", "mul.y", 
"mul.out", "add.y", "add.out", + "mean.out"): self.assertEqual(param_to_grad[var_name][0], grad_name(var_name)) self.assertEqual(param_to_grad[var_name][1], 0) expect_ops = [ - "mul", "elementwise_add", "fill_constant", "elementwise_add_grad", - "mul_grad" + "mul", "elementwise_add", "mean", "fill_constant", "mean_grad", + "elementwise_add_grad", "mul_grad" ] actual_ops = [] for op in block.ops: diff --git a/python/paddle/v2/fluid/tests/test_regularizer.py b/python/paddle/v2/fluid/tests/test_regularizer.py index f5d1eb3b96211bd7c7335dbe116a1d765d7bae50..24baf55e90c98f39bab926e8c85a791eee5ed4a4 100644 --- a/python/paddle/v2/fluid/tests/test_regularizer.py +++ b/python/paddle/v2/fluid/tests/test_regularizer.py @@ -29,7 +29,11 @@ class TestL2DecayRegularizer(unittest.TestCase): "Y": mul_y}, outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) - params_grads = append_backward_ops(mul_out) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + params_grads = append_backward_ops(mean_out) self.assertEqual(len(params_grads), 1) count_ops = len(block.ops) params_grads = optimizer.append_regularization_ops(params_grads) @@ -62,7 +66,11 @@ class TestL1DecayRegularizer(unittest.TestCase): "Y": mul_y}, outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) - params_grads = append_backward_ops(mul_out) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + params_grads = append_backward_ops(mean_out) self.assertEqual(len(params_grads), 1) count_ops = len(block.ops) params_grads = optimizer.append_regularization_ops(params_grads) diff --git a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py new file mode 100644 index 
0000000000000000000000000000000000000000..ccd9a05343b0c4aa05b258959665c0662f271512 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py @@ -0,0 +1,47 @@ +import unittest +import numpy as np +import sys +from op_test import OpTest + + +class TestSequenceSliceOp(OpTest): + def set_data(self): + self.init_test_case() + # only supprot one level LoD + x = np.random.random(self.x_dim).astype('float32') + lod = self.x_lod + offset = np.array(self.offset).astype("int64") + length = np.array(self.length).astype("int64") + + self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length} + outs = [] #np.zeros((100, 3, 2)).astype('float32') + out_lod = [[0]] + out_lod_offset = 0 + for i in range(len(offset)): + sub_x = x[lod[0][i] + offset[i, 0]:lod[0][i] + offset[i, 0] + + length[i, 0], :] + out_lod_offset = out_lod_offset + len(sub_x) + outs.append(sub_x) + out_lod[0].append(out_lod_offset) + outs = np.concatenate(outs, axis=0) + self.outputs = {'Out': (outs, out_lod)} + + def init_test_case(self): + self.x_dim = (100, 3, 2) + self.x_lod = [[0, 20, 40, 60, 80, 100]] + self.offset = [[1], [2], [3], [4], [5]] + self.length = [[10], [8], [6], [4], [2]] + + def setUp(self): + self.op_type = "sequence_slice" + self.set_data() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_variable.py b/python/paddle/v2/fluid/tests/test_variable.py index a3e60a751719666bdca56a3096b688125d09f4b2..c3e1f9ac0a70e7448fd8d1983b1c04d27af9771c 100644 --- a/python/paddle/v2/fluid/tests/test_variable.py +++ b/python/paddle/v2/fluid/tests/test_variable.py @@ -1,5 +1,5 @@ import unittest -from paddle.v2.fluid.framework import Variable, g_main_program, Program +from paddle.v2.fluid.framework import g_main_program, Program, convert_np_dtype_to_dtype_ import paddle.v2.fluid.core as core import numpy as np @@ -7,7 +7,7 @@ 
import numpy as np class TestVariable(unittest.TestCase): def test_np_dtype_convert(self): DT = core.DataType - convert = Variable._convert_np_dtype_to_dtype_ + convert = convert_np_dtype_to_dtype_ self.assertEqual(DT.FP32, convert(np.float32)) self.assertEqual(DT.FP16, convert("float16")) self.assertEqual(DT.FP64, convert("float64")) diff --git a/python/paddle/v2/fluid/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py index 0f01acb3b94dc55a3536e751108e785ddc6e47bb..84b432333f950f754a97bc1a051b59c16fb22aed 100644 --- a/python/paddle/v2/fluid/tests/test_while_op.py +++ b/python/paddle/v2/fluid/tests/test_while_op.py @@ -2,6 +2,7 @@ import unittest import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor import paddle.v2.fluid.core as core +from paddle.v2.fluid.backward import append_backward_ops import numpy @@ -16,7 +17,7 @@ class TestWhileOp(unittest.TestCase): i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True init = layers.zeros(shape=[10], dtype='float32') - mem_array = layers.array_write(init, i=i) + mem_array = layers.array_write(x=init, i=i) data_array = layers.array_write(x=d0, i=i) i = layers.increment(i) @@ -29,17 +30,23 @@ class TestWhileOp(unittest.TestCase): i.stop_gradient = True array_len = layers.fill_constant(shape=[1], dtype='int64', value=3) + array_len.stop_gradient = True cond = layers.less_than(x=i, y=array_len) while_op = layers.While(cond=cond) with while_op.block(): d = layers.array_read(array=data_array, i=i) prev = layers.array_read(array=mem_array, i=i) - i = layers.increment(x=i, in_place=True) result = layers.sums(input=[d, prev]) + + i = layers.increment(x=i, in_place=True) layers.array_write(result, i=i, array=mem_array) layers.less_than(x=i, y=array_len, cond=cond) - sum_result = layers.array_read(mem_array, i=array_len) + + sum_result = layers.array_read(array=mem_array, i=i) + loss = layers.mean(x=sum_result) + + append_backward_ops(loss) cpu = core.CPUPlace() exe = 
Executor(cpu) diff --git a/python/paddle/v2/model.py b/python/paddle/v2/model.py deleted file mode 100644 index 4634db55a919584db91e456e61d393b9e15129ac..0000000000000000000000000000000000000000 --- a/python/paddle/v2/model.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import errno -import uuid - -import paddle.v2.master - -__all__ = ["save_model", "load_model"] - -trainer_id = str(uuid.uuid4()) - - -def mkdir_p(path): - try: - os.makedirs(path) - except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(path): - pass - else: - raise - - -def save_model(parameters, path): - need_request = "KUBERNETES_SERVICE_HOST" in os.environ.keys() - - if need_request: - # TODO(helin): figure out how MPI trains, since MPI only save - # model when trainer_id == "0", we can consolidate the logic - # here. 
- - # TODO(helin): change this environment variable name from - # MASTER_IP to ETCD_IP - etcd_name = "MASTER_IP" - if etcd_name not in os.environ.keys(): - raise Exception('not find ' + etcd_name + - ' in environment variable.') - - etcd_ip = os.environ.get(etcd_name) - client = paddle.v2.master.client("http://" + etcd_ip + ":2379", 5, 0) - r = client.request_save_model(trainer_id, 5000) - if r == 0: - # do not need to save - return - elif r < 0: - # error - return - else: - # save model - path = os.path.join(path, trainer_id) - path = os.path.join(path, "model.tar") - - mkdir_p(path) - - with open(path, 'wb') as f: - parameters.to_tar(f) - - -def load_model(parameters, path): - with open(path, 'rb') as f: - parameters.from_tar(f)