提交 78dae3c5 编写于 作者: Y yangyaming

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into ssd_util

......@@ -16,3 +16,6 @@ third_party/
*~
bazel-*
third_party/
# clion workspace.
cmake-build-*
......@@ -3,8 +3,8 @@
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
hooks:
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
......
......@@ -49,6 +49,7 @@ before_install:
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
# protobuf version.
- pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
- pip install rarfile
- |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script:
......
......@@ -30,7 +30,8 @@ RUN apt-get update && \
python-numpy python-matplotlib gcc g++ \
automake locales clang-format-3.8 swig doxygen cmake \
liblapack-dev liblapacke-dev libboost-dev \
clang-3.8 llvm-3.8 libclang-3.8-dev && \
clang-3.8 llvm-3.8 libclang-3.8-dev \
net-tools && \
apt-get clean -y
# Install Go
......@@ -56,7 +57,8 @@ RUN pip install --upgrade pip && \
pip install -U docopt PyYAML sphinx && \
pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0'
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip install rarfile
# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
......
......@@ -70,7 +70,7 @@ before looking into the
We provide [English](http://www.paddlepaddle.org/develop/doc/) and
[Chinese](http://www.paddlepaddle.org/doc_cn/) documentation.
- [Deep Learning 101](http://book.paddlepaddle.org/index.en.html)
- [Deep Learning 101](http://book.paddlepaddle.org/index.html)
You might want to start from the this online interactive book that can run in Jupyter Notebook.
......
......@@ -59,7 +59,7 @@ macro(add_style_check_target TARGET_NAME)
"--filter=${STYLE_FILTER}"
"--write-success=${CUR_GEN}" ${filename}
DEPENDS ${filename}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif()
endforeach()
endif()
......
......@@ -11,11 +11,16 @@ find_path(CUDNN_INCLUDE_DIR cudnn.h
get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
set(TARGET_ARCH "x86_64")
if(NOT ${CMAKE_SYSTEM_PROCESSOR})
set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
endif()
list(APPEND CUDNN_CHECK_LIBRARY_DIRS
${CUDNN_ROOT}
${CUDNN_ROOT}/lib64
${CUDNN_ROOT}/lib
${CUDNN_ROOT}/lib/x86_64-linux-gnu
${CUDNN_ROOT}/lib/${TARGET_ARCH}-linux-gnu
$ENV{CUDNN_ROOT}
$ENV{CUDNN_ROOT}/lib64
$ENV{CUDNN_ROOT}/lib
......
......@@ -18,3 +18,4 @@ ExternalProject_Add(
)
add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE)
LIST(APPEND external_project_dependencies linb_any)
\ No newline at end of file
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -26,7 +26,7 @@ ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})
ExternalProject_Add(
gflags
extern_gflags
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gflags/gflags.git"
PREFIX ${GFLAGS_SOURCES_DIR}
......@@ -44,4 +44,8 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE:STRING=Release
)
ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
ADD_DEPENDENCIES(gflags extern_gflags)
LIST(APPEND external_project_dependencies gflags)
......@@ -27,7 +27,7 @@ ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
ExternalProject_Add(
glog
extern_glog
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog.git"
......@@ -48,4 +48,8 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE:STRING=Release
)
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})
ADD_DEPENDENCIES(glog extern_glog)
LIST(APPEND external_project_dependencies glog)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -35,7 +35,7 @@ IF(WITH_TESTING)
ENDIF(WIN32)
ExternalProject_Add(
gtest
extern_gtest
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/google/googletest.git"
GIT_TAG "release-1.8.0"
......@@ -55,5 +55,14 @@ IF(WITH_TESTING)
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
)
LIST(APPEND external_project_dependencies gtest)
ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES})
ADD_DEPENDENCIES(gtest extern_gtest)
ADD_LIBRARY(gtest_main STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES})
ADD_DEPENDENCIES(gtest_main extern_gtest)
LIST(APPEND external_project_dependencies gtest gtest_main)
ENDIF(WITH_TESTING)
......@@ -24,24 +24,29 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/${LIBRARY_PREFIX}openblas${STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE)
SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1)
IF(ANDROID)
# arm_soft_fp_abi branch of OpenBLAS to support softfp
# https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi
SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0 libs)
ELSEIF(RPI)
# use hardfp
SET(OPENBLAS_COMMIT "v0.2.19")
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 USE_THREAD=0 libs)
SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs)
IF(CMAKE_CROSSCOMPILING)
IF(ANDROID)
# arm_soft_fp_abi branch of OpenBLAS to support softfp
# https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi
SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0)
ELSEIF(RPI)
# use hardfp
SET(OPENBLAS_COMMIT "v0.2.19")
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 USE_THREAD=0)
ENDIF()
ELSE()
SET(OPENBLAS_COMMIT "v0.2.19")
SET(OPENBLAS_ARGS DYNAMIC_ARCH=1 libs)
SET(OPTIONAL_ARGS "")
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$")
SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64)
ENDIF()
ENDIF()
ExternalProject_Add(
openblas
extern_openblas
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
GIT_TAG ${OPENBLAS_COMMIT}
......@@ -53,8 +58,14 @@ IF(NOT ${CBLAS_FOUND})
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
)
LIST(APPEND external_project_dependencies openblas)
ENDIF(NOT ${CBLAS_FOUND})
MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}")
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
ADD_LIBRARY(cblas STATIC IMPORTED)
SET_PROPERTY(TARGET cblas PROPERTY IMPORTED_LOCATION ${CBLAS_LIBRARIES})
IF(NOT ${CBLAS_FOUND})
ADD_DEPENDENCIES(cblas extern_openblas)
LIST(APPEND external_project_dependencies cblas)
ENDIF(NOT ${CBLAS_FOUND})
......@@ -14,6 +14,34 @@
INCLUDE(ExternalProject)
macro(PROMPT_PROTOBUF_LIB)
MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}")
MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}")
MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}")
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
RETURN()
endmacro()
macro(SET_PROTOBUF_VERSION)
EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION)
STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}")
endmacro()
set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf")
if (NOT "${PROTOBUF_ROOT}" STREQUAL "")
find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include)
find_library(PROTOBUF_LIBRARY protobuf PATHS ${PROTOBUF_ROOT}/lib)
find_library(PROTOBUF_LITE_LIBRARY protobuf-lite PATHS ${PROTOBUF_ROOT}/lib)
find_library(PROTOBUF_PROTOC_LIBRARY protoc PATHS ${PROTOBUF_ROOT}/lib)
find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin)
if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE)
message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.")
SET_PROTOBUF_VERSION()
PROMPT_PROTOBUF_LIB()
else()
message(WARNING "Cannot find protobuf library in ${PROTOBUF_ROOT}.")
endif()
endif()
FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/${TARGET_NAME})
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_NAME})
......@@ -78,8 +106,7 @@ IF(NOT CMAKE_CROSSCOMPILING)
FIND_PACKAGE(Protobuf ${PROTOBUF_VERSION})
IF(PROTOBUF_FOUND)
EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION)
STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}")
SET_PROTOBUF_VERSION()
IF("${PROTOBUF_VERSION}" VERSION_LESS "3.1.0")
SET(PROTOBUF_FOUND OFF)
ENDIF()
......@@ -107,6 +134,4 @@ IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_PROTOC_LIBRARY ${protobuf_PROTOC_LIBRARY} CACHE FILEPATH "protoc library." FORCE)
ENDIF(NOT PROTOBUF_FOUND)
MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}")
MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}")
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
PROMPT_PROTOBUF_LIB()
\ No newline at end of file
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -43,7 +43,7 @@ ELSE()
ENDIF()
ExternalProject_Add(
warpctc
extern_warpctc
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git"
PREFIX ${WARPCTC_SOURCES_DIR}
......@@ -65,4 +65,8 @@ ExternalProject_Add(
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
)
ADD_LIBRARY(warpctc STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES})
ADD_DEPENDENCIES(warpctc extern_warpctc)
LIST(APPEND external_project_dependencies warpctc)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -16,7 +16,7 @@
# To simplify the build process of PaddlePaddle, we defined couple of
# fundamental abstractions, e.g., how to build library, binary and
# test in C++, CUDA and Go.
#
#
# -------------------------------------------
# C++ CUDA C++ Go
# -------------------------------------------
......@@ -29,6 +29,11 @@
# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
#
if(NOT APPLE)
find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT})
endif(NOT APPLE)
# cc_library parses tensor.cc and figures out that target also depend on tensor.h.
# cc_library(tensor
# SRCS
......@@ -45,7 +50,9 @@ function(cc_library TARGET_NAME)
else()
add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
endif()
add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
if (cc_library_DEPS)
add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
endif()
endfunction(cc_library)
# cc_binary parses tensor.cc and figures out that target also depend on tensor.h.
......@@ -58,8 +65,7 @@ function(cc_binary TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_binary_SRCS})
link_paddle_exe(${TARGET_NAME})
if(cc_binary_DEPS)
if(cc_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
add_dependencies(${TARGET_NAME} ${cc_binary_DEPS})
endif()
......@@ -73,17 +79,16 @@ endfunction(cc_binary)
# DEPS
# tensor)
function(cc_test TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
link_paddle_test(${TARGET_NAME})
if(cc_test_DEPS)
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS})
add_dependencies(${TARGET_NAME} ${cc_test_DEPS})
if(WITH_TESTING)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
add_test(${TARGET_NAME} ${TARGET_NAME})
endif()
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(cc_test)
# Suppose that ops.cu includes global functions that take Tensor as
......@@ -95,28 +100,33 @@ endfunction(cc_test)
# DEPS
# tensor)
function(nv_library TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${nv_library_OPTIONAL} STREQUAL "SHARED")
cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
else()
cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
if (WITH_GPU)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${nv_library_OPTIONAL} STREQUAL "SHARED")
cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
else()
cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
endif()
if (nv_library_DEPS)
add_dependencies(${TARGET_NAME} ${nv_library_DEPS})
endif()
endif()
add_dependencies(${TARGET_NAME} ${nv_library_DEPS} ${external_project_dependencies})
endfunction(nv_library)
function(nv_binary TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
link_paddle_exe(${TARGET_NAME})
if(nv_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
if (WITH_GPU)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
if(nv_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
endif()
endif()
endfunction(nv_binary)
......@@ -128,17 +138,16 @@ endfunction(nv_binary)
# DEPS
# ops)
function(nv_test TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
link_paddle_test(${TARGET_NAME})
if(nv_test_DEPS)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS})
add_dependencies(${TARGET_NAME} ${nv_test_DEPS})
if (WITH_GPU AND WITH_TESTING)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} gtest gtest_main)
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} gtest gtest_main)
add_test(${TARGET_NAME} ${TARGET_NAME})
endif()
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(nv_test)
set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go")
......@@ -164,7 +173,7 @@ function(go_library TARGET_NAME)
set(LIB_NAME "lib${TARGET_NAME}.dylib")
else()
set(LIB_NAME "lib${TARGET_NAME}.so")
endif()
endif()
else()
set(BUILD_MODE "-buildmode=c-archive")
set(LIB_NAME "lib${TARGET_NAME}.a")
......@@ -173,7 +182,7 @@ function(go_library TARGET_NAME)
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
-o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}"
${go_library_SRCS}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${TARGET_NAME}_lib ALL DEPENDS ${TARGET_NAME}_timestamp ${go_library_DEPS})
add_library(${TARGET_NAME} STATIC IMPORTED)
set_property(TARGET ${TARGET_NAME} PROPERTY
......@@ -190,8 +199,8 @@ function(go_binary TARGET_NAME)
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
-o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
${go_library_SRCS}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_binary_DEPS})
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_binary_DEPS})
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin)
endfunction(go_binary)
......@@ -204,8 +213,8 @@ function(go_test TARGET_NAME)
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test
-c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
${go_test_SRCS}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS})
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS})
add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME})
endfunction(go_test)
......
......@@ -10,7 +10,7 @@ if(WITH_RDMA)
function(generate_rdma_links)
#redirect to current DIR to isolate the pollution from system runtime environment
#it can benifits unified control for different gcc environment.
#it can benifits unified control for different gcc environment.
#e.g, by default gcc48 did not refer /usr/lib64 which could contain low version
#runtime libraries that will crash process while loading it. That redirect trick
#can fix it.
......@@ -19,7 +19,9 @@ if(WITH_RDMA)
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so.1
COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
endfunction(generate_rdma_links)
......@@ -44,7 +46,7 @@ if(WITH_RDMA)
RDMA_INC_XIO AND
RDMA_INC_EVENT AND
RDMA_INC_NUMA AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_XIO AND
RDMA_LIB_EVENT AND
RDMA_LIB_EVENT_CORE AND
......@@ -53,19 +55,19 @@ if(WITH_RDMA)
RDMA_LIB_NUMA
)
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
${RDMA_INC_XIO}
${RDMA_INC_EVENT}
${RDMA_INC_NUMA})
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
)
set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
include_directories("${RDMA_INC_DIR}")
......
......@@ -149,8 +149,9 @@ endfunction()
# Create a python unittest using run_python_tests.sh,
# which takes care of making correct running environment
function(add_python_test TEST_NAME)
add_test(NAME ${TEST_NAME}
COMMAND bash ${PROJ_ROOT}/paddle/scripts/run_python_tests.sh
add_test(NAME ${TEST_NAME}
COMMAND env PADDLE_PACKAGE_DIR=${PADDLE_PYTHON_PACKAGE_DIR}
bash ${PROJ_ROOT}/paddle/scripts/run_python_tests.sh
${USE_VIRTUALENV_FOR_TEST} ${PYTHON_EXECUTABLE} ${ARGN}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endfunction()
data/cifar-10-batches-py
data/cifar-out
cifar_vgg_model/*
plot.png
train.log
image_provider_copy_1.py
*pyc
train.list
test.list
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['resnet_cifar10']
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
active_type=paddle.activation.Relu(),
ch_in=None):
tmp = paddle.layer.img_conv(
input=input,
filter_size=filter_size,
num_channels=ch_in,
num_filters=ch_out,
stride=stride,
padding=padding,
act=paddle.activation.Linear(),
bias_attr=False)
return paddle.layer.batch_norm(input=tmp, act=active_type)
def shortcut(ipt, n_in, n_out, stride):
if n_in != n_out:
return conv_bn_layer(ipt, n_out, 1, stride, 0,
paddle.activation.Linear())
else:
return ipt
def basicblock(ipt, ch_out, stride):
ch_in = ch_out * 2
tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
short = shortcut(ipt, ch_in, ch_out, stride)
return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
def layer_warp(block_func, ipt, features, count, stride):
tmp = block_func(ipt, features, stride)
for i in range(1, count):
tmp = block_func(tmp, features, 1)
return tmp
def resnet_cifar10(ipt, depth=32):
# depth should be one of 20, 32, 44, 56, 110, 1202
assert (depth - 2) % 6 == 0
n = (depth - 2) / 6
nStages = {16, 64, 128}
conv1 = conv_bn_layer(
ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, n, 1)
res2 = layer_warp(basicblock, res1, 32, n, 2)
res3 = layer_warp(basicblock, res2, 64, n, 2)
pool = paddle.layer.img_pool(
input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
return pool
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
def main():
datadim = 3 * 32 * 32
classdim = 10
# PaddlePaddle init
paddle.init(use_gpu=False, trainer_count=1)
image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim))
# Add neural network config
# option 1. resnet
# net = resnet_cifar10(image, depth=32)
# option 2. vgg
net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net,
size=classdim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data(
name="label", type=paddle.data_type.integer_value(classdim))
cost = paddle.layer.classification_cost(input=out, label=lbl)
# Create parameters
parameters = paddle.parameters.create(cost)
# Create optimizer
momentum_optimizer = paddle.optimizer.Momentum(
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
learning_rate=0.1 / 128.0,
learning_rate_decay_a=0.1,
learning_rate_decay_b=50000 * 100,
learning_rate_schedule='discexp',
batch_size=128)
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128),
feeding={'image': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# Create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=momentum_optimizer)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=50000),
batch_size=128),
num_passes=5,
event_handler=event_handler,
feeding={'image': 0,
'label': 1})
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['vgg_bn_drop']
def vgg_bn_drop(input):
def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
return paddle.networks.img_conv_group(
input=ipt,
num_channels=num_channels,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act=paddle.activation.Relu(),
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type=paddle.pooling.Max())
conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
bn = paddle.layer.batch_norm(
input=fc1,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
return fc2
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz
rm -rf cifar-out/*
echo Converting CIFAR data to images.....
python process_cifar.py ./cifar-10-batches-py ./cifar-out
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import sys
import os
import PIL.Image as Image
"""
Usage: python process_cifar input_dir output_dir
"""
def mkdir_not_exist(path):
"""
Make dir if the path does not exist.
path: the path to be created.
"""
if not os.path.exists(path):
os.mkdir(path)
def create_dir_structure(output_dir):
"""
Create the directory structure for the directory.
output_dir: the direcotry structure path.
"""
mkdir_not_exist(os.path.join(output_dir))
mkdir_not_exist(os.path.join(output_dir, "train"))
mkdir_not_exist(os.path.join(output_dir, "test"))
def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
"""
Convert CIFAR batch to the structure of Paddle format.
batch_path: the batch to be converted.
label_set: the set of labels.
output_dir: the output path.
data_split: whether it is training or testing data.
"""
data = np.load(batch_path)
for data, label, filename in zip(data['data'], data['labels'],
data['filenames']):
data = data.reshape((3, 32, 32))
data = np.transpose(data, (1, 2, 0))
label = label_map[label]
output_dir_this = os.path.join(output_dir, data_split, str(label))
output_filename = os.path.join(output_dir_this, filename)
if not label in label_set:
label_set[label] = True
mkdir_not_exist(output_dir_this)
Image.fromarray(data).save(output_filename)
if __name__ == '__main__':
input_dir = sys.argv[1]
output_dir = sys.argv[2]
num_batch = 5
create_dir_structure(output_dir)
label_map = {
0: "airplane",
1: "automobile",
2: "bird",
3: "cat",
4: "deer",
5: "dog",
6: "frog",
7: "horse",
8: "ship",
9: "truck"
}
labels = {}
for i in range(1, num_batch + 1):
convert_batch(
os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
output_dir, "train")
convert_batch(
os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
"test")
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import random
import paddle.utils.image_util as image_util
from paddle.trainer.PyDataProvider2 import *
#
# {'img_size': 32,
# 'settings': a global object,
# 'color': True,
# 'mean_img_size': 32,
# 'meta': './data/cifar-out/batches/batches.meta',
# 'num_classes': 10,
# 'file_list': ('./data/cifar-out/batches/train_batch_000',),
# 'use_jpeg': True}
def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
is_train, **kwargs):
settings.mean_img_size = mean_img_size
settings.img_size = img_size
settings.num_classes = num_classes
settings.color = color
settings.is_train = is_train
if settings.color:
settings.img_raw_size = settings.img_size * settings.img_size * 3
else:
settings.img_raw_size = settings.img_size * settings.img_size
settings.meta_path = meta
settings.use_jpeg = use_jpeg
settings.img_mean = image_util.load_meta(settings.meta_path,
settings.mean_img_size,
settings.img_size, settings.color)
settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = {
'image': dense_vector(settings.img_raw_size),
'label': integer_value(settings.num_classes)
}
settings.logger.info('DataProvider Initialization finished')
@provider(init_hook=hook, min_pool_size=0)
def processData(settings, file_list):
"""
The main function for loading data.
Load the batch, iterate all the images and labels in this batch.
file_list: the batch file list.
"""
with open(file_list, 'r') as fdata:
lines = [line.strip() for line in fdata]
random.shuffle(lines)
for file_name in lines:
with io.open(file_name.strip(), 'rb') as file:
data = cPickle.load(file)
indexes = list(range(len(data['images'])))
if settings.is_train:
random.shuffle(indexes)
for i in indexes:
if settings.use_jpeg == 1:
img = image_util.decode_jpeg(data['images'][i])
else:
img = data['images'][i]
img_feat = image_util.preprocess_img(
img, settings.img_mean, settings.img_size,
settings.is_train, settings.color)
label = data['labels'][i]
yield {
'image': img_feat.astype('float32'),
'label': int(label)
}
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from PIL import Image
from cStringIO import StringIO
def resize_image(img, target_size):
"""
Resize an image so that the shorter edge has length target_size.
img: the input image to be resized.
target_size: the target resized image size.
"""
percent = (target_size / float(min(img.size[0], img.size[1])))
resized_size = int(round(img.size[0] * percent)), int(
round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS)
return img
def flip(im):
"""
Return the flipped image.
Flip an image along the horizontal direction.
im: input image, (H x W x K) ndarrays
"""
if len(im.shape) == 3:
return im[:, :, ::-1]
else:
return im[:, ::-1]
def crop_img(im, inner_size, color=True, test=True):
"""
Return cropped image.
The size of the cropped image is inner_size * inner_size.
im: (K x H x W) ndarrays
inner_size: the cropped image size.
color: whether it is color image.
test: whether in test mode.
If False, does random cropping and flipping.
If True, crop the center of images.
"""
if color:
height, width = max(inner_size, im.shape[1]), max(inner_size,
im.shape[2])
padded_im = np.zeros((3, height, width))
startY = (height - im.shape[1]) / 2
startX = (width - im.shape[2]) / 2
endY, endX = startY + im.shape[1], startX + im.shape[2]
padded_im[:, startY:endY, startX:endX] = im
else:
im = im.astype('float32')
height, width = max(inner_size, im.shape[0]), max(inner_size,
im.shape[1])
padded_im = np.zeros((height, width))
startY = (height - im.shape[0]) / 2
startX = (width - im.shape[1]) / 2
endY, endX = startY + im.shape[0], startX + im.shape[1]
padded_im[startY:endY, startX:endX] = im
if test:
startY = (height - inner_size) / 2
startX = (width - inner_size) / 2
else:
startY = np.random.randint(0, height - inner_size + 1)
startX = np.random.randint(0, width - inner_size + 1)
endY, endX = startY + inner_size, startX + inner_size
if color:
pic = padded_im[:, startY:endY, startX:endX]
else:
pic = padded_im[startY:endY, startX:endX]
if (not test) and (np.random.randint(2) == 0):
pic = flip(pic)
return pic
def decode_jpeg(jpeg_string):
np_array = np.array(Image.open(StringIO(jpeg_string)))
if len(np_array.shape) == 3:
np_array = np.transpose(np_array, (2, 0, 1))
return np_array
def preprocess_img(im, img_mean, crop_size, is_train, color=True):
"""
Does data augmentation for images.
If is_train is false, cropping the center region from the image.
If is_train is true, randomly crop a region from the image,
and randomy does flipping.
im: (K x H x W) ndarrays
"""
im = im.astype('float32')
test = not is_train
pic = crop_img(im, crop_size, color, test)
pic -= img_mean
return pic.flatten()
def load_meta(meta_path, mean_img_size, crop_size, color=True):
"""
Return the loaded meta file.
Load the meta image, which is the mean of the images in the dataset.
The mean image is subtracted from every input image so that the expected mean
of each input image is zero.
"""
mean = np.load(meta_path)['data_mean']
border = (mean_img_size - crop_size) / 2
if color:
assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
mean = mean.reshape(3, mean_img_size, mean_img_size)
mean = mean[:, border:border + crop_size, border:border +
crop_size].astype('float32')
else:
assert (mean_img_size * mean_img_size == mean.shape[0])
mean = mean.reshape(mean_img_size, mean_img_size)
mean = mean[border:border + crop_size, border:border +
crop_size].astype('float32')
return mean
def load_image(img_path, is_color=True):
"""
Load image and return.
img_path: image path.
is_color: is color image or not.
"""
img = Image.open(img_path)
img.load()
return img
def oversample(img, crop_dims):
"""
image : iterable of (H x W x K) ndarrays
crop_dims: (height, width) tuple for the crops.
Returned data contains ten crops of input image, namely,
four corner patches and the center patch as well as their
horizontal reflections.
"""
# Dimensions and center.
im_shape = np.array(img[0].shape)
crop_dims = np.array(crop_dims)
im_center = im_shape[:2] / 2.0
# Make crop coordinates
h_indices = (0, im_shape[0] - crop_dims[0])
w_indices = (0, im_shape[1] - crop_dims[1])
crops_ix = np.empty((5, 4), dtype=int)
curr = 0
for i in h_indices:
for j in w_indices:
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
curr += 1
crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
[-crop_dims / 2.0, crop_dims / 2.0])
crops_ix = np.tile(crops_ix, (2, 1))
# Extract crops
crops = np.empty(
(10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
dtype=np.float32)
ix = 0
for im in img:
for crop in crops_ix:
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
ix += 1
crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors
return crops
class ImageTransformer:
def __init__(self,
transpose=None,
channel_swap=None,
mean=None,
is_color=True):
self.transpose = transpose
self.channel_swap = None
self.mean = None
self.is_color = is_color
def set_transpose(self, order):
if self.is_color:
assert 3 == len(order)
self.transpose = order
def set_channel_swap(self, order):
if self.is_color:
assert 3 == len(order)
self.channel_swap = order
def set_mean(self, mean):
# mean value, may be one value per channel
if mean.ndim == 1:
mean = mean[:, np.newaxis, np.newaxis]
else:
# elementwise mean
if self.is_color:
assert len(mean.shape) == 3
self.mean = mean
def transformer(self, data):
if self.transpose is not None:
data = data.transpose(self.transpose)
if self.channel_swap is not None:
data = data[self.channel_swap, :, :]
if self.mean is not None:
data -= self.mean
return data
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
model=cifar_vgg_model/pass-00299/
image=data/cifar-out/test/airplane/seaplane_s_000978.png
use_gpu=1
python prediction.py $model $image $use_gpu
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os, sys
import numpy as np
import logging
from PIL import Image
from optparse import OptionParser
import paddle.utils.image_util as image_util
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
logging.basicConfig(
format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
class ImageClassifier():
def __init__(self,
train_conf,
use_gpu=True,
model_dir=None,
resize_dim=None,
crop_dim=None,
mean_file=None,
oversample=False,
is_color=True):
"""
train_conf: network configure.
model_dir: string, directory of model.
resize_dim: int, resized image size.
crop_dim: int, crop size.
mean_file: string, image mean file.
oversample: bool, oversample means multiple crops, namely five
patches (the four corner patches and the center
patch) as well as their horizontal reflections,
ten crops in all.
"""
self.train_conf = train_conf
self.model_dir = model_dir
if model_dir is None:
self.model_dir = os.path.dirname(train_conf)
self.resize_dim = resize_dim
self.crop_dims = [crop_dim, crop_dim]
self.oversample = oversample
self.is_color = is_color
self.transformer = image_util.ImageTransformer(is_color=is_color)
self.transformer.set_transpose((2, 0, 1))
self.mean_file = mean_file
mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
self.transformer.set_mean(mean) # mean pixel
gpu = 1 if use_gpu else 0
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir)
data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
slots = [dense_vector(data_size)]
self.converter = DataProviderConverter(slots)
def get_data(self, img_path):
"""
1. load image from img_path.
2. resize or oversampling.
3. transformer data: transpose, sub mean.
return K x H x W ndarray.
img_path: image path.
"""
image = image_util.load_image(img_path, self.is_color)
if self.oversample:
# image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim)
image = np.array(image)
input = np.zeros(
(1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims)
else:
image = image.resize(self.crop_dims, Image.ANTIALIAS)
input = np.zeros(
(1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32)
data_in = []
for img in input:
img = self.transformer.transformer(img).flatten()
data_in.append([img.tolist()])
return data_in
def forward(self, input_data):
in_arg = self.converter(input_data)
return self.network.forwardTest(in_arg)
def forward(self, data, output_layer):
"""
input_data: py_paddle input data.
output_layer: specify the name of probability, namely the layer with
softmax activation.
return: the predicting probability of each label.
"""
input = self.converter(data)
self.network.forwardTest(input)
output = self.network.getLayerOutputs(output_layer)
# For oversampling, average predictions across crops.
# If not, the shape of output[name]: (1, class_number),
# the mean is also applicable.
return output[output_layer]['value'].mean(0)
def predict(self, image=None, output_layer=None):
assert isinstance(image, basestring)
assert isinstance(output_layer, basestring)
data = self.get_data(image)
prob = self.forward(data, output_layer)
lab = np.argsort(-prob)
logging.info("Label of %s is: %d", image, lab[0])
if __name__ == '__main__':
image_size = 32
crop_size = 32
multi_crop = True
config = "vgg_16_cifar.py"
output_layer = "__fc_layer_1__"
mean_path = "data/cifar-out/batches/batches.meta"
model_path = sys.argv[1]
image = sys.argv[2]
use_gpu = bool(int(sys.argv[3]))
obj = ImageClassifier(
train_conf=config,
model_dir=model_path,
resize_dim=image_size,
crop_dim=crop_size,
mean_file=mean_path,
use_gpu=use_gpu,
oversample=multi_crop)
obj.predict(image, output_layer)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.utils.preprocess_img import ImageClassificationDatasetCreater
from optparse import OptionParser
def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]")
parser.add_option(
"-i",
"--input",
action="store",
dest="input",
help="Input data directory.")
parser.add_option(
"-s",
"--size",
action="store",
dest="size",
help="Processed image size.")
parser.add_option(
"-c",
"--color",
action="store",
dest="color",
help="whether to use color images.")
return parser.parse_args()
if __name__ == '__main__':
options, args = option_parser()
data_dir = options.input
processed_image_size = int(options.size)
color = options.color == "1"
data_creator = ImageClassificationDatasetCreater(
data_dir, processed_image_size, color)
data_creator.train_list_name = "train.txt"
data_creator.test_list_name = "test.txt"
data_creator.num_per_batch = 1000
data_creator.overwrite = True
data_creator.create_batches()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
echo "data/cifar-out/batches/train.txt" > train.list
echo "data/cifar-out/batches/test.txt" > test.list
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
config=vgg_16_cifar.py
output=./cifar_vgg_model
log=train.log
paddle train \
--config=$config \
--dot_period=10 \
--log_period=100 \
--test_all_data_in_one_period=1 \
--use_gpu=1 \
--trainer_count=1 \
--num_passes=300 \
--save_dir=$output \
2>&1 | tee $log
paddle usage -l $log -e $? -n "image_classification_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
data_dir = 'data/cifar-out/batches/'
meta_path = data_dir + 'batches.meta'
args = {
'meta': meta_path,
'mean_img_size': 32,
'img_size': 32,
'num_classes': 10,
'use_jpeg': 1,
'color': "color"
}
define_py_data_sources2(
train_list="train.list",
test_list="train.list",
module='image_provider',
obj='processData',
args=args)
######################Algorithm Configuration #############
settings(
batch_size=128,
learning_rate=0.1 / 128.0,
learning_method=MomentumOptimizer(0.9),
regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
data_size = 3 * 32 * 32
label_size = 10
img = data_layer(name='image', size=data_size)
# small_vgg is predefined in trainer_config_helpers.networks
predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
outputs(classification_cost(input=predict, label=lbl))
else:
outputs(predict)
dataprovider.pyc
empty.list
train.log
output
train.list
This folder contains scripts used in PaddlePaddle introduction.
- use `bash train.sh` to train a simple linear regression model
- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing
def main():
# init
paddle.init(use_gpu=False, trainer_count=1)
# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param(name='w'),
size=1,
act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param(name='b'))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
optimizer = paddle.optimizer.Momentum(momentum=0)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# event_handler to print training and testing info
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)
if isinstance(event, paddle.event.EndPass):
if (event.pass_id + 1) % 10 == 0:
result = trainer.test(
reader=paddle.batch(
uci_housing.test(), batch_size=2),
feeding={'x': 0,
'y': 1})
print "Test %d, %.2f" % (event.pass_id, result.cost)
# training
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
num_passes=30)
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import random
# define data types of input: 2 real numbers
@provider(
input_types={'x': dense_vector(1),
'y': dense_vector(1)}, use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
yield {'x': [x], 'y': [2 * x + 0.3]}
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
paddle train \
--config=trainer_config.py \
--save_dir=./output \
--num_passes=30 \
2>&1 |tee 'train.log'
paddle usage -l "train.log" -e $? -n "introduction" >/dev/null 2>&1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
# 1. read data. Suppose you saved above python code as dataprovider.py
define_py_data_sources2(
train_list=['no_matter.txt'],
test_list=None,
module='dataprovider',
obj='process',
args={})
# 2. learning algorithm
settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
# 3. Network configuration
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
y_predict = fc_layer(
input=x,
param_attr=ParamAttr(name='w'),
size=1,
act=LinearActivation(),
bias_attr=ParamAttr(name='b'))
cost = mse_cost(input=y_predict, label=y)
outputs(cost)
import paddle.v2 as paddle
import gzip
def softmax_regression(img):
predict = paddle.layer.fc(input=img,
size=10,
act=paddle.activation.Softmax())
return predict
def multilayer_perceptron(img):
# The first fully-connected layer
hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
# The second fully-connected layer and the according activation function
hidden2 = paddle.layer.fc(input=hidden1,
size=64,
act=paddle.activation.Relu())
# The thrid fully-connected layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=hidden2,
size=10,
act=paddle.activation.Softmax())
return predict
def convolutional_neural_network(img):
# first conv layer
conv_pool_1 = paddle.networks.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
num_channel=1,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# second conv layer
conv_pool_2 = paddle.networks.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
num_channel=20,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# The first fully-connected layer
fc1 = paddle.layer.fc(input=conv_pool_2,
size=128,
act=paddle.activation.Tanh())
# The softmax layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=fc1,
size=10,
act=paddle.activation.Softmax())
return predict
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
images = paddle.layer.data(
name='pixel', type=paddle.data_type.dense_vector(784))
label = paddle.layer.data(
name='label', type=paddle.data_type.integer_value(10))
# Here we can build the prediction network in different ways. Please
# choose one by uncomment corresponding line.
predict = softmax_regression(images)
#predict = multilayer_perceptron(images)
#predict = convolutional_neural_network(images)
cost = paddle.layer.classification_cost(input=predict, label=label)
try:
with gzip.open('params.tar.gz', 'r') as f:
parameters = paddle.parameters.Parameters.from_tar(f)
except IOError:
parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum(
learning_rate=0.1 / 128.0,
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
lists = []
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 1000 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
with gzip.open('params.tar.gz', 'w') as f:
parameters.to_tar(f)
elif isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=128))
print "Test with Pass %d, Cost %f, %s\n" % (
event.pass_id, result.cost, result.metrics)
lists.append((event.pass_id, result.cost,
result.metrics['classification_error_evaluator']))
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=8192),
batch_size=128),
event_handler=event_handler,
num_passes=100)
# find the best pass
best = sorted(lists, key=lambda list: float(list[1]))[0]
print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
test_creator = paddle.dataset.mnist.test()
test_data = []
for item in test_creator():
test_data.append((item[0], ))
if len(test_data) == 100:
break
# output is a softmax layer. It returns probabilities.
# Shape should be (100, 10)
probs = paddle.infer(
output_layer=predict, parameters=parameters, input=test_data)
print probs.shape
if __name__ == '__main__':
main()
log.txt
data/meta.bin
data/ml-1m
data/ratings.dat.train
data/ratings.dat.test
data/train.list
data/test.list
dataprovider_copy_1.py
*.pyc
output
import paddle.v2 as paddle
import cPickle
import copy
def main():
paddle.init(use_gpu=False)
movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
uid = paddle.layer.data(
name='user_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_user_id() + 1))
usr_emb = paddle.layer.embedding(input=uid, size=32)
usr_gender_id = paddle.layer.data(
name='gender_id', type=paddle.data_type.integer_value(2))
usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
usr_age_id = paddle.layer.data(
name='age_id',
type=paddle.data_type.integer_value(
len(paddle.dataset.movielens.age_table)))
usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
usr_job_id = paddle.layer.data(
name='job_id',
type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
) + 1))
usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
usr_combined_features = paddle.layer.fc(
input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
size=200,
act=paddle.activation.Tanh())
mov_id = paddle.layer.data(
name='movie_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_movie_id() + 1))
mov_emb = paddle.layer.embedding(input=mov_id, size=32)
mov_categories = paddle.layer.data(
name='category_id',
type=paddle.data_type.sparse_binary_vector(
len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
mov_title_id = paddle.layer.data(
name='movie_title',
type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
mov_title_conv = paddle.networks.sequence_conv_pool(
input=mov_title_emb, hidden_size=32, context_len=3)
mov_combined_features = paddle.layer.fc(
input=[mov_emb, mov_categories_hidden, mov_title_conv],
size=200,
act=paddle.activation.Tanh())
inference = paddle.layer.cos_sim(
a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
cost = paddle.layer.mse_cost(
input=inference,
label=paddle.layer.data(
name='score', type=paddle.data_type.dense_vector(1)))
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(
learning_rate=1e-4))
feeding = {
'user_id': 0,
'gender_id': 1,
'age_id': 2,
'job_id': 3,
'movie_id': 4,
'category_id': 5,
'movie_title': 6,
'score': 7
}
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.2f" % (
event.pass_id, event.batch_id, event.cost)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.movielens.train(), buf_size=8192),
batch_size=256),
event_handler=event_handler,
feeding=feeding,
num_passes=1)
user_id = 234
movie_id = 345
user = paddle.dataset.movielens.user_info()[user_id]
movie = paddle.dataset.movielens.movie_info()[movie_id]
feature = user.value() + movie.value()
def reader():
yield feature
infer_dict = copy.copy(feeding)
del infer_dict['score']
prediction = paddle.infer(
output=inference,
parameters=parameters,
reader=paddle.batch(
reader, batch_size=32),
feeding=infer_dict)
print(prediction + 5) / 2
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
def meta_to_header(meta, name):
metas = meta[name]['__meta__']['raw_meta']
for each_meta in metas:
slot_name = each_meta.get('name', '%s_id' % name)
if each_meta['type'] == 'id':
yield slot_name, integer_value(each_meta['max'])
elif each_meta['type'] == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
yield slot_name, integer_value(
len(each_meta['dict']),
seq_type=SequenceType.SEQUENCE
if is_seq else SequenceType.NO_SEQUENCE)
elif each_meta['type'] == 'one_hot_dense':
yield slot_name, dense_vector(len(each_meta['dict']))
{
"user": {
"file": {
"name": "users.dat",
"delimiter": "::"
},
"fields": ["id", "gender", "age", "occupation"]
},
"movie": {
"file": {
"name": "movies.dat",
"delimiter": "::"
},
"fields": ["id", "title", "genres"]
}
}
#!/bin/env python2
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
config_generator.py
Usage:
./config_generator.py <config_file> [--output_format=<output_format>]
./config_generator.py -h | --help
Options:
-h --help Show this screen.
--output_format=<output_format> Output Config format(json or yaml) [default: json].
"""
import json
import docopt
import copy
DEFAULT_FILE = {"type": "split", "delimiter": ","}
DEFAULT_FIELD = {
"id": {
"type": "id"
},
"gender": {
"name": "gender",
"type": "embedding",
"dict": {
"type": "char_based"
}
},
"age": {
"name": "age",
"type": "embedding",
"dict": {
"type": "whole_content",
"sort": True
}
},
"occupation": {
"name": "occupation",
"type": "embedding",
"dict": {
"type": "whole_content",
"sort": "true"
}
},
"title": {
"regex": {
"pattern": r"^(.*)\((\d+)\)$",
"group_id": 1,
"strip": True
},
"name": "title",
"type": {
"name": "embedding",
"seq_type": "sequence",
},
"dict": {
"type": "char_based"
}
},
"genres": {
"type": "one_hot_dense",
"dict": {
"type": "split",
"delimiter": "|"
},
"name": "genres"
}
}
def merge_dict(master_dict, slave_dict):
return dict(((k, master_dict.get(k) or slave_dict.get(k))
for k in set(slave_dict) | set(master_dict)))
def main(filename, fmt):
with open(filename, 'r') as f:
conf = json.load(f)
obj = dict()
for k in conf:
val = conf[k]
file_dict = val['file']
file_dict = merge_dict(file_dict, DEFAULT_FILE)
fields = []
for pos, field_key in enumerate(val['fields']):
assert isinstance(field_key, basestring)
field = copy.deepcopy(DEFAULT_FIELD[field_key])
field['pos'] = pos
fields.append(field)
obj[k] = {"file": file_dict, "fields": fields}
meta = {"meta": obj}
# print meta
if fmt == 'json':
def formatter(x):
import json
return json.dumps(x, indent=2)
elif fmt == 'yaml':
def formatter(x):
import yaml
return yaml.safe_dump(x, default_flow_style=False)
else:
raise NotImplementedError("Dump format %s is not implemented" % fmt)
print formatter(meta)
if __name__ == '__main__':
args = docopt.docopt(__doc__, version="0.1.0")
main(args["<config_file>"], args["--output_format"])
{
"meta": {
"movie": {
"fields": [
{
"type": "id",
"pos": 0
},
{
"regex": {
"pattern": "^(.*)\\((\\d+)\\)$",
"group_id": 1,
"strip": true
},
"type": {
"seq_type": "sequence",
"name": "embedding"
},
"dict": {
"type": "char_based"
},
"name": "title",
"pos": 1
},
{
"type": "one_hot_dense",
"dict": {
"delimiter": "|",
"type": "split"
},
"name": "genres",
"pos": 2
}
],
"file": {
"delimiter": "::",
"type": "split",
"name": "movies.dat"
}
},
"user": {
"fields": [
{
"type": "id",
"pos": 0
},
{
"type": "embedding",
"dict": {
"type": "char_based"
},
"name": "gender",
"pos": 1
},
{
"type": "embedding",
"dict": {
"sort": true,
"type": "whole_content"
},
"name": "age",
"pos": 2
},
{
"type": "embedding",
"dict": {
"sort": "true",
"type": "whole_content"
},
"name": "occupation",
"pos": 3
}
],
"file": {
"delimiter": "::",
"type": "split",
"name": "users.dat"
}
}
}
}
#!/bin/env python2
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Preprocess Movielens dataset, to get movie/user object.
Usage:
./preprocess.py <dataset_dir> <binary_filename> [--config=<config_file>]
./preprocess.py -h | --help
Options:
-h --help Show this screen.
--version Show version.
--config=<config_file> Get MetaData config file [default: config.json].
"""
import docopt
import os
import sys
import re
import collections
try:
import cPickle as pickle
except ImportError:
import pickle
class UniqueIDGenerator(object):
def __init__(self):
self.pool = collections.defaultdict(self.__next_id__)
self.next_id = 0
def __next_id__(self):
tmp = self.next_id
self.next_id += 1
return tmp
def __call__(self, k):
return self.pool[k]
def to_list(self):
ret_val = [None] * len(self.pool)
for k in self.pool.keys():
ret_val[self.pool[k]] = k
return ret_val
class SortedIDGenerator(object):
def __init__(self):
self.__key_set__ = set()
self.dict = None
def scan(self, key):
self.__key_set__.add(key)
def finish_scan(self, compare=None, key=None, reverse=False):
self.__key_set__ = sorted(
list(self.__key_set__), cmp=compare, key=key, reverse=reverse)
self.dict = dict()
for idx, each_key in enumerate(self.__key_set__):
self.dict[each_key] = idx
def __call__(self, key):
return self.dict[key]
def to_list(self):
return self.__key_set__
class SplitFileReader(object):
def __init__(self, work_dir, config):
assert isinstance(config, dict)
self.filename = config['name']
self.delimiter = config.get('delimiter', ',')
self.work_dir = work_dir
def read(self):
with open(os.path.join(self.work_dir, self.filename), 'r') as f:
for line in f:
line = line.strip()
if isinstance(self.delimiter, unicode):
self.delimiter = str(self.delimiter)
yield line.split(self.delimiter)
@staticmethod
def create(work_dir, config):
assert isinstance(config, dict)
if config['type'] == 'split':
return SplitFileReader(work_dir, config)
class IFileReader(object):
READERS = [SplitFileReader]
def read(self):
raise NotImplementedError()
@staticmethod
def create(work_dir, config):
for reader_cls in IFileReader.READERS:
val = reader_cls.create(work_dir, config)
if val is not None:
return val
class IDFieldParser(object):
TYPE = 'id'
def __init__(self, config):
self.__max_id__ = -sys.maxint - 1
self.__min_id__ = sys.maxint
self.__id_count__ = 0
def scan(self, line):
idx = int(line)
self.__max_id__ = max(self.__max_id__, idx)
self.__min_id__ = min(self.__min_id__, idx)
self.__id_count__ += 1
def parse(self, line):
return int(line)
def meta_field(self):
return {
"is_key": True,
'max': self.__max_id__,
'min': self.__min_id__,
'count': self.__id_count__,
'type': 'id'
}
class SplitEmbeddingDict(object):
def __init__(self, delimiter):
self.__id__ = UniqueIDGenerator()
self.delimiter = delimiter
def scan(self, multi):
for val in multi.split(self.delimiter):
self.__id__(val)
def parse(self, multi):
return map(self.__id__, multi.split(self.delimiter))
def meta_field(self):
return self.__id__.to_list()
class EmbeddingFieldParser(object):
TYPE = 'embedding'
NO_SEQUENCE = "no_sequence"
SEQUENCE = "sequence"
class CharBasedEmbeddingDict(object):
def __init__(self, is_seq=True):
self.__id__ = UniqueIDGenerator()
self.is_seq = is_seq
def scan(self, s):
for ch in s:
self.__id__(ch)
def parse(self, s):
return map(self.__id__, s) if self.is_seq else self.__id__(s[0])
def meta_field(self):
return self.__id__.to_list()
class WholeContentDict(object):
def __init__(self, need_sort=True):
assert need_sort
self.__id__ = SortedIDGenerator()
self.__has_finished__ = False
def scan(self, txt):
self.__id__.scan(txt)
def meta_field(self):
if not self.__has_finished__:
self.__id__.finish_scan()
self.__has_finished__ = True
return self.__id__.to_list()
def parse(self, txt):
return self.__id__(txt)
def __init__(self, config):
try:
self.seq_type = config['type']['seq_type']
except TypeError:
self.seq_type = EmbeddingFieldParser.NO_SEQUENCE
if config['dict']['type'] == 'char_based':
self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict(
self.seq_type == EmbeddingFieldParser.SEQUENCE)
elif config['dict']['type'] == 'split':
self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ','))
elif config['dict']['type'] == 'whole_content':
self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][
'sort'])
else:
print config
assert False
self.name = config['name']
def scan(self, s):
self.dict.scan(s)
def meta_field(self):
return {
'name': self.name,
'dict': self.dict.meta_field(),
'type': 'embedding',
'seq': self.seq_type
}
def parse(self, s):
return self.dict.parse(s)
class OneHotDenseFieldParser(object):
TYPE = 'one_hot_dense'
def __init__(self, config):
if config['dict']['type'] == 'split':
self.dict = SplitEmbeddingDict(config['dict']['delimiter'])
self.name = config['name']
def scan(self, s):
self.dict.scan(s)
def meta_field(self):
# print self.dict.meta_field()
return {
'dict': self.dict.meta_field(),
'name': self.name,
'type': 'one_hot_dense'
}
def parse(self, s):
ids = self.dict.parse(s)
retv = [0.0] * len(self.dict.meta_field())
for idx in ids:
retv[idx] = 1.0
# print retv
return retv
class FieldParserFactory(object):
PARSERS = [IDFieldParser, EmbeddingFieldParser, OneHotDenseFieldParser]
@staticmethod
def create(config):
if isinstance(config['type'], basestring):
config_type = config['type']
elif isinstance(config['type'], dict):
config_type = config['type']['name']
assert config_type is not None
for each_parser_cls in FieldParserFactory.PARSERS:
if config_type == each_parser_cls.TYPE:
return each_parser_cls(config)
print config
class CompositeFieldParser(object):
def __init__(self, parser, extractor):
self.extractor = extractor
self.parser = parser
def scan(self, *args, **kwargs):
self.parser.scan(self.extractor.extract(*args, **kwargs))
def parse(self, *args, **kwargs):
return self.parser.parse(self.extractor.extract(*args, **kwargs))
def meta_field(self):
return self.parser.meta_field()
class PositionContentExtractor(object):
def __init__(self, pos):
self.pos = pos
def extract(self, line):
assert isinstance(line, list)
return line[self.pos]
class RegexPositionContentExtractor(PositionContentExtractor):
def __init__(self, pos, pattern, group_id, strip=True):
PositionContentExtractor.__init__(self, pos)
pattern = pattern.strip()
self.pattern = re.compile(pattern)
self.group_id = group_id
self.strip = strip
def extract(self, line):
line = PositionContentExtractor.extract(self, line)
match = self.pattern.match(line)
# print line, self.pattern.pattern, match
assert match is not None
txt = match.group(self.group_id)
if self.strip:
txt.strip()
return txt
class ContentExtractorFactory(object):
def extract(self, line):
pass
@staticmethod
def create(config):
if 'pos' in config:
if 'regex' not in config:
return PositionContentExtractor(config['pos'])
else:
extra_args = config['regex']
return RegexPositionContentExtractor(
pos=config['pos'], **extra_args)
class MetaFile(object):
def __init__(self, work_dir):
self.work_dir = work_dir
self.obj = dict()
def parse(self, config):
config = config['meta']
ret_obj = dict()
for key in config.keys():
val = config[key]
assert 'file' in val
reader = IFileReader.create(self.work_dir, val['file'])
assert reader is not None
assert 'fields' in val and isinstance(val['fields'], list)
fields_config = val['fields']
field_parsers = map(MetaFile.__field_config_mapper__, fields_config)
for each_parser in field_parsers:
assert each_parser is not None
for each_block in reader.read():
for each_parser in field_parsers:
each_parser.scan(each_block)
metas = map(lambda x: x.meta_field(), field_parsers)
# print metas
key_index = filter(
lambda x: x is not None,
map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None,
enumerate(metas)))[0]
key_map = []
for i in range(min(key_index, len(metas))):
key_map.append(i)
for i in range(key_index + 1, len(metas)):
key_map.append(i)
obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}}
for each_block in reader.read():
idx = field_parsers[key_index].parse(each_block)
val = []
for i, each_parser in enumerate(field_parsers):
if i != key_index:
val.append(each_parser.parse(each_block))
obj[idx] = val
ret_obj[key] = obj
self.obj = ret_obj
return ret_obj
@staticmethod
def __field_config_mapper__(conf):
assert isinstance(conf, dict)
extrator = ContentExtractorFactory.create(conf)
field_parser = FieldParserFactory.create(conf)
assert extrator is not None
assert field_parser is not None
return CompositeFieldParser(field_parser, extrator)
def dump(self, fp):
pickle.dump(self.obj, fp, pickle.HIGHEST_PROTOCOL)
def preprocess(binary_filename, dataset_dir, config, **kwargs):
assert isinstance(config, str)
with open(config, 'r') as config_file:
file_loader = None
if config.lower().endswith('.yaml'):
import yaml
file_loader = yaml
elif config.lower().endswith('.json'):
import json
file_loader = json
config = file_loader.load(config_file)
meta = MetaFile(dataset_dir)
meta.parse(config)
with open(binary_filename, 'wb') as outf:
meta.dump(outf)
if __name__ == '__main__':
args = docopt.docopt(__doc__, version='0.1.0')
kwargs = dict()
for key in args.keys():
if key != '--help':
param_name = key
assert isinstance(param_name, str)
param_name = param_name.replace('<', '')
param_name = param_name.replace('>', '')
param_name = param_name.replace('--', '')
kwargs[param_name] = args[key]
preprocess(**kwargs)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
cd "$(dirname "$0")"
# download the dataset
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
# unzip the dataset
unzip ml-1m.zip
# remove the unused zip file
rm ml-1m.zip
#!/bin/env python2
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Separate movielens 1m dataset to train/test file.
Usage:
./separate.py <input_file> [--test_ratio=<test_ratio>] [--delimiter=<delimiter>]
./separate.py -h | --help
Options:
-h --help Show this screen.
--version Show version.
--test_ratio=<test_ratio> Test ratio for separate [default: 0.1].
--delimiter=<delimiter> File delimiter [default: ,].
"""
import docopt
import collections
import random
def process(test_ratio, input_file, delimiter, **kwargs):
test_ratio = float(test_ratio)
rating_dict = collections.defaultdict(list)
with open(input_file, 'r') as f:
for line in f:
user_id = int(line.split(delimiter)[0])
rating_dict[user_id].append(line.strip())
with open(input_file + ".train", 'w') as train_file:
with open(input_file + ".test", 'w') as test_file:
for k in rating_dict.keys():
lines = rating_dict[k]
assert isinstance(lines, list)
random.shuffle(lines)
test_len = int(len(lines) * test_ratio)
for line in lines[:test_len]:
print >> test_file, line
for line in lines[test_len:]:
print >> train_file, line
if __name__ == '__main__':
args = docopt.docopt(__doc__, version='0.1.0')
kwargs = dict()
for key in args.keys():
if key != '--help':
param_name = key
assert isinstance(param_name, str)
param_name = param_name.replace('<', '')
param_name = param_name.replace('>', '')
param_name = param_name.replace('--', '')
kwargs[param_name] = args[key]
process(**kwargs)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import common_utils # parse
def __list_to_map__(lst):
ret_val = dict()
for each in lst:
k, v = each
ret_val[k] = v
return ret_val
def hook(settings, meta, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store
data meta.
:param obj: global object. It will passed to process routine.
:type obj: object
:param meta: the meta file object, which passed from trainer_config. Meta
file record movie/user features.
:param kwargs: unused other arguments.
"""
del kwargs # unused kwargs
# Header define slots that used for paddle.
# first part is movie features.
# second part is user features.
# final part is rating score.
# header is a list of [USE_SEQ_OR_NOT?, SlotType]
movie_headers = list(common_utils.meta_to_header(meta, 'movie'))
settings.movie_names = [h[0] for h in movie_headers]
headers = movie_headers
user_headers = list(common_utils.meta_to_header(meta, 'user'))
settings.user_names = [h[0] for h in user_headers]
headers.extend(user_headers)
headers.append(("rating", dense_vector(1))) # Score
# slot types.
settings.input_types = __list_to_map__(headers)
settings.meta = meta
@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
with open(filename, 'r') as f:
for line in f:
# Get a rating from file.
user_id, movie_id, score = map(int, line.split('::')[:-1])
# Scale score to [-5, +5]
score = float(score) * 2 - 5.0
# Get movie/user features by movie_id, user_id
movie_meta = settings.meta['movie'][movie_id]
user_meta = settings.meta['user'][user_id]
outputs = [('movie_id', movie_id - 1)]
# Then add movie features
for i, each_meta in enumerate(movie_meta):
outputs.append((settings.movie_names[i + 1], each_meta))
# Then add user id.
outputs.append(('user_id', user_id - 1))
# Then add user features.
for i, each_meta in enumerate(user_meta):
outputs.append((settings.user_names[i + 1], each_meta))
# Finally, add score
outputs.append(('rating', [score]))
# Return data to paddle
yield __list_to_map__(outputs)
#!/usr/bin/python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import re
import math
def get_best_pass(log_filename):
with open(log_filename, 'r') as f:
text = f.read()
pattern = re.compile('Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
re.S)
results = re.findall(pattern, text)
sorted_results = sorted(results, key=lambda result: float(result[0]))
return sorted_results[0]
log_filename = sys.argv[1]
log = get_best_pass(log_filename)
predict_error = math.sqrt(float(log[0])) / 2
print 'Best pass is %s, error is %s, which means predict get error as %f' % (
log[1], log[0], predict_error)
evaluate_pass = "output/pass-%s" % log[1]
print "evaluating from pass %s" % evaluate_pass
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | sort | head -n 1
}
LOG=`get_best_pass log.txt`
LOG=(${LOG})
echo 'Best pass is '${LOG[1]}, ' error is '${LOG[0]}, 'which means predict get error as '`echo ${LOG[0]} | python -c 'import math; print math.sqrt(float(raw_input()))/2'`
evaluate_pass="output/pass-${LOG[1]}"
echo 'evaluating from pass '$evaluate_pass
#!/bin/env python2
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from py_paddle import swig_paddle, DataProviderConverter
from common_utils import *
from paddle.trainer.config_parser import parse_config
try:
import cPickle as pickle
except ImportError:
import pickle
import sys
if __name__ == '__main__':
model_path = sys.argv[1]
swig_paddle.initPaddle('--use_gpu=0')
conf = parse_config("trainer_config.py", "is_predict=1")
network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine)
network.loadParameters(model_path)
with open('./data/meta.bin', 'rb') as f:
meta = pickle.load(f)
headers = [h[1] for h in meta_to_header(meta, 'movie')]
headers.extend([h[1] for h in meta_to_header(meta, 'user')])
cvt = DataProviderConverter(headers)
while True:
movie_id = int(raw_input("Input movie_id: "))
user_id = int(raw_input("Input user_id: "))
movie_meta = meta['movie'][movie_id] # Query Data From Meta.
user_meta = meta['user'][user_id]
data = [movie_id - 1]
data.extend(movie_meta)
data.append(user_id - 1)
data.extend(user_meta)
print "Prediction Score is %.2f" % (
(network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5)
/ 2)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
UNAME_STR=`uname`
if [[ ${UNAME_STR} == 'Linux' ]]; then
SHUF_PROG='shuf'
else
SHUF_PROG='gshuf'
fi
cd "$(dirname "$0")"
delimiter='::'
dir=ml-1m
cd data
echo 'generate meta config file'
python config_generator.py config.json > meta_config.json
echo 'generate meta file'
python meta_generator.py $dir meta.bin --config=meta_config.json
echo 'split train/test file'
python split.py $dir/ratings.dat --delimiter=${delimiter} --test_ratio=0.1
echo 'shuffle train file'
${SHUF_PROG} $dir/ratings.dat.train > ratings.dat.train
cp $dir/ratings.dat.test .
echo "./data/ratings.dat.train" > train.list
echo "./data/ratings.dat.test" > test.list
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
paddle train \
--config=trainer_config.py \
--save_dir=./output \
--use_gpu=false \
--trainer_count=4\
--test_all_data_in_one_period=true \
--log_period=100 \
--dot_period=1 \
--num_passes=50 2>&1 | tee 'log.txt'
paddle usage -l log.txt -e $? -n "recommendation" >/dev/null 2>&1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
try:
import cPickle as pickle
except ImportError:
import pickle
is_predict = get_config_arg('is_predict', bool, False)
META_FILE = 'data/meta.bin'
with open(META_FILE, 'rb') as f:
# load meta file
meta = pickle.load(f)
settings(
batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer())
def construct_feature(name):
"""
Construct movie/user features.
This method read from meta data. Then convert feature to neural network due
to feature type. The map relation as follow.
* id: embedding => fc
* embedding:
is_sequence: embedding => context_projection => fc => pool
not sequence: embedding => fc
* one_hot_dense: fc => fc
Then gather all features vector, and use a fc layer to combined them as
return.
:param name: 'movie' or 'user'
:type name: basestring
:return: combined feature output
:rtype: LayerOutput
"""
__meta__ = meta[name]['__meta__']['raw_meta']
fusion = []
for each_meta in __meta__:
type_name = each_meta['type']
slot_name = each_meta.get('name', '%s_id' % name)
if type_name == 'id':
slot_dim = each_meta['max']
embedding = embedding_layer(
input=data_layer(
slot_name, size=slot_dim), size=256)
fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
slot_dim = len(each_meta['dict'])
din = data_layer(slot_name, slot_dim)
embedding = embedding_layer(input=din, size=256)
if is_seq:
fusion.append(
text_conv_pool(
input=embedding, context_len=5, hidden_size=256))
else:
fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'one_hot_dense':
slot_dim = len(each_meta['dict'])
hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256)
fusion.append(fc_layer(input=hidden, size=256))
return fc_layer(name="%s_fusion" % name, input=fusion, size=256)
movie_feature = construct_feature("movie")
user_feature = construct_feature("user")
similarity = cos_sim(a=movie_feature, b=user_feature)
if not is_predict:
outputs(mse_cost(input=similarity, label=data_layer('rating', size=1)))
define_py_data_sources2(
'data/train.list',
'data/test.list',
module='dataprovider',
obj='process',
args={'meta': meta})
else:
outputs(similarity)
*.pyc
train.log
data/feature
data/conll05st-release/
data/src.dict
data/test.wsj.props
data/test.wsj.seq_pair
data/test.wsj.words
data/tgt.dict
output
data/emb
data/targetDict.txt
data/verbDict.txt
data/wordDict.txt
import math
import numpy as np
import gzip
import logging
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.evaluator as evaluator
import paddle.v2 as paddle
logger = logging.getLogger('paddle')
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
default_std = 1 / math.sqrt(hidden_dim) / 3.0
mix_hidden_lr = 1e-3
def d_type(size):
return paddle.data_type.integer_value_sequence(size)
def db_lstm():
#8 features
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True)
std_0 = paddle.attr.Param(initial_std=0.)
std_default = paddle.attr.Param(initial_std=default_std)
predicate_embedding = paddle.layer.embedding(
size=word_dim,
input=predicate,
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
paddle.layer.embedding(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = paddle.layer.lstmemory(
input=mix_hidden,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = paddle.layer.mixed(
size=label_dict_len,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
return feature_out
def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)
def train():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
feature_out = db_lstm()
target = paddle.layer.data(name='target', type=d_type(label_dict_len))
crf_cost = paddle.layer.crf(size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(
name='crfw',
initial_std=default_std,
learning_rate=mix_hidden_lr))
crf_dec = paddle.layer.crf_decoding(
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
evaluator.sum(input=crf_dec)
# create parameters
parameters = paddle.parameters.create(crf_cost)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
# create optimizer
optimizer = paddle.optimizer.Momentum(
momentum=0,
learning_rate=2e-2,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=10000), )
trainer = paddle.trainer.SGD(cost=crf_cost,
parameters=parameters,
update_equation=optimizer,
extra_layers=crf_dec)
reader = paddle.batch(
paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10)
feeding = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
'ctx_0_data': 3,
'ctx_p1_data': 4,
'ctx_p2_data': 5,
'verb_data': 6,
'mark_data': 7,
'target': 8
}
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
logger.info("Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics))
if event.batch_id and event.batch_id % 1000 == 0:
result = trainer.test(reader=reader, feeding=feeding)
logger.info("\nTest with Pass %d, Batch %d, %s" %
(event.pass_id, event.batch_id, result.metrics))
if isinstance(event, paddle.event.EndPass):
# save parameters
with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
parameters.to_tar(f)
result = trainer.test(reader=reader, feeding=feeding)
logger.info("\nTest with Pass %d, %s" %
(event.pass_id, result.metrics))
trainer.train(
reader=reader,
event_handler=event_handler,
num_passes=10,
feeding=feeding)
def infer_a_batch(inferer, test_data, word_dict, pred_dict, label_dict):
probs = inferer.infer(input=test_data, field='id')
assert len(probs) == sum(len(x[0]) for x in test_data)
for idx, test_sample in enumerate(test_data):
start_id = 0
pred_str = "%s\t" % (pred_dict[test_sample[6][0]])
for w, tag in zip(test_sample[0],
probs[start_id:start_id + len(test_sample[0])]):
pred_str += "%s[%s] " % (word_dict[w], label_dict[tag])
print(pred_str.strip())
start_id += len(test_sample[0])
def infer():
label_dict_reverse = dict((value, key)
for key, value in label_dict.iteritems())
word_dict_reverse = dict((value, key)
for key, value in word_dict.iteritems())
pred_dict_reverse = dict((value, key)
for key, value in verb_dict.iteritems())
test_creator = paddle.dataset.conll05.test()
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
feature_out = db_lstm()
predict = paddle.layer.crf_decoding(
size=label_dict_len,
input=feature_out,
param_attr=paddle.attr.Param(name='crfw'))
test_pass = 0
with gzip.open('params_pass_%d.tar.gz' % (test_pass)) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
inferer = paddle.inference.Inference(
output_layer=predict, parameters=parameters)
# prepare test data
test_data = []
test_batch_size = 50
for idx, item in enumerate(test_creator()):
test_data.append(item[0:8])
if idx and (not idx % test_batch_size):
infer_a_batch(
inferer,
test_data,
word_dict_reverse,
pred_dict_reverse,
label_dict_reverse, )
test_data = []
infer_a_batch(
inferer,
test_data,
word_dict_reverse,
pred_dict_reverse,
label_dict_reverse, )
test_data = []
def main(is_inferring=False):
if is_inferring:
infer()
else:
train()
if __name__ == '__main__':
main(is_inferring=False)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
from optparse import OptionParser
def extract_dict_features(pair_file, feature_file):
with open(pair_file) as fin, open(feature_file, 'w') as feature_out:
for line in fin:
sentence, predicate, labels = line.strip().split('\t')
sentence_list = sentence.split()
labels_list = labels.split()
verb_index = labels_list.index('B-V')
mark = [0] * len(labels_list)
if verb_index > 0:
mark[verb_index - 1] = 1
ctx_n1 = sentence_list[verb_index - 1]
else:
ctx_n1 = 'bos'
if verb_index > 1:
mark[verb_index - 2] = 1
ctx_n2 = sentence_list[verb_index - 2]
else:
ctx_n2 = 'bos'
mark[verb_index] = 1
ctx_0 = sentence_list[verb_index]
if verb_index < len(labels_list) - 1:
mark[verb_index + 1] = 1
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
if verb_index < len(labels_list) - 2:
mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2]
else:
ctx_p2 = 'eos'
feature_str = sentence + '\t' \
+ predicate + '\t' \
+ ctx_n2 + '\t' \
+ ctx_n1 + '\t' \
+ ctx_0 + '\t' \
+ ctx_p1 + '\t' \
+ ctx_p2 + '\t' \
+ ' '.join([str(i) for i in mark]) + '\t' \
+ labels
feature_out.write(feature_str + '\n')
if __name__ == '__main__':
usage = '-p pair_file -f feature_file'
parser = OptionParser(usage)
parser.add_option('-p', dest='pair_file', help='the pair file')
parser.add_option('-f', dest='feature_file', help='the feature file')
(options, args) = parser.parse_args()
extract_dict_features(options.pair_file, options.feature_file)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
from optparse import OptionParser
def read_labels(props_file):
'''
a sentence maybe has more than one verb, each verb has its label sequence
label[], is a 3-dimension list.
the first dim is to store all sentence's label seqs, len is the sentence number
the second dim is to store all label sequences for one sentences
the third dim is to store each label for one word
'''
labels = []
with open(props_file) as fin:
label_seqs_for_one_sentences = []
one_seg_in_file = []
for line in fin:
line = line.strip()
if line == '':
for i in xrange(len(one_seg_in_file[0])):
a_kind_lable = [x[i] for x in one_seg_in_file]
label_seqs_for_one_sentences.append(a_kind_lable)
labels.append(label_seqs_for_one_sentences)
one_seg_in_file = []
label_seqs_for_one_sentences = []
else:
part = line.split()
one_seg_in_file.append(part)
return labels
def read_sentences(words_file):
sentences = []
with open(words_file) as fin:
s = ''
for line in fin:
line = line.strip()
if line == '':
sentences.append(s)
s = ''
else:
s += line + ' '
return sentences
def transform_labels(sentences, labels):
sen_lab_pair = []
for i in xrange(len(sentences)):
if len(labels[i]) == 1:
continue
else:
verb_list = []
for x in labels[i][0]:
if x != '-':
verb_list.append(x)
for j in xrange(1, len(labels[i])):
label_list = labels[i][j]
current_tag = 'O'
is_in_bracket = False
label_seq = []
verb_word = ''
for ll in label_list:
if ll == '*' and is_in_bracket == False:
label_seq.append('O')
elif ll == '*' and is_in_bracket == True:
label_seq.append('I-' + current_tag)
elif ll == '*)':
label_seq.append('I-' + current_tag)
is_in_bracket = False
elif ll.find('(') != -1 and ll.find(')') != -1:
current_tag = ll[1:ll.find('*')]
label_seq.append('B-' + current_tag)
is_in_bracket = False
elif ll.find('(') != -1 and ll.find(')') == -1:
current_tag = ll[1:ll.find('*')]
label_seq.append('B-' + current_tag)
is_in_bracket = True
else:
print 'error:', ll
sen_lab_pair.append((sentences[i], verb_list[j - 1], label_seq))
return sen_lab_pair
def write_file(sen_lab_pair, output_file):
with open(output_file, 'w') as fout:
for x in sen_lab_pair:
sentence = x[0]
label_seq = ' '.join(x[2])
assert len(sentence.split()) == len(x[2])
fout.write(sentence + '\t' + x[1] + '\t' + label_seq + '\n')
if __name__ == '__main__':
usage = '-w words_file -p props_file -o output_file'
parser = OptionParser(usage)
parser.add_option('-w', dest='words_file', help='the words file')
parser.add_option('-p', dest='props_file', help='the props file')
parser.add_option('-o', dest='output_file', help='the output_file')
(options, args) = parser.parse_args()
sentences = read_sentences(options.words_file)
labels = read_labels(options.props_file)
sen_lab_pair = transform_labels(sentences, labels)
write_file(sen_lab_pair, options.output_file)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt
wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb
tar -xzvf conll05st-tests.tar.gz
rm conll05st-tests.tar.gz
cp ./conll05st-release/test.wsj/words/test.wsj.words.gz .
cp ./conll05st-release/test.wsj/props/test.wsj.props.gz .
gunzip test.wsj.words.gz
gunzip test.wsj.props.gz
python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair
python extract_dict_feature.py -p test.wsj.seq_pair -f feature
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0
def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
settings.predicate_dict = predicate_dict
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)), integer_value_sequence(2),
integer_value_sequence(len(label_dict))
]
def get_batch_size(yeild_data):
return len(yeild_data[0])
@provider(
init_hook=hook,
should_shuffle=True,
calc_batch_size=get_batch_size,
can_over_batch_size=True,
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
label_slot = [settings.label_dict.get(w) for w in label_list]
yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot, label_slot
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import sys
from paddle.trainer_config_helpers import *
#file paths
word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
predicate_file = './data/verbDict.txt'
train_list_file = './data/train.list'
test_list_file = './data/test.list'
is_test = get_config_arg('is_test', bool, False)
is_predict = get_config_arg('is_predict', bool, False)
if not is_predict:
#load dictionaries
word_dict = dict()
label_dict = dict()
predicate_dict = dict()
with open(word_dict_file, 'r') as f_word, \
open(label_dict_file, 'r') as f_label, \
open(predicate_file, 'r') as f_pre:
for i, line in enumerate(f_word):
w = line.strip()
word_dict[w] = i
for i, line in enumerate(f_label):
w = line.strip()
label_dict[w] = i
for i, line in enumerate(f_pre):
w = line.strip()
predicate_dict[w] = i
if is_test:
train_list_file = None
#define data provider
define_py_data_sources2(
train_list=train_list_file,
test_list=test_list_file,
module='dataprovider',
obj='process',
args={
'word_dict': word_dict,
'label_dict': label_dict,
'predicate_dict': predicate_dict
})
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(predicate_dict)
else:
word_dict_len = get_config_arg('dict_len', int)
label_dict_len = get_config_arg('label_len', int)
pred_len = get_config_arg('pred_len', int)
############################## Hyper-parameters ##################################
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
########################### Optimizer #######################################
settings(
batch_size=150,
learning_method=MomentumOptimizer(momentum=0),
learning_rate=2e-2,
regularization=L2Regularization(8e-4),
is_async=False,
model_average=ModelAverage(
average_window=0.5, max_average_window=10000), )
####################################### network ##############################
#8 features and 1 target
word = data_layer(name='word_data', size=word_dict_len)
predicate = data_layer(name='verb_data', size=pred_len)
ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len)
ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len)
ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len)
ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
std_0 = ParameterAttribute(initial_std=0.)
std_default = ParameterAttribute(initial_std=default_std)
predicate_embedding = embedding_layer(
size=word_dim,
input=predicate,
param_attr=ParameterAttribute(
name='vemb', initial_std=default_std))
mark_embedding = embedding_layer(
name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
embedding_layer(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = mixed_layer(
name='hidden0',
size=hidden_dim,
bias_attr=std_default,
input=[
full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = ParameterAttribute(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(
name='lstm0',
input=hidden_0,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = mixed_layer(
name='hidden' + str(i),
size=hidden_dim,
bias_attr=std_default,
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = lstmemory(
name='lstm' + str(i),
input=mix_hidden,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = mixed_layer(
name='output',
size=label_dict_len,
bias_attr=std_default,
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
if not is_predict:
crf_l = crf_layer(
name='crf',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(
name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(name='crfw'))
eval = sum_evaluator(input=crf_dec_l)
outputs(crf_l)
else:
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
param_attr=ParameterAttribute(name='crfw'))
outputs(crf_dec_l)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config
"""
Usage: run following command to show help message.
python predict.py -h
"""
UNK_IDX = 0
class Prediction():
def __init__(self, train_conf, dict_file, model_dir, label_file,
predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
model_dir: directory of model.
"""
self.dict = {}
self.labels = {}
self.predicate_dict = {}
self.labels_reverse = {}
self.load_dict_label(dict_file, label_file, predicate_dict_file)
len_dict = len(self.dict)
len_label = len(self.labels)
len_pred = len(self.predicate_dict)
conf = parse_config(
train_conf, 'dict_len=' + str(len_dict) + ',label_len=' +
str(len_label) + ',pred_len=' + str(len_pred) + ',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(model_dir)
slots = [
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_pred), integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots)
def load_dict_label(self, dict_file, label_file, predicate_dict_file):
"""
Load dictionary from self.dict_file.
"""
for line_count, line in enumerate(open(dict_file, 'r')):
self.dict[line.strip()] = line_count
for line_count, line in enumerate(open(label_file, 'r')):
self.labels[line.strip()] = line_count
self.labels_reverse[line_count] = line.strip()
for line_count, line in enumerate(open(predicate_dict_file, 'r')):
self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file):
"""
Get input data of paddle format.
"""
with open(data_file, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip(
).split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)
] * sen_len
ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot
def predict(self, data_file, output_file):
"""
data_file: file name of input data.
"""
input = self.converter(self.get_data(data_file))
output = self.network.forwardTest(input)
lab = output[0]["id"].tolist()
with open(data_file, 'r') as fin, open(output_file, 'w') as fout:
index = 0
for line in fin:
sen = line.split('\t')[0]
len_sen = len(sen.split())
line_labels = lab[index:index + len_sen]
index += len_sen
fout.write(sen + '\t' + ' '.join(
[self.labels_reverse[i] for i in line_labels]) + '\n')
def option_parser():
usage = (
"python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-c",
"--tconf",
action="store",
dest="train_conf",
help="network config")
parser.add_option(
"-d",
"--dict",
action="store",
dest="dict_file",
help="dictionary file")
parser.add_option(
"-l",
"--label",
action="store",
dest="label_file",
default=None,
help="label file")
parser.add_option(
"-p",
"--predict_dict_file",
action="store",
dest="predict_dict_file",
default=None,
help="predict_dict_file")
parser.add_option(
"-i",
"--data",
action="store",
dest="data_file",
help="data file to predict")
parser.add_option(
"-w",
"--model",
action="store",
dest="model_path",
default=None,
help="model path")
parser.add_option(
"-o",
"--output_file",
action="store",
dest="output_file",
default=None,
help="output file")
return parser.parse_args()
def main():
options, args = option_parser()
train_conf = options.train_conf
data_file = options.data_file
dict_file = options.dict_file
model_path = options.model_path
label_file = options.label_file
predict_dict_file = options.predict_dict_file
output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
predict = Prediction(train_conf, dict_file, model_path, label_file,
predict_dict_file)
predict.predict(data_file, output_file)
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
sort -n | head -n 1
}
log=train.log
LOG=`get_best_pass $log`
LOG=(${LOG})
best_model_path="output/pass-${LOG[1]}"
config_file=db_lstm.py
dict_file=./data/wordDict.txt
label_file=./data/targetDict.txt
predicate_dict_file=./data/verbDict.txt
input_file=./data/feature
output_file=predict.res
python predict.py \
-c $config_file \
-w $best_model_path \
-l $label_file \
-p $predicate_dict_file \
-d $dict_file \
-i $input_file \
-o $output_file
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
sort -n | head -n 1
}
log=train.log
LOG=`get_best_pass $log`
LOG=(${LOG})
evaluate_pass="output/pass-${LOG[1]}"
echo 'evaluating from pass '$evaluate_pass
model_list=./model.list
touch $model_list | echo $evaluate_pass > $model_list
paddle train \
--config=./db_lstm.py \
--model_list=$model_list \
--job=test \
--use_gpu=false \
--config_args=is_test=1 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "semantic_role_labeling_test" >/dev/null 2>&1
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
paddle train \
--config=./db_lstm.py \
--use_gpu=0 \
--log_period=5000 \
--trainer_count=1 \
--show_parameter_stats_period=5000 \
--save_dir=./output \
--num_passes=10000 \
--average_test_period=10000000 \
--init_model_path=./data \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "semantic_role_labeling_train" >/dev/null 2>&1
data/aclImdb
data/imdb
data/pre-imdb
data/mosesdecoder-master
logs/
model_output
dataprovider_copy_1.py
model.list
test.log
train.log
*.pyc
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
def hook(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
integer_value_sequence(len(settings.word_dict)), integer_value(2)
]
settings.logger.info('dict len : %d' % (len(settings.word_dict)))
@provider(init_hook=hook)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line_count, line in enumerate(fdata):
label, comment = line.strip().split('\t\t')
label = int(label)
words = comment.split()
word_slot = [
settings.word_dict[w] for w in words if w in settings.word_dict
]
if not word_slot:
continue
yield word_slot, label
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os, sys
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config
"""
Usage: run following command to show help message.
python predict.py -h
"""
class SentimentPrediction():
def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
model_dir: directory of model.
"""
self.train_conf = train_conf
self.dict_file = dict_file
self.word_dict = {}
self.dict_dim = self.load_dict()
self.model_dir = model_dir
if model_dir is None:
self.model_dir = os.path.dirname(train_conf)
self.label = None
if label_file is not None:
self.load_label(label_file)
conf = parse_config(train_conf, "is_predict=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(self.model_dir)
input_types = [integer_value_sequence(self.dict_dim)]
self.converter = DataProviderConverter(input_types)
def load_dict(self):
"""
Load dictionary from self.dict_file.
"""
for line_count, line in enumerate(open(self.dict_file, 'r')):
self.word_dict[line.strip().split('\t')[0]] = line_count
return len(self.word_dict)
def load_label(self, label_file):
"""
Load label.
"""
self.label = {}
for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0]
def get_index(self, data):
"""
transform word into integer index according to the dictionary.
"""
words = data.strip().split()
word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
return word_slot
def batch_predict(self, data_batch):
input = self.converter(data_batch)
output = self.network.forwardTest(input)
prob = output[0]["value"]
labs = np.argsort(-prob)
for idx, lab in enumerate(labs):
if self.label is None:
print("predicting label is %d" % (lab[0]))
else:
print("predicting label is %s" % (self.label[lab[0]]))
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-n",
"--tconf",
action="store",
dest="train_conf",
help="network config")
parser.add_option(
"-d",
"--dict",
action="store",
dest="dict_file",
help="dictionary file")
parser.add_option(
"-b",
"--label",
action="store",
dest="label",
default=None,
help="dictionary file")
parser.add_option(
"-c",
"--batch_size",
type="int",
action="store",
dest="batch_size",
default=1,
help="the batch size for prediction")
parser.add_option(
"-w",
"--model",
action="store",
dest="model_path",
default=None,
help="model path")
return parser.parse_args()
def main():
options, args = option_parser()
train_conf = options.train_conf
batch_size = options.batch_size
dict_file = options.dict_file
model_path = options.model_path
label = options.label
swig_paddle.initPaddle("--use_gpu=0")
predict = SentimentPrediction(train_conf, dict_file, model_path, label)
batch = []
for line in sys.stdin:
words = predict.get_index(line)
if words:
batch.append([words])
else:
print('All the words in [%s] are not in the dictionary.' % line)
if len(batch) == batch_size:
predict.batch_predict(batch)
batch = []
if len(batch) > 0:
predict.batch_predict(batch)
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
#Note the default model is pass-00002, you shold make sure the model path
#exists or change the mode path.
model=model_output/pass-00002/
config=trainer_config.py
label=data/pre-imdb/labels.list
cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \
--tconf=$config\
--model=$model \
--label=$label \
--dict=./data/pre-imdb/dict.txt \
--batch_size=1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import random
import operator
import numpy as np
from subprocess import Popen, PIPE
from os.path import join as join_path
from optparse import OptionParser
from paddle.utils.preprocess_util import *
"""
Usage: run following command to show help message.
python preprocess.py -h
"""
def save_dict(dict, filename, is_reverse=True):
"""
Save dictionary into file.
dict: input dictionary.
filename: output file name, string.
is_reverse: True, descending order by value.
False, ascending order by value.
"""
f = open(filename, 'w')
for k, v in sorted(dict.items(), key=operator.itemgetter(1),\
reverse=is_reverse):
f.write('%s\t%s\n' % (k, v))
f.close()
def tokenize(sentences):
"""
Use tokenizer.perl to tokenize input sentences.
tokenizer.perl is tool of Moses.
sentences : a list of input sentences.
return: a list of processed text.
"""
dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
assert isinstance(sentences, list)
text = "\n".join(sentences)
tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
tok_text, _ = tokenizer.communicate(text)
toks = tok_text.split('\n')[:-1]
return toks
def read_lines(path):
"""
path: String, file path.
return a list of sequence.
"""
seqs = []
with open(path, 'r') as f:
for line in f.readlines():
line = line.strip()
if len(line):
seqs.append(line)
return seqs
class SentimentDataSetCreate():
"""
A class to process data for sentiment analysis task.
"""
def __init__(self,
data_path,
output_path,
use_okenizer=True,
multi_lines=False):
"""
data_path: string, traing and testing dataset path
output_path: string, output path, store processed dataset
multi_lines: whether a file has multi lines.
In order to shuffle fully, it needs to read all files into
memory, then shuffle them if one file has multi lines.
"""
self.output_path = output_path
self.data_path = data_path
self.train_dir = 'train'
self.test_dir = 'test'
self.train_list = "train.list"
self.test_list = "test.list"
self.label_list = "labels.list"
self.classes_num = 0
self.batch_size = 50000
self.batch_dir = 'batches'
self.dict_file = "dict.txt"
self.dict_with_test = False
self.dict_size = 0
self.word_count = {}
self.tokenizer = use_okenizer
self.overwrite = False
self.multi_lines = multi_lines
self.train_dir = join_path(data_path, self.train_dir)
self.test_dir = join_path(data_path, self.test_dir)
self.train_list = join_path(output_path, self.train_list)
self.test_list = join_path(output_path, self.test_list)
self.label_list = join_path(output_path, self.label_list)
self.dict_file = join_path(output_path, self.dict_file)
def data_list(self, path):
"""
create dataset from path
path: data path
return: data list
"""
label_set = get_label_set_from_dir(path)
data = []
for lab_name in label_set.keys():
file_paths = list_files(join_path(path, lab_name))
for p in file_paths:
data.append({"label" : label_set[lab_name],\
"seq_path": p})
return data, label_set
def create_dict(self, data):
"""
create dict for input data.
data: list, [sequence, sequnce, ...]
"""
for seq in data:
for w in seq.strip().lower().split():
if w not in self.word_count:
self.word_count[w] = 1
else:
self.word_count[w] += 1
def create_dataset(self):
"""
create file batches and dictionary of train data set.
If the self.overwrite is false and train.list already exists in
self.output_path, this function will not create and save file
batches from the data set path.
return: dictionary size, class number.
"""
out_path = self.output_path
if out_path and not os.path.exists(out_path):
os.makedirs(out_path)
# If self.overwrite is false or self.train_list has existed,
# it will not process dataset.
if not (self.overwrite or not os.path.exists(self.train_list)):
print "%s already exists." % self.train_list
return
# Preprocess train data.
train_data, train_lab_set = self.data_list(self.train_dir)
print "processing train set..."
file_lists = self.save_data(train_data, "train", self.batch_size, True,
True)
save_list(file_lists, self.train_list)
# If have test data path, preprocess test data.
if os.path.exists(self.test_dir):
test_data, test_lab_set = self.data_list(self.test_dir)
assert (train_lab_set == test_lab_set)
print "processing test set..."
file_lists = self.save_data(test_data, "test", self.batch_size,
False, self.dict_with_test)
save_list(file_lists, self.test_list)
# save labels set.
save_dict(train_lab_set, self.label_list, False)
self.classes_num = len(train_lab_set.keys())
# save dictionary.
save_dict(self.word_count, self.dict_file, True)
self.dict_size = len(self.word_count)
def save_data(self,
data,
prefix="",
batch_size=50000,
is_shuffle=False,
build_dict=False):
"""
Create batches for a Dataset object.
data: the Dataset object to process.
prefix: the prefix of each batch.
batch_size: number of data in each batch.
build_dict: whether to build dictionary for data
return: list of batch names
"""
if is_shuffle and self.multi_lines:
return self.save_data_multi_lines(data, prefix, batch_size,
build_dict)
if is_shuffle:
random.shuffle(data)
num_batches = int(math.ceil(len(data) / float(batch_size)))
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
"%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, len(data))
# read a batch of data
label_list, data_list = self.get_data_list(begin, end, data)
if build_dict:
self.create_dict(data_list)
self.save_file(label_list, data_list, batch_name)
batch_names.append(batch_name)
return batch_names
def get_data_list(self, begin, end, data):
"""
begin: int, begining index of data.
end: int, ending index of data.
data: a list of {"seq_path": seqquence path, "label": label index}
return a list of label and a list of sequence.
"""
label_list = []
data_list = []
for j in range(begin, end):
seqs = read_lines(data[j]["seq_path"])
lab = int(data[j]["label"])
#File may have multiple lines.
for seq in seqs:
data_list.append(seq)
label_list.append(lab)
if self.tokenizer:
data_list = tokenize(data_list)
return label_list, data_list
def save_data_multi_lines(self,
data,
prefix="",
batch_size=50000,
build_dict=False):
"""
In order to shuffle fully, there is no need to load all data if
each file only contains one sample, it only needs to shuffle list
of file name. But one file contains multi lines, each line is one
sample. It needs to read all data into memory to shuffle fully.
This interface is mainly for data containning multi lines in each
file, which consumes more memory if there is a great mount of data.
data: the Dataset object to process.
prefix: the prefix of each batch.
batch_size: number of data in each batch.
build_dict: whether to build dictionary for data
return: list of batch names
"""
assert self.multi_lines
label_list = []
data_list = []
# read all data
label_list, data_list = self.get_data_list(0, len(data), data)
if build_dict:
self.create_dict(data_list)
length = len(label_list)
perm_list = np.array([i for i in xrange(length)])
random.shuffle(perm_list)
num_batches = int(math.ceil(length / float(batch_size)))
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
"%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, length)
sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
sub_data = [data_list[perm_list[i]] for i in range(begin, end)]
self.save_file(sub_label, sub_data, batch_name)
batch_names.append(batch_name)
return batch_names
def save_file(self, label_list, data_list, filename):
"""
Save data into file.
label_list: a list of int value.
data_list: a list of sequnece.
filename: output file name.
"""
f = open(filename, 'w')
print "saving file: %s" % filename
for lab, seq in zip(label_list, data_list):
f.write('%s\t\t%s\n' % (lab, seq))
f.close()
def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]")
parser.add_option(
"-i",
"--data",
action="store",
dest="input",
help="Input data directory.")
parser.add_option(
"-o",
"--output",
action="store",
dest="output",
default=None,
help="Output directory.")
parser.add_option(
"-t",
"--tokenizer",
action="store",
dest="use_tokenizer",
default=True,
help="Whether to use tokenizer.")
parser.add_option("-m", "--multi_lines", action="store",
dest="multi_lines", default=False,
help="If input text files have multi lines and they "\
"need to be shuffled, you should set -m True,")
return parser.parse_args()
def main():
options, args = option_parser()
data_dir = options.input
output_dir = options.output
use_tokenizer = options.use_tokenizer
multi_lines = options.multi_lines
if output_dir is None:
outname = os.path.basename(options.input)
output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname)
data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer,
multi_lines)
data_creator.create_dataset()
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
echo "Start to preprcess..."
data_dir="./data/imdb"
python preprocess.py -i $data_dir
echo "Done."
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from os.path import join as join_path
from paddle.trainer_config_helpers import *
def sentiment_data(data_dir=None,
is_test=False,
is_predict=False,
train_list="train.list",
test_list="test.list",
dict_file="dict.txt"):
"""
Predefined data provider for sentiment analysis.
is_test: whether this config is used for test.
is_predict: whether this config is used for prediction.
train_list: text file name, containing a list of training set.
test_list: text file name, containing a list of testing set.
dict_file: text file name, containing dictionary.
"""
dict_dim = len(open(join_path(data_dir, "dict.txt")).readlines())
class_dim = len(open(join_path(data_dir, 'labels.list')).readlines())
if is_predict:
return dict_dim, class_dim
if data_dir is not None:
train_list = join_path(data_dir, train_list)
test_list = join_path(data_dir, test_list)
dict_file = join_path(data_dir, dict_file)
train_list = train_list if not is_test else None
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(open(dict_file, 'r')):
word_dict[line.split('\t')[0]] = i
define_py_data_sources2(
train_list,
test_list,
module="dataprovider",
obj="process",
args={'dictionary': word_dict})
return dict_dim, class_dim
def bidirectional_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
lstm_dim=128,
is_predict=False):
data = data_layer("word", input_dim)
emb = embedding_layer(input=data, size=emb_dim)
bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
if not is_predict:
lbl = data_layer("label", 1)
outputs(classification_cost(input=output, label=lbl))
else:
outputs(output)
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3,
is_predict=False):
"""
A Wrapper for sentiment classification task.
This network uses bi-directional recurrent network,
consisting three LSTM layers. This configure is referred to
the paper as following url, but use fewer layrs.
http://www.aclweb.org/anthology/P15-1109
input_dim: here is word dictionary dimension.
class_dim: number of categories.
emb_dim: dimension of word embedding.
hid_dim: dimension of hidden layer.
stacked_num: number of stacked lstm-hidden layer.
is_predict: is predicting or not.
Some layers is not needed in network when predicting.
"""
hid_lr = 1e-3
assert stacked_num % 2 == 1
layer_attr = ExtraLayerAttribute(drop_rate=0.5)
fc_para_attr = ParameterAttribute(learning_rate=hid_lr)
lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=1.)
para_attr = [fc_para_attr, lstm_para_attr]
bias_attr = ParameterAttribute(initial_std=0., l2_rate=0.)
relu = ReluActivation()
linear = LinearActivation()
data = data_layer("word", input_dim)
emb = embedding_layer(input=data, size=emb_dim)
fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
lstm1 = lstmemory(
input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = fc_layer(
input=inputs,
size=hid_dim,
act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling())
lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling())
output = fc_layer(
input=[fc_last, lstm_last],
size=class_dim,
act=SoftmaxActivation(),
bias_attr=bias_attr,
param_attr=para_attr)
if is_predict:
outputs(output)
else:
outputs(classification_cost(input=output, label=data_layer('label', 1)))
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* classification_error_evaluator=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
sort -n | head -n 1
}
log=train.log
LOG=`get_best_pass $log`
LOG=(${LOG})
evaluate_pass="model_output/pass-${LOG[1]}"
echo 'evaluating from pass '$evaluate_pass
model_list=./model.list
touch $model_list | echo $evaluate_pass > $model_list
net_conf=trainer_config.py
paddle train --config=$net_conf \
--model_list=$model_list \
--job=test \
--use_gpu=false \
--trainer_count=4 \
--config_args=is_test=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "sentiment_test" >/dev/null 2>&1
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
config=trainer_config.py
output=./model_output
paddle train --config=$config \
--save_dir=$output \
--job=train \
--use_gpu=false \
--trainer_count=4 \
--num_passes=10 \
--log_period=10 \
--dot_period=20 \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "sentiment_train" >/dev/null 2>&1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle.v2 as paddle
def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
conv_3 = paddle.networks.sequence_conv_pool(
input=emb, context_len=3, hidden_size=hid_dim)
conv_4 = paddle.networks.sequence_conv_pool(
input=emb, context_len=4, hidden_size=hid_dim)
output = paddle.layer.fc(input=[conv_3, conv_4],
size=class_dim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3):
"""
A Wrapper for sentiment classification task.
This network uses bi-directional recurrent network,
consisting three LSTM layers. This configure is referred to
the paper as following url, but use fewer layrs.
http://www.aclweb.org/anthology/P15-1109
input_dim: here is word dictionary dimension.
class_dim: number of categories.
emb_dim: dimension of word embedding.
hid_dim: dimension of hidden layer.
stacked_num: number of stacked lstm-hidden layer.
"""
assert stacked_num % 2 == 1
layer_attr = paddle.attr.Extra(drop_rate=0.5)
fc_para_attr = paddle.attr.Param(learning_rate=1e-3)
lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.)
para_attr = [fc_para_attr, lstm_para_attr]
bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.)
relu = paddle.activation.Relu()
linear = paddle.activation.Linear()
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
fc1 = paddle.layer.fc(input=emb,
size=hid_dim,
act=linear,
bias_attr=bias_attr)
lstm1 = paddle.layer.lstmemory(
input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = paddle.layer.fc(input=inputs,
size=hid_dim,
act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = paddle.layer.lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = paddle.layer.pooling(
input=inputs[0], pooling_type=paddle.pooling.Max())
lstm_last = paddle.layer.pooling(
input=inputs[1], pooling_type=paddle.pooling.Max())
output = paddle.layer.fc(input=[fc_last, lstm_last],
size=class_dim,
act=paddle.activation.Softmax(),
bias_attr=bias_attr,
param_attr=para_attr)
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
if __name__ == '__main__':
# init
paddle.init(use_gpu=False)
#data
print 'load dictionary...'
word_dict = paddle.dataset.imdb.word_dict()
dict_dim = len(word_dict)
class_dim = 2
train_reader = paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100)
test_reader = paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict), batch_size=100)
feeding = {'word': 0, 'label': 1}
# network config
# Please choose the way to build the network
# by uncommenting the corresponding line.
cost = convolution_net(dict_dim, class_dim=class_dim)
# cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
adam_optimizer = paddle.optimizer.Adam(
learning_rate=2e-3,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(average_window=0.5))
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=adam_optimizer)
trainer.train(
reader=train_reader,
event_handler=event_handler,
feeding=feeding,
num_passes=2)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sentiment_net import *
from paddle.trainer_config_helpers import *
# whether this config is used for test
is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False)
data_dir = "./data/pre-imdb"
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
################## Algorithm Config #####################
settings(
batch_size=128,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
model_average=ModelAverage(0.5),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25)
#################### Network Config ######################
stacked_lstm_net(
dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict)
# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
data/wmt14
data/pre-wmt14
data/wmt14_model
data/paraphrase
data/pre-paraphrase
data/paraphrase_model
translation/gen.log
translation/gen_result
translation/train.log
paraphrase/train.log
dataprovider_copy_1.py
translation/thirdparty.tgz
translation/thirdparty/train.conf
translation/thirdparty/dataprovider.py
translation/thirdparty/seqToseq_net.py
translation/thirdparty/*.dict
*.pyc
import sys
import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
beam_size = 3
max_length = 250
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = paddle.layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder
with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = paddle.layer.first_seq(input=src_backward)
with paddle.layer.mixed(
size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
if not is_generating:
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
else:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding = paddle.layer.GeneratedInputV2(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
return beam_gen
def main():
paddle.init(use_gpu=False, trainer_count=1)
is_generating = False
# source and target dict dim.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# train the network
if not is_generating:
cost = seqToseq_net(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost)
# define optimize method and trainer
optimizer = paddle.optimizer.Adam(
learning_rate=5e-5,
regularization=paddle.optimizer.L2Regularization(rate=8e-4))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# define data reader
wmt14_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=8192),
batch_size=5)
# define event_handler callback
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost,
event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
# start to train
trainer.train(
reader=wmt14_reader, event_handler=event_handler, num_passes=2)
# generate a english sequence to french
else:
# use the first 3 samples for generation
gen_creator = paddle.dataset.wmt14.gen(dict_size)
gen_data = []
gen_num = 3
for item in gen_creator():
gen_data.append((item[0], ))
if len(gen_data) == gen_num:
break
beam_gen = seqToseq_net(source_dict_dim, target_dict_dim, is_generating)
# get the pretrained model, whose bleu = 26.92
parameters = paddle.dataset.wmt14.model()
# prob is the prediction probabilities, and id is the prediction word.
beam_result = paddle.infer(
output_layer=beam_gen,
parameters=parameters,
input=gen_data,
field=['prob', 'id'])
# get the dictionary
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
# the delimited element of generated sequences is -1,
# the first element of each generated sequence is the sequence length
seq_list = []
seq = []
for w in beam_result[1]:
if w != -1:
seq.append(w)
else:
seq_list.append(' '.join([trg_dict.get(w) for w in seq[1:]]))
seq = []
prob = beam_result[0]
beam_size = 3
for i in xrange(gen_num):
print "\n*******************************************************\n"
print "src:", ' '.join(
[src_dict.get(w) for w in gen_data[i][0]]), "\n"
for j in xrange(beam_size):
print "prob = %f:" % (prob[i][j]), seq_list[i * beam_size + j]
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
# download the in-house paraphrase dataset
wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/paraphrase.tar.gz
# untar the dataset
tar -zxvf paraphrase.tar.gz
rm paraphrase.tar.gz
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
dim=32
pretrained_dir='../../model_zoo/embedding/'
preModel=$pretrained_dir'model_'$dim'.emb'
preDict=$pretrained_dir'baidu.dict'
usrDict_dir='pre-paraphrase/'
srcDict=$usrDict_dir'src.dict'
trgDict=$usrDict_dir'trg.dict'
usrModel_dir='paraphrase_model/'
mkdir $usrModel_dir
srcModel=$usrModel_dir'_source_language_embedding'
trgModel=$usrModel_dir'_target_language_embedding'
echo 'extract desired parameters based on user dictionary'
script=$pretrained_dir'extract_para.py'
python $script --preModel $preModel --preDict $preDict \
--usrModel $srcModel --usrDict $srcDict -d $dim
python $script --preModel $preModel --preDict $preDict \
--usrModel $trgModel --usrDict $trgDict -d $dim
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
mkdir wmt14
cd wmt14
# download the dataset
wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/bitexts.tgz
wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz
# untar the dataset
tar -zxvf bitexts.tgz
tar -zxvf dev+test.tgz
gunzip bitexts.selected/*
mv bitexts.selected train
rm bitexts.tgz
rm dev+test.tgz
# separate the dev and test dataset
mkdir test gen
mv dev/ntst1213.* test
mv dev/ntst14.* gen
rm -rf dev
set +x
# rename the suffix, .fr->.src, .en->.trg
for dir in train test gen
do
filelist=`ls $dir`
cd $dir
for file in $filelist
do
if [ ${file##*.} = "fr" ]; then
mv $file ${file/%fr/src}
elif [ ${file##*.} = 'en' ]; then
mv $file ${file/%en/trg}
fi
done
cd ..
done
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
# download the pretrained model
wget http://paddlepaddle.bj.bcebos.com/model_zoo/wmt14_model.tar.gz
# untar the model
tar -zxvf wmt14_model.tar.gz
rm wmt14_model.tar.gz
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 2
START = "<s>"
END = "<e>"
def hook(settings, src_dict_path, trg_dict_path, is_generating, file_list,
**kwargs):
# job_mode = 1: training mode
# job_mode = 0: generating mode
settings.job_mode = not is_generating
def fun(dict_path):
out_dict = dict()
with open(dict_path, "r") as fin:
out_dict = {
line.strip(): line_count
for line_count, line in enumerate(fin)
}
return out_dict
settings.src_dict = fun(src_dict_path)
settings.trg_dict = fun(trg_dict_path)
settings.logger.info("src dict len : %d" % (len(settings.src_dict)))
if settings.job_mode:
settings.slots = {
'source_language_word':
integer_value_sequence(len(settings.src_dict)),
'target_language_word':
integer_value_sequence(len(settings.trg_dict)),
'target_language_next_word':
integer_value_sequence(len(settings.trg_dict))
}
settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
else:
settings.slots = {
'source_language_word':
integer_value_sequence(len(settings.src_dict)),
'sent_id':
integer_value_sequence(len(open(file_list[0], "r").readlines()))
}
def _get_ids(s, dictionary):
words = s.strip().split()
return [dictionary[START]] + \
[dictionary.get(w, UNK_IDX) for w in words] + \
[dictionary[END]]
@provider(init_hook=hook, pool_size=50000)
def process(settings, file_name):
with open(file_name, 'r') as f:
for line_count, line in enumerate(f):
line_split = line.strip().split('\t')
if settings.job_mode and len(line_split) != 2:
continue
src_seq = line_split[0] # one source sequence
src_ids = _get_ids(src_seq, settings.src_dict)
if settings.job_mode:
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80:
continue
trg_ids_next = trg_ids + [settings.trg_dict[END]]
trg_ids = [settings.trg_dict[START]] + trg_ids
yield {
'source_language_word': src_ids,
'target_language_word': trg_ids,
'target_language_next_word': trg_ids_next
}
else:
yield {'source_language_word': src_ids, 'sent_id': [line_count]}
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
from seqToseq_net import *
is_generating = False
### Data Definiation
train_conf = seq_to_seq_data(data_dir = "./data/pre-paraphrase",
is_generating = is_generating)
### Algorithm Configuration
settings(
learning_method = AdamOptimizer(),
batch_size = 50,
learning_rate = 5e-4)
### Network Architecture
gru_encoder_decoder(train_conf, is_generating, word_vector_dim = 32)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cd ..
paddle train \
--config='paraphrase/train.conf' \
--save_dir='paraphrase/model' \
--init_model_path='data/paraphrase_model' \
--load_missing_parameter_strategy=rand \
--use_gpu=false \
--num_passes=16 \
--show_parameter_stats_period=100 \
--trainer_count=4 \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'paraphrase/train.log'
paddle usage -l 'paraphrase/train.log' -e $? -n "seqToseq_paraphrase_train" >/dev/null 2>&1
#!/bin/env python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Example:
python preprocess.py -i INPUT [-d DICTSIZE] [-m]
Options:
-h, --help show this help message and exit
-i INPUT input original dataset path
-d DICTSIZE specified word count of dictionary
-m --mergeDict merge source and target dictionary
"""
import os
import sys
import string
from optparse import OptionParser
from paddle.utils.preprocess_util import save_list, DatasetCreater
class SeqToSeqDatasetCreater(DatasetCreater):
"""
A class to process data for sequence to sequence application.
"""
def __init__(self, data_path, output_path):
"""
data_path: the path to store the train data, test data and gen data
output_path: the path to store the processed dataset
"""
DatasetCreater.__init__(self, data_path)
self.gen_dir_name = 'gen'
self.gen_list_name = 'gen.list'
self.output_path = output_path
def concat_file(self, file_path, file1, file2, output_path, output):
"""
Concat file1 and file2 to be one output file
The i-th line of output = i-th line of file1 + '\t' + i-th line of file2
file_path: the path to store file1 and file2
output_path: the path to store output file
"""
file1 = os.path.join(file_path, file1)
file2 = os.path.join(file_path, file2)
output = os.path.join(output_path, output)
if not os.path.exists(output):
os.system('paste ' + file1 + ' ' + file2 + ' > ' + output)
def cat_file(self, dir_path, suffix, output_path, output):
"""
Cat all the files in dir_path with suffix to be one output file
dir_path: the base directory to store input file
suffix: suffix of file name
output_path: the path to store output file
"""
cmd = 'cat '
file_list = os.listdir(dir_path)
file_list.sort()
for file in file_list:
if file.endswith(suffix):
cmd += os.path.join(dir_path, file) + ' '
output = os.path.join(output_path, output)
if not os.path.exists(output):
os.system(cmd + '> ' + output)
def build_dict(self, file_path, dict_path, dict_size=-1):
"""
Create the dictionary for the file, Note that
1. Valid characters include all printable characters
2. There is distinction between uppercase and lowercase letters
3. There is 3 special token:
<s>: the start of a sequence
<e>: the end of a sequence
<unk>: a word not included in dictionary
file_path: the path to store file
dict_path: the path to store dictionary
dict_size: word count of dictionary
if is -1, dictionary will contains all the words in file
"""
if not os.path.exists(dict_path):
dictory = dict()
with open(file_path, "r") as fdata:
for line in fdata:
line = line.split('\t')
for line_split in line:
words = line_split.strip().split()
for word in words:
if word not in dictory:
dictory[word] = 1
else:
dictory[word] += 1
output = open(dict_path, "w+")
output.write('<s>\n<e>\n<unk>\n')
count = 3
for key, value in sorted(
dictory.items(), key=lambda d: d[1], reverse=True):
output.write(key + "\n")
count += 1
if count == dict_size:
break
self.dict_size = count
def create_dataset(self,
dict_size=-1,
mergeDict=False,
suffixes=['.src', '.trg']):
"""
Create seqToseq dataset
"""
# dataset_list and dir_list has one-to-one relationship
train_dataset = os.path.join(self.data_path, self.train_dir_name)
test_dataset = os.path.join(self.data_path, self.test_dir_name)
gen_dataset = os.path.join(self.data_path, self.gen_dir_name)
dataset_list = [train_dataset, test_dataset, gen_dataset]
train_dir = os.path.join(self.output_path, self.train_dir_name)
test_dir = os.path.join(self.output_path, self.test_dir_name)
gen_dir = os.path.join(self.output_path, self.gen_dir_name)
dir_list = [train_dir, test_dir, gen_dir]
# create directory
for dir in dir_list:
if not os.path.exists(dir):
os.mkdir(dir)
# checkout dataset should be parallel corpora
suffix_len = len(suffixes[0])
for dataset in dataset_list:
file_list = os.listdir(dataset)
if len(file_list) % 2 == 1:
raise RuntimeError("dataset should be parallel corpora")
file_list.sort()
for i in range(0, len(file_list), 2):
if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
raise RuntimeError(
"source and target file name should be equal")
# cat all the files with the same suffix in dataset
for suffix in suffixes:
for dataset in dataset_list:
outname = os.path.basename(dataset) + suffix
self.cat_file(dataset, suffix, dataset, outname)
# concat parallel corpora and create file.list
print 'concat parallel corpora for dataset'
id = 0
list = ['train.list', 'test.list', 'gen.list']
for dataset in dataset_list:
outname = os.path.basename(dataset)
self.concat_file(dataset, outname + suffixes[0],
outname + suffixes[1], dir_list[id], outname)
save_list([os.path.join(dir_list[id], outname)],
os.path.join(self.output_path, list[id]))
id += 1
# build dictionary for train data
dict = ['src.dict', 'trg.dict']
dict_path = [
os.path.join(self.output_path, dict[0]),
os.path.join(self.output_path, dict[1])
]
if mergeDict:
outname = os.path.join(train_dir, train_dataset.split('/')[-1])
print 'build src dictionary for train data'
self.build_dict(outname, dict_path[0], dict_size)
print 'build trg dictionary for train data'
os.system('cp ' + dict_path[0] + ' ' + dict_path[1])
else:
outname = os.path.join(train_dataset, self.train_dir_name)
for id in range(0, 2):
suffix = suffixes[id]
print 'build ' + suffix[1:] + ' dictionary for train data'
self.build_dict(outname + suffix, dict_path[id], dict_size)
print 'dictionary size is', self.dict_size
def main():
usage = "usage: \n" \
"python %prog -i INPUT [-d DICTSIZE] [-m]"
parser = OptionParser(usage)
parser.add_option(
"-i", action="store", dest="input", help="input original dataset path")
parser.add_option(
"-d",
action="store",
dest="dictsize",
help="specified word count of dictionary")
parser.add_option(
"-m",
"--mergeDict",
action="store_true",
dest="mergeDict",
help="merge source and target dictionary")
(options, args) = parser.parse_args()
if options.input[-1] == os.path.sep:
options.input = options.input[:-1]
outname = os.path.basename(options.input)
output_path = os.path.join(os.path.dirname(options.input), 'pre-' + outname)
dictsize = int(options.dictsize) if options.dictsize else -1
if not os.path.exists(output_path):
os.mkdir(output_path)
data_creator = SeqToSeqDatasetCreater(options.input, output_path)
data_creator.create_dataset(dictsize, options.mergeDict)
if __name__ == "__main__":
main()
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
from paddle.trainer_config_helpers import *
def seq_to_seq_data(data_dir,
is_generating,
dict_size=30000,
train_list='train.list',
test_list='test.list',
gen_list='gen.list',
gen_result='gen_result'):
"""
Predefined seqToseq train data provider for application
is_generating: whether this config is used for generating
dict_size: word count of dictionary
train_list: a text file containing a list of training data
test_list: a text file containing a list of testing data
gen_list: a text file containing a list of generating data
gen_result: a text file containing generating result
"""
src_lang_dict = os.path.join(data_dir, 'src.dict')
trg_lang_dict = os.path.join(data_dir, 'trg.dict')
if is_generating:
train_list = None
test_list = os.path.join(data_dir, gen_list)
else:
train_list = os.path.join(data_dir, train_list)
test_list = os.path.join(data_dir, test_list)
define_py_data_sources2(
train_list,
test_list,
module="dataprovider",
obj="process",
args={
"src_dict_path": src_lang_dict,
"trg_dict_path": trg_lang_dict,
"is_generating": is_generating
})
return {
"src_dict_path": src_lang_dict,
"trg_dict_path": trg_lang_dict,
"gen_result": gen_result
}
def gru_encoder_decoder(data_conf,
is_generating,
word_vector_dim=512,
encoder_size=512,
decoder_size=512,
beam_size=3,
max_length=250,
error_clipping=50):
"""
A wrapper for an attention version of GRU Encoder-Decoder network
is_generating: whether this config is used for generating
encoder_size: dimension of hidden unit in GRU Encoder network
decoder_size: dimension of hidden unit in GRU Decoder network
word_vector_dim: dimension of word vector
beam_size: expand width in beam search
max_length: a stop condition of sequence generation
"""
for k, v in data_conf.iteritems():
globals()[k] = v
source_dict_dim = len(open(src_dict_path, "r").readlines())
target_dict_dim = len(open(trg_dict_path, "r").readlines())
gen_trans_file = gen_result
src_word_id = data_layer(name='source_language_word', size=source_dict_dim)
src_embedding = embedding_layer(
input=src_word_id,
size=word_vector_dim,
param_attr=ParamAttr(name='_source_language_embedding'))
src_forward = simple_gru(
input=src_embedding,
size=encoder_size,
naive=True,
gru_layer_attr=ExtraLayerAttribute(
error_clipping_threshold=error_clipping))
src_backward = simple_gru(
input=src_embedding,
size=encoder_size,
reverse=True,
naive=True,
gru_layer_attr=ExtraLayerAttribute(
error_clipping_threshold=error_clipping))
encoded_vector = concat_layer(input=[src_forward, src_backward])
with mixed_layer(size=decoder_size) as encoded_proj:
encoded_proj += full_matrix_projection(input=encoded_vector)
backward_first = first_seq(input=src_backward)
with mixed_layer(
size=decoder_size,
act=TanhActivation(), ) as decoder_boot:
decoder_boot += full_matrix_projection(input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem, )
with mixed_layer(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += full_matrix_projection(input=context)
decoder_inputs += full_matrix_projection(input=current_word)
gru_step = gru_step_naive_layer(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size,
layer_attr=ExtraLayerAttribute(
error_clipping_threshold=error_clipping))
with mixed_layer(
size=target_dict_dim, bias_attr=True,
act=SoftmaxActivation()) as out:
out += full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_inputs = [
StaticInput(
input=encoded_vector, is_seq=True), StaticInput(
input=encoded_proj, is_seq=True)
]
if not is_generating:
trg_embedding = embedding_layer(
input=data_layer(
name='target_language_word', size=target_dict_dim),
size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = data_layer(name='target_language_next_word', size=target_dict_dim)
cost = classification_cost(input=decoder, label=lbl)
outputs(cost)
else:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding = GeneratedInput(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
seqtext_printer_evaluator(
input=beam_gen,
id_input=data_layer(
name="sent_id", size=1),
dict_file=trg_dict_path,
result_file=gen_trans_file)
outputs(beam_gen)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
gen_file=$1
beam_size=$2
# find top1 generating result
top1=$(printf '%s_top1.txt' `basename $gen_file .txt`)
if [ $beam_size -eq 1 ]; then
awk -F "\t" '{sub(" <e>","",$2);sub(" ","",$2);print $2}' $gen_file >$top1
else
awk 'BEGIN{
FS="\t";
OFS="\t";
read_pos = 2} {
if (NR == read_pos){
sub(" <e>","",$3);
sub(" ","",$3);
print $3;
read_pos += (2 + res_num);
}}' res_num=$beam_size $gen_file >$top1
fi
# evalute bleu value
bleu_script=multi-bleu.perl
standard_res=../data/wmt14/gen/ntst14.trg
bleu_res=`perl $bleu_script $standard_res <$top1`
echo $bleu_res
rm $top1
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
from seqToseq_net import *
# whether this config is used for generating
is_generating = True
### Data Definiation
gen_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14",
is_generating = is_generating,
gen_result = "./translation/gen_result")
### Algorithm Configuration
settings(
learning_method = AdamOptimizer(),
batch_size = 1,
learning_rate = 0)
### Network Architecture
gru_encoder_decoder(gen_conf, is_generating)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cd ..
paddle train \
--job=test \
--config='translation/gen.conf' \
--save_dir='data/wmt14_model' \
--use_gpu=false \
--num_passes=13 \
--test_pass=12 \
--trainer_count=1 \
2>&1 | tee 'translation/gen.log'
paddle usage -l 'translation/gen.log' -e $? -n "seqToseq_translation_gen" >/dev/null 2>&1
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
echo "Downloading multi-bleu.perl"
wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl --no-check-certificate
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
from seqToseq_net import *
# whether this config is used for generating
is_generating = False
### Data Definiation
data_dir = "./data/pre-wmt14"
train_conf = seq_to_seq_data(data_dir = data_dir,
is_generating = is_generating)
### Algorithm Configuration
settings(
learning_method = AdamOptimizer(),
batch_size = 50,
learning_rate = 5e-4)
### Network Architecture
gru_encoder_decoder(train_conf, is_generating)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cd ..
paddle train \
--config='translation/train.conf' \
--save_dir='translation/model' \
--use_gpu=false \
--num_passes=16 \
--show_parameter_stats_period=100 \
--trainer_count=4 \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'translation/train.log'
paddle usage -l 'translation/train.log' -e $? -n "seqToseq_translation_train" >/dev/null 2>&1
import gzip
import math
import paddle.v2 as paddle
embsize = 32
hiddensize = 256
N = 5
def wordemb(inlayer):
wordemb = paddle.layer.embedding(
input=inlayer,
size=embsize,
param_attr=paddle.attr.Param(
name="_proj",
initial_std=0.001,
learning_rate=1,
l2_rate=0,
sparse_update=True))
return wordemb
def main():
# for local training
cluster_train = False
if not cluster_train:
paddle.init(use_gpu=False, trainer_count=1)
else:
paddle.init(
use_gpu=False,
trainer_count=2,
port=7164,
ports_num=1,
ports_num_for_sparse=1,
num_gradient_servers=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data(
name="secondw", type=paddle.data_type.integer_value(dict_size))
thirdword = paddle.layer.data(
name="thirdw", type=paddle.data_type.integer_value(dict_size))
fourthword = paddle.layer.data(
name="fourthw", type=paddle.data_type.integer_value(dict_size))
nextword = paddle.layer.data(
name="fifthw", type=paddle.data_type.integer_value(dict_size))
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)
contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
hidden1 = paddle.layer.fc(input=contextemb,
size=hiddensize,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embsize * 8),
learning_rate=1))
predictword = paddle.layer.fc(input=hidden1,
size=dict_size,
bias_attr=paddle.attr.Param(learning_rate=2),
act=paddle.activation.Softmax())
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
with gzip.open("batch-" + str(event.batch_id) + ".tar.gz",
'w') as f:
trainer.save_parameter_to_tar(f)
result = trainer.test(
paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost)
adagrad = paddle.optimizer.AdaGrad(
learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost,
parameters,
adagrad,
is_local=not cluster_train)
trainer.train(
paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
num_passes=30,
event_handler=event_handler)
if __name__ == '__main__':
main()
......@@ -59,6 +59,11 @@ context_projection
.. autoclass:: paddle.v2.layer.context_projection
:noindex:
row_conv
--------
.. autoclass:: paddle.v2.layer.row_conv
:noindex:
Image Pooling Layer
===================
......@@ -130,7 +135,7 @@ recurrent_group
---------------
.. autoclass:: paddle.v2.layer.recurrent_group
:noindex:
lstm_step
---------
.. autoclass:: paddle.v2.layer.lstm_step
......@@ -145,12 +150,12 @@ beam_search
------------
.. autoclass:: paddle.v2.layer.beam_search
:noindex:
get_output
----------
.. autoclass:: paddle.v2.layer.get_output
:noindex:
Mixed Layer
===========
......@@ -203,7 +208,7 @@ trans_full_matrix_projection
----------------------------
.. autoclass:: paddle.v2.layer.trans_full_matrix_projection
:noindex:
Aggregate Layers
================
......@@ -346,6 +351,12 @@ sampling_id
.. autoclass:: paddle.v2.layer.sampling_id
:noindex:
multiplex
---------
.. autoclass:: paddle.v2.layer.multiplex
:noindex:
Slicing and Joining Layers
==========================
......@@ -434,10 +445,26 @@ smooth_l1_cost
.. autoclass:: paddle.v2.layer.smooth_l1_cost
:noindex:
Check Layer
Check Layer
============
eos
---
.. autoclass:: paddle.v2.layer.eos
:noindex:
Miscs
=====
dropout
--------------
.. autoclass:: paddle.v2.layer.dropout
:noindex:
Activation with learnable parameter
===================================
prelu
--------
.. autoclass:: paddle.v2.layer.prelu
:noindex:
......@@ -125,11 +125,3 @@ simple_attention
:members: simple_attention
:noindex:
Miscs
=====
dropout_layer
--------------
.. automodule:: paddle.v2.networks
:members: dropout_layer
:noindex:
......@@ -10,7 +10,7 @@ A dataset is a list of files in *RecordIO* format. A RecordIO file consists of c
## Task Queue
As mentioned in [distributed training design doc](./README.md), a *task* is a data shard that the master server assigns to the trainer process to train on. A task consists of one or multiple *blocks* from one or multiple files. The master server maintains *task queues* to track the training progress.
As mentioned in [distributed training design doc](./README.md), a *task* is a data shard that the master server assigns to the trainer process to train on. A task consists of one or multiple *chunks* from one or multiple files. The master server maintains *task queues* to track the training progress.
### Task Queue Creation
......@@ -21,23 +21,23 @@ As mentioned in [distributed training design doc](./README.md), a *task* is a da
func (m *RPCServer) ReportDataset(Paths []string, dummy *int) error {
}
```
1. The master server will scan through each RecordIO file to generate the *block index* and know how many blocks does each file have. A block can be referenced by the file path and the index of the block within the file. The block index is in memory data structure that enables fast access to each block, and the index of the block with the file is an integer start from 0, representing the n-th block within the file.
1. The master server will scan through each RecordIO file to generate the *chunk index* and know how many chunks does each file have. A chunk can be referenced by the file path and the index of the chunk within the file. The chunk index is in memory data structure that enables fast access to each chunk, and the index of the chunk with the file is an integer start from 0, representing the n-th chunk within the file.
The definition of the block is:
The definition of the chunk is:
```go
type Block struct {
Idx int // index of the block within the file
type Chunk struct {
Idx int // index of the chunk within the file
Path string
Index recordio.Index // block index
Index recordio.Index // chunk index
}
```
1. Blocks are grouped into tasks, and tasks are filled into the todo queue. The pending queue and the done queue are initialized with no element.
1. Chunks are grouped into tasks, and tasks are filled into the todo queue. The pending queue and the done queue are initialized with no element.
The definition of the task is:
```go
type Task struct {
Index int
Blocks []Block
Chunks []Chunk
}
```
......
......@@ -55,7 +55,7 @@ The trainer select process is encapsulated in the C API function:
```c
int paddle_begin_init_params(paddle_pserver_client* client, const char* config_proto);
```
The selected trainer's call to `paddle_begin_init_params` will return with 1, and the other trainers' call to `paddle_begin_init_params` will block until initialization is done, and return 0. As illustrated below:
The selected trainer's call to `paddle_begin_init_params` will return with 1, and the other trainers' call to `paddle_begin_init_params` will return 0. `paddle_get_params` will be blocked until initialization is completed. As illustrated below:
<img src="./src/pserver_init.png">
......@@ -89,16 +89,13 @@ void paddle_pserver_client_release(paddle_pserver_client* client);
*
* paddle_begin_init_params will be called from multiple trainers,
* only one trainer will be selected to initialize the parameters on
* parameter servers. Other trainers will be blocked until the
* initialization is done, and they need to get the initialized
* parameter servers. Other trainers need to get the initialized
* parameters from parameter servers using @paddle_get_params.
*
* @param pserver_config_proto serialized parameter server configuration in
* Protocol Buffers format.
* @return 1 if the trainer is selected to initialize parameter
* servers, otherwise 0.
*/
int paddle_begin_init_params(paddle_pserver_client* client, const char* pserver_config_proto);
int paddle_begin_init_params(paddle_pserver_client* client);
/**
* @brief paddle_init_param initializes the parameter on parameter
......@@ -106,12 +103,13 @@ int paddle_begin_init_params(paddle_pserver_client* client, const char* pserver_
*
* @param param the parameter to initialize.
* @param param_config_proto the configuration for the parameter.
* @param config_len the length of param_config_proto
* @return 0 if successful, otherwise -1. On failure, the trainer
* needs to restart the entire initialization process (starting from
* @paddle_begin_init_param). Or simply exit the program and wait for
* the cluster management system to restart the trainer.
*/
int paddle_init_param(paddle_pserver_client* client, paddle_parameter params, const char* param_config_proto);
int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, const unsigned char* param_config_proto, int config_len);
/**
* @brief paddle_finish_init_params tells parameter servers client has
......@@ -138,6 +136,9 @@ int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grad
/**
* @brief paddle_get_params gets parameters from parameter servers.
*
* paddle_get_params will block until parameters are initialized on
* the parameter servers.
*
* @param names the array of names of the parameters to get.
* @param dst the destination array of parameters to save to.
* @param len the length of the names array and the paddle_parameter
......
# Design Doc: The C++ Class `Parameters`
`Parameters` is a concept we designed in Paddle V2 API. `Parameters` is a container of parameters, and make Paddle can shared parameter between topologies. We described usages of `Parameter` in [api.md](./api.md).
We used Python to implement Parameters when designing V2 API before. There are several defects for current implementation:
* We just use `memcpy` to share Parameters between topologies, but this is very inefficient.
* We did not implement share Parameters while training. We just trigger `memcpy` when start training.
It is necessary that we implement Parameters in CPP side. However, it could be a code refactoring for Paddle, because Paddle was designed for training only one topology before, i.e., each GradientMachine contains its Parameter as a data member. In current Paddle implementation, there are three concepts associated with `Parameters`:
1. `paddle::Parameter`. A `Parameters` is a container for `paddle::Parameter`.
It is evident that we should use `paddle::Parameter` when developing `Parameters`.
However, the `Parameter` class contains many functions and does not have a clear interface.
It contains `create/store Parameter`, `serialize/deserialize`, `optimize(i.e SGD)`, `randomize/zero`.
When we developing `Parameters`, we only use `create/store Parameter` functionality.
We should extract functionalities of Parameter into many classes to clean Paddle CPP implementation.
2. `paddle::GradientMachine` and its sub-classes, e.g., `paddle::MultiGradientMachine`, `paddle::NeuralNetwork`.
We should pass `Parameters` to `paddle::GradientMachine` when `forward/backward` to avoid `memcpy` between topologies.
Also, we should handle multi-GPU/CPU training, because `forward` and `backward` would perform on multi-GPUs and multi-CPUs.
`Parameters` should dispatch the parameter value to each device, and gather the parameter gradient from each device.
3. `paddle::ParameterUpdater`. The ParameterUpdater is used to update parameters in Paddle.
So `Parameters` should be used by `paddle::ParameterUpdater`, and `paddle::ParameterUpdater` should optimize `Parameters` (by SGD).
The step by step approach for implementation Parameters in Paddle C++ core is listed below. Each step should be a PR and could be merged into Paddle one by one.
1. Clean `paddle::Parameter` interface. Extract the functionalities of `paddle::Parameter` to prepare for the implementation of Parameters.
2. Implementation a `Parameters` class. It just stores the `paddle::Parameter` inside. Make `GradientMachine` uses `Parameters` as a class member.
3. Make `Parameters` support Multi-CPU and Multi-GPU training to prepare for sharing `Parameter` between topologies.
Because we need share `Parameters` between topologies, it is `Parameters`'s response to exchange Parameters between GPUs.
`GradientMachine` should not handle how to exchange Parameters because `GradientMachine` only used to train one topology and we need to support train many topologies in Paddle, i.e., there could be many GradientMachines use one `Parameters`.
* We should use a global function to exchange Parameters between GPUs, not a member function in `Parameters`. The `MultiGradientMachine` invoke this function, which uses `Parameters` as this function inputs.
* The MultiGradientMachine contains many functionalities. Extracting the Parameters exchanging logic could make MultiGradientMachine clearer and simpler.
4. Make `Parameters` as an argument for `forward/backward` function, not a data member for `GradientMachine`. For example, `forward` could be `forward(const Parameters& params, ...)` and `backward` could be `backward(Parameters* params, ...)`. After this step, Paddle could share `Parameters` between topologies.
5. `ParameterUpdater` is invoked by `GradientMachine` and `Trainer`, but it updates `Parameters`. In the end of this code refactoring, we could change `ParameterUpdater` directly uses `Parameters` to make `ParameterUpdater`'s implementation clear.
# DeepSpeech2 on PaddlePaddle: Design Doc
We are planning to build Deep Speech 2 (DS2) \[[1](#references)\], a powerful Automatic Speech Recognition (ASR) engine, on PaddlePaddle. For the first-stage plan, we have the following short-term goals:
- Release a basic distributed implementation of DS2 on PaddlePaddle.
- Contribute a chapter of Deep Speech to PaddlePaddle Book.
Intensive system optimization and low-latency inference library (details in \[[1](#references)\]) are not yet covered in this first-stage plan.
## Table of Contents
- [Tasks](#tasks)
- [Task Dependency](#task-dependency)
- [Design Details](#design-details)
- [Overview](#overview)
- [Row Convolution](#row-convolution)
- [Beam Search With CTC and LM](#beam-search-with-ctc-and-lm)
- [Future Work](#future-work)
- [References](#references)
## Tasks
We roughly break down the project into 14 tasks:
1. Develop an **audio data provider**:
- Json filelist generator.
- Audio file format transformer.
- Spectrogram feature extraction, power normalization etc.
- Batch data reader with SortaGrad.
- Data augmentation (optional).
- Prepare (one or more) public English data sets & baseline.
2. Create a **simplified DS2 model configuration**:
- With only fixed-length (by padding) audio sequences (otherwise need *Task 3*).
- With only bidirectional-GRU (otherwise need *Task 4*).
- With only greedy decoder (otherwise need *Task 5, 6*).
3. Develop to support **variable-shaped** dense-vector (image) batches of input data.
- Update `DenseScanner` in `dataprovider_converter.py`, etc.
4. Develop a new **lookahead-row-convolution layer** (See \[[1](#references)\] for details):
- Lookahead convolution windows.
- Within-row convolution, without kernels shared across rows.
5. Build KenLM **language model** (5-gram) for beam search decoder:
- Use KenLM toolkit.
- Prepare the corpus & train the model.
- Create infererence interfaces (for Task 6).
6. Develop a **beam search decoder** with CTC + LM + WORDCOUNT:
- Beam search with CTC.
- Beam search with external custom scorer (e.g. LM).
- Try to design a more general beam search interface.
7. Develop a **Word Error Rate evaluator**:
- update `ctc_error_evaluator`(CER) to support WER.
8. Prepare internal dataset for Mandarin (optional):
- Dataset, baseline, evaluation details.
- Particular data preprocessing for Mandarin.
- Might need cooperating with the Speech Department.
9. Create **standard DS2 model configuration**:
- With variable-length audio sequences (need *Task 3*).
- With unidirectional-GRU + row-convolution (need *Task 4*).
- With CTC-LM beam search decoder (need *Task 5, 6*).
10. Make it run perfectly on **clusters**.
11. Experiments and **benchmarking** (for accuracy, not efficiency):
- With public English dataset.
- With internal (Baidu) Mandarin dataset (optional).
12. Time **profiling** and optimization.
13. Prepare **docs**.
14. Prepare PaddlePaddle **Book** chapter with a simplified version.
## Task Dependency
Tasks parallelizable within phases:
Roadmap | Description | Parallelizable Tasks
----------- | :------------------------------------ | :--------------------
Phase I | Simplified model & components | *Task 1* ~ *Task 8*
Phase II | Standard model & benchmarking & profiling | *Task 9* ~ *Task 12*
Phase III | Documentations | *Task13* ~ *Task14*
Issue for each task will be created later. Contributions, discussions and comments are all highly appreciated and welcomed!
## Design Details
### Overview
Traditional **ASR** (Automatic Speech Recognition) pipelines require great human efforts devoted to elaborately tuning multiple hand-engineered components (e.g. audio feature design, accoustic model, pronuncation model and language model etc.). **Deep Speech 2** (**DS2**) \[[1](#references)\], however, trains such ASR models in an end-to-end manner, replacing most intermediate modules with only a single deep network architecture. With scaling up both the data and model sizes, DS2 achieves a very significant performance boost.
Please read Deep Speech 2 \[[1](#references),[2](#references)\] paper for more background knowledge.
The classical DS2 network contains 15 layers (from bottom to top):
- **Two** data layers (audio spectrogram, transcription text)
- **Three** 2D convolution layers
- **Seven** uni-directional simple-RNN layers
- **One** lookahead row convolution layers
- **One** fully-connected layers
- **One** CTC-loss layer
<div align="center">
<img src="image/ds2_network.png" width=350><br/>
Figure 1. Archetecture of Deep Speech 2 Network.
</div>
We don't have to persist on this 2-3-7-1-1-1 depth \[[2](#references)\]. Similar networks with different depths might also work well. As in \[[1](#references)\], authors use a different depth (e.g. 2-2-3-1-1-1) for final experiments.
Key ingredients about the layers:
- **Data Layers**:
- Frame sequences data of audio **spectrogram** (with FFT).
- Token sequences data of **transcription** text (labels).
- These two type of sequences do not have the same lengthes, thus a CTC-loss layer is required.
- **2D Convolution Layers**:
- Not only temporal convolution, but also **frequency convolution**. Like a 2D image convolution, but with a variable dimension (i.e. temporal dimension).
- With striding for only the first convlution layer.
- No pooling for all convolution layers.
- **Uni-directional RNNs**
- Uni-directional + row convolution: for low-latency inference.
- Bi-direcitional + without row convolution: if we don't care about the inference latency.
- **Row convolution**:
- For looking only a few steps ahead into the feature, instead of looking into a whole sequence in bi-directional RNNs.
- Not nessesary if with bi-direcitional RNNs.
- "**Row**" means convolutions are done within each frequency dimension (row), and no convolution kernels shared across.
- **Batch Normalization Layers**:
- Added to all above layers (except for data and loss layer).
- Sequence-wise normalization for RNNs: BatchNorm only performed on input-state projection and not state-state projection, for efficiency consideration.
Required Components | PaddlePaddle Support | Need to Develop
:------------------------------------- | :-------------------------------------- | :-----------------------
Data Layer I (Spectrogram) | Not supported yet. | TBD (Task 3)
Data Layer II (Transcription) | `paddle.data_type.integer_value_sequence` | -
2D Convolution Layer | `paddle.layer.image_conv_layer` | -
DataType Converter (vec2seq) | `paddle.layer.block_expand` | -
Bi-/Uni-directional RNNs | `paddle.layer.recurrent_group` | -
Row Convolution Layer | Not supported yet. | TBD (Task 4)
CTC-loss Layer | `paddle.layer.warp_ctc` | -
Batch Normalization Layer | `paddle.layer.batch_norm` | -
CTC-Beam search | Not supported yet. | TBD (Task 6)
### Row Convolution
TODO by Assignees
### Beam Search with CTC and LM
TODO by Assignees
## Future Work
- Efficiency Improvement
- Accuracy Improvement
- Low-latency Inference Library
- Large-scale benchmarking
## References
1. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](http://proceedings.mlr.press/v48/amodei16.pdf). ICML 2016.
2. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](https://arxiv.org/abs/1512.02595). arXiv:1512.02595.
......@@ -12,13 +12,13 @@ PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打
像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次
PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运
行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在
`dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_ 提供最新
`dockerhub.com <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_ 提供最新
的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。为了方便在国
内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您
在国内,请把文档里命令中的paddlepaddle/paddle替换成
docker.paddlepaddle.org/paddle。
1. 开发镜像::code:`paddlepaddle/paddle:<version>-dev`
1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev`
这个镜像包含了Paddle相关的开发工具以及编译和运行环境。用户可以使用开发镜像代替配置本地环境,完成开发,编译,发布,
文档编写等工作。由于不同的Paddle的版本可能需要不同的依赖和工具,所以如果需要自行配置开发环境需要考虑版本的因素。
......@@ -37,13 +37,13 @@ docker.paddlepaddle.org/paddle。
.. code-block:: bash
docker run -it --rm paddlepaddle/paddle:<version>-dev /bin/bash
docker run -it --rm paddlepaddle/paddle:0.10.0-dev /bin/bash
或者,可以以后台进程方式运行容器:
.. code-block:: bash
docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:<version>-dev
docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0-dev
然后用密码 :code:`root` SSH进入容器:
......@@ -73,7 +73,7 @@ docker.paddlepaddle.org/paddle。
.. code-block:: bash
nvidia-docker run -it --rm paddledev/paddle:0.10.0rc1-gpu /bin/bash
nvidia-docker run -it --rm paddledev/paddle:0.10.0-gpu /bin/bash
注意: 如果使用nvidia-docker存在问题,你也许可以尝试更老的方法,具体如下,但是我们并不推荐这种方法。:
......@@ -81,7 +81,7 @@ docker.paddlepaddle.org/paddle。
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:<version>-gpu
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0-gpu
3. 运行以及发布您的AI程序
......@@ -98,7 +98,7 @@ docker.paddlepaddle.org/paddle。
nvidia-docker run -it -v $PWD:/work paddle /work/a.py
这里`a.py`包含的所有依赖假设都可以在Paddle的运行容器中。如果需要包含更多的依赖、或者需要发布您的应用的镜像,可以编写`Dockerfile`使用`FROM paddledev/paddle:<version>`
这里`a.py`包含的所有依赖假设都可以在Paddle的运行容器中。如果需要包含更多的依赖、或者需要发布您的应用的镜像,可以编写`Dockerfile`使用`FROM paddledev/paddle:0.10.0`
创建和发布自己的AI程序镜像。
运行PaddlePaddle Book
......@@ -177,7 +177,7 @@ Paddle的Docker开发镜像带有一个通过 `woboq code browser
.. code-block:: bash
docker run -d --name paddle-cpu-doc paddle:<version>-dev
docker run -d --name paddle-cpu-doc paddle:0.10.0-dev
docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx
接着我们就能够打开浏览器在 http://localhost:8088/paddle/ 浏览代码。
......@@ -23,7 +23,7 @@ Docker is simple as long as we understand a few basic concepts:
.. code-block:: bash
docker pull paddlepaddle/paddle:0.10.0rc2
docker pull paddlepaddle/paddle:0.10.0
to download a Docker image, paddlepaddle/paddle in this example,
from Dockerhub.com.
......@@ -35,7 +35,7 @@ Docker is simple as long as we understand a few basic concepts:
.. code-block:: bash
docker run paddlepaddle/paddle:0.10.0rc2
docker run paddlepaddle/paddle:0.10.0
to start a container to run a Docker image, paddlepaddle/paddle in this example.
......@@ -62,7 +62,7 @@ of PaddlePaddle, we release both of them. Production image includes
CPU-only version and a CUDA GPU version and their no-AVX versions.
We put the docker images on `dockerhub.com
<https://hub.docker.com/r/paddledev/paddle/>`_. You can find the
<https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_. You can find the
latest versions under "tags" tab at dockerhub.com. If you are in
China, you can use our Docker image registry mirror to speed up the
download process. To use it, please replace all paddlepaddle/paddle in
......@@ -89,7 +89,7 @@ the commands to docker.paddlepaddle.org/paddle.
.. code-block:: bash
docker run -it --rm paddlepaddle/paddle:0.10.0rc2 /bin/bash
docker run -it --rm paddlepaddle/paddle:0.10.0 /bin/bash
Above method work with the GPU image too -- the recommended way is
using `nvidia-docker <https://github.com/NVIDIA/nvidia-docker>`_.
......@@ -101,7 +101,7 @@ the commands to docker.paddlepaddle.org/paddle.
.. code-block:: bash
nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash
nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash
2. development image :code:`paddlepaddle/paddle:<version>-dev`
......@@ -149,13 +149,13 @@ Run the program using docker:
.. code-block:: bash
docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 python /workspace/example.py
docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 python /workspace/example.py
Or if you are using GPU for training:
.. code-block:: bash
nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu python /workspace/example.py
nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu python /workspace/example.py
Above commands will start a docker container by running :code:`python
/workspace/example.py`. It will stop once :code:`python
......@@ -166,7 +166,7 @@ run PaddlePaddle program interactively:
.. code-block:: bash
docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 /bin/bash
docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 /bin/bash
# now we are inside docker container
cd /workspace
python example.py
......@@ -175,7 +175,7 @@ Running with GPU is identical:
.. code-block:: bash
nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash
nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu /bin/bash
# now we are inside docker container
cd /workspace
python example.py
......
......@@ -7,4 +7,4 @@
build_and_install/index_cn.rst
concepts/use_concepts_cn.rst
- `深度学习入门课程 <http://book.paddlepaddle.org/>`_
- `深度学习入门课程 <http://book.paddlepaddle.org/index.cn.html>`_
......@@ -6,4 +6,4 @@ GET STARTED
build_and_install/index_en.rst
- `Deep Learning 101 <http://book.paddlepaddle.org/index.en.html>`_
- `Deep Learning 101 <http://book.paddlepaddle.org/index.html>`_
......@@ -28,17 +28,17 @@ pooling 的使用示例如下,详细见 :ref:`api_v2.layer_pooling` 配置API
seq_pool = pooling(input=layer,
pooling_type=pooling.Max(),
agg_level=AggregateLevel.EACH_SEQUENCE)
agg_level=AggregateLevel.TO_SEQUENCE)
- `pooling_type` 目前支持两种,分别是:pooling.Max()和pooling.Avg()。
- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):
- `agg_level=AggregateLevel.TO_NO_SEQUENCE` 时(默认值):
- 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列
- 输入:一个双层序列,或一个单层序列
- 输出:一个0层序列,即整个输入序列(单层或双层)的平均值(或最大值)
- `agg_level=AggregateLevel.EACH_SEQUENCE` 时:
- `agg_level=AggregateLevel.TO_SEQUENCE` 时:
- 作用:一个双层序列经过运算变成一个单层序列
- 输入:必须是一个双层序列
......@@ -52,15 +52,15 @@ last_seq 的使用示例如下( :ref:`api_v2.layer_first_seq` 类似),详
.. code-block:: bash
last = last_seq(input=layer,
agg_level=AggregateLevel.EACH_SEQUENCE)
agg_level=AggregateLevel.TO_SEQUENCE)
- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):
- `agg_level=AggregateLevel.TO_NO_SEQUENCE` 时(默认值):
- 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列
- 输入:一个双层序列或一个单层序列
- 输出:一个0层序列,即整个输入序列(双层或者单层)最后一个,或第一个元素。
- `agg_level=AggregateLevel.EACH_SEQUENCE` 时:
- `agg_level=AggregateLevel.TO_SEQUENCE` 时:
- 作用:一个双层序列经过运算变成一个单层序列
- 输入:必须是一个双层序列
- 输出:一个单层序列,其中每个元素是双层序列中每个subseq最后一个(或第一个)元素。
......@@ -74,9 +74,9 @@ expand 的使用示例如下,详细见 :ref:`api_v2.layer_expand` 配置API。
ex = expand(input=layer1,
expand_as=layer2,
expand_level=ExpandLevel.FROM_TIMESTEP)
expand_level=ExpandLevel.FROM_NO_SEQUENCE)
- `expand_level=ExpandLevel.FROM_TIMESTEP` 时(默认值):
- `expand_level=ExpandLevel.FROM_NO_SEQUENCE` 时(默认值):
- 作用:一个0层序列经过运算扩展成一个单层序列,或者一个双层序列
- 输入:layer1必须是一个0层序列,是待扩展的数据;layer2 可以是一个单层序列,或者是一个双层序列,提供扩展的长度信息
......
......@@ -81,7 +81,7 @@
* 在本例中,我们将原始数据的每一组,通过\ :code:`recurrent_group`\ 进行拆解,拆解成的每一句话再通过一个LSTM网络。这和单层RNN的配置是等价的。
* 与单层RNN的配置类似,我们只需要使用LSTM encode成的最后一个向量。所以对\ :code:`recurrent_group`\ 进行了\ :code:`last_seq`\ 操作。但和单层RNN不同,我们是对每一个子序列取最后一个元素,因此\ :code:`agg_level=AggregateLevel.EACH_SEQUENCE`\ 。
* 与单层RNN的配置类似,我们只需要使用LSTM encode成的最后一个向量。所以对\ :code:`recurrent_group`\ 进行了\ :code:`last_seq`\ 操作。但和单层RNN不同,我们是对每一个子序列取最后一个元素,因此\ :code:`agg_level=AggregateLevel.TO_SEQUENCE`\ 。
* 至此,\ :code:`lstm_last`\ 便和单层RNN配置中的\ :code:`lstm_last`\ 具有相同的结果了。
......
......@@ -84,7 +84,7 @@ no changes added to commit (use "git add" and/or "git commit -a")
➜ docker build -t paddle:dev .
```
随后可以用这个开发镜像开build PaddlePaddle的源码。比如如果要build一个不依赖GPU,但是支持AVX指令集,并且包括unit tests的PaddlePaddle,可以:
随后可以用这个开发镜像开build PaddlePaddle的源码。比如如果要build一个不依赖GPU,但是支持AVX指令集,并且包括unit tests的PaddlePaddle,可以:
```bash
➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev
......
......@@ -4,9 +4,9 @@ We sincerely appreciate your contributions. You can use fork and pull request
workflow to merge your code.
## Code Requirements
- Your code must be fully documented by
[doxygen](http://www.stack.nl/~dimitri/doxygen/) style.
- Make sure the compiler option WITH\_STYLE\_CHECK is on and the compiler
- Your code comments must be fully documented by
[Doxygen](http://www.stack.nl/~dimitri/doxygen/) style.
- Make sure the compiler option `WITH_STYLE_CHECK` is on and the compiler
passes the code style check.
- All code must have unit test.
- Pass all unit tests.
......@@ -20,32 +20,25 @@ It's just that simple.
## Clone
Paddle is currently using [git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/).
The **develop** is the main branch, and other user's branches are feature branches.
Clone remote repository.
Once you've created a fork, you can use your favorite git client to clone your
repo or just head straight to the command line:
```shell
# Clone your fork to your local machine
git clone --branch develop https://github.com/USERNAME/Paddle.git
```
If your repository doesn't contain **develop** branch, just create it by your own.
```shell
git clone https://github.com/USERNAME/Paddle.git Paddle
cd Paddle
git checkout -b develop # create develop branch.
git remote add upstream https://github.com/PaddlePaddle/Paddle.git # add upstream to baidu/Paddle
git pull upstream develop # update to upstream
```bash
➜ git clone https://github.com/USERNAME/Paddle
cd Paddle
```
Then you can start to develop by making a local developement branch
## Create a local branch
Paddle is currently using [Git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/).
```shell
git checkout -b MY_COOL_STUFF_BRANCH
All feature and bug fix development work should be done on a new branch, generally create new branch from `develop` branch .
```bash
➜ git checkout -b my-cool-stuff
```
Before the checkout, you need to keep the current branch directory clean, otherwise the untracked file will be brought to the new branch, which can be inspected by `git status`.
## Using `pre-commit` hook
Paddle developers use [pre-commit](http://pre-commit.com/) tool to manage git
......@@ -58,89 +51,169 @@ To use [pre-commit](http://pre-commit.com/), you should install it by
`pip install pre-commit`, and currently, Paddle uses `clang-format` to format
c/cpp sources. Please make sure clang-format 3.8+ installed.
Then just run `pre-commit install` in your Paddle clone directory. When you
commit your code, the pre-commit hook will check the local code if there is
Install and run it as follow:
```bash
➜ pip install pre-commit
➜ pre-commit install
```
When you commit your code, the pre-commit hook will check the local code if there is
anything not suitable to commit, and so on.
## Start to develop
In this tutorial, I delete a line in README.md and created a new file.
We can use `git status` to inspect the changes of current directory, `git diff` to see difference.
```bash
➜ git status
On branch test
Changes not staged for commit:
(use "git add <file>..." to update what will be committed)
(use "git checkout -- <file>..." to discard changes in working directory)
modified: README.md
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
no changes added to commit (use "git add" and/or "git commit -a")
```
## Build and Test
We package PaddlePaddle's compile environment into a Docker image, called the develop image named `paddle:dev`, it contains all compiling tools that PaddlePaddle needs.
If you want to build the develop image, just run:
```bash
➜ docker build -t paddle:dev .
```
Then we can use the develop image to build PaddlePaddle source. For example:
```bash
➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev
```
The above command will compile PaddlePaddle and create a Dockerfile for building production image. All the generated files are in the build directory. "WITH_GPU" controls if the generated production image supports GPU. "WITH_AVX" controls if the generated production image supports AVX. "WITH_TEST" controls if the unit test will be generated.
Then we can generate the production image by copying the compiled PaddlePaddle program into the image by
```bash
➜ docker build -t paddle:prod -f build/Dockerfile .
```
Run unit test finally:
```bash
➜ docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest"
```
For more details, you can read [this doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_en.rst).
## Commit
Commit your changes by following command lines:
Next we cancel the changes to the README.md file and then commit our changes by following command lines:
```bash
➜ git checkout -- README.md
➜ git status
On branch test
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
nothing added to commit but untracked files present (use "git add" to track)
➜ git add test
```
```shell
# show the working tree status
git status
# add modified files
git add xx
env EDITOR=vim git commit # You can write your comments by vim/nano/emacs.
We should write a description of each commit by `git commit` to allow others to know
the changes in these files.
```bash
➜ git commit
CRLF end-lines remover...............................(no files to check)Skipped
yapf.................................................(no files to check)Skipped
Check for added large files..............................................Passed
Check for merge conflicts................................................Passed
Check for broken symlinks................................................Passed
Detect Private Key...................................(no files to check)Skipped
Fix End of Files.....................................(no files to check)Skipped
clang-formater.......................................(no files to check)Skipped
[my-cool-stuff c703c041] add test file
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 233
```
The first line of commit infomation is the title. The second and later lines
are the details if any.
## Keeping Fork Up to Date
Before pull your request, you should sync your code from the latest PaddlePaddle.
To do this, you'll need to add a remote at first:
```shell
# see the current configured remote repository
git remote -v
# add upstream repository
git remote add upstream https://github.com/PaddlePaddle/Paddle.git
# verify the new upstream
git remote -v
```bash
➜ git remote add upstream https://github.com/PaddlePaddle/Paddle
➜ git remote
origin
upstream
```
Update your fork with the latest upstream changes:
```shell
git pull --rebase upstream develop
```bash
➜ git fetch upstream
➜ git pull upstream develop
```
If there are no unique commits locally, git will simply perform a fast-forward.
However, if you have been making changes (in the vast majority of cases you
probably shouldn't be), you may have to deal with conflicts.
Now, your local master branch is up-to-date with everything modified upstream.
## Push to GitHub
```shell
```bash
# push to your repository in Github
git push -u origin MY_COOL_STUFF_BRANCH # create remote branch MY_COOL_STUFF_BRANCH to origin.
➜ git push origin my-cool-stuff
```
## Pull Request
## Create an issue and a Pull Request
Create an Issue to describe the problem and record its number.
Go to the page for your fork on GitHub, select your development branch,
and click the **pull request button**.
## Update your pull request with the lastest version
During the code review, your pull request may become stale because new commits in
baidu/Paddle. GitHub allows autmotic update if there is no conflict. You can do this
by clicking the "Update Branch" button in your pull request page. However, in the case
of conflict, you need to do the update manually. You need to do the following on
your local repository:
```shell
git checkout MY_COOL_STUFF_BRANCH
git pull upstream develop
# You may need to resolve the conflict according to the git prompt.
# Make and test your code.
git push origin MY_COOL_STUFF_BRANCH
and click the `New pull request`.
<img width="295" alt="screen shot 2017-04-26 at 9 09 28 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436054/a6d98c66-2ac4-11e7-9cb1-18dd13150230.png">
Then select the target branch:
<img width="750" alt="screen shot 2017-04-26 at 9 11 52 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436139/f83b1e6c-2ac4-11e7-8c0e-add499023c46.png">
We can add `resolve #Issue number` in PR description to close the issue automatically after the PR is merge. More details in <https://help.github.com/articles/closing-issues-via-commit-messages/>.
Then wait for review, if there need to modify, refer to the above steps to update the corresponding origin branch.
## Delete origin branch
After the PR is merge into the main repository, we can delete the remote branch on the PR page.
<img width="775" alt="screen shot 2017-04-26 at 9 18 24 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436457/e4cdd472-2ac5-11e7-9272-badc76c4a23e.png">
Or just run:
```bash
➜ git push origin :my-cool-stuff
```
Now your Pull Request is updated with the latest version.
## Revise your pull request
## Delete local branch
When you revise your pull request according to reviewer's comments, please use 'git commit' instead of 'git commit --amend' to commit your changes so that the reviewers can see the difference between the new pull requrest and the old pull request.
Finally, we delete local branch:
The possible commands are
```bash
➜ git checkout develop
```shell
git checkout MY_COOL_STUFF_BRANCH
git pull upstream develop # update local to newest code base.
# May be some conflicts will occured.
# And develop your cool stuff
env EDITOR=vim git commit # add your revise log
git push origin MY_COOL_STUFF_BRANCH
# delete my-cool-stuff branch
➜ git branch -D my-cool-stuff
```
......@@ -38,7 +38,7 @@ endif()
mark_as_advanced(CMAKE_Go_COMPILER)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/CMakeGoCompiler.cmake.in
configure_file(${CMAKE_MODULE_PATH}/CMakeGoCompiler.cmake.in
${CMAKE_PLATFORM_INFO_DIR}/CMakeGoCompiler.cmake @ONLY)
set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER")
......@@ -21,7 +21,7 @@ function(CheckCompilerCXX11Flag)
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3)
message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.3 required.")
endif()
endif()
endif()
endif()
endfunction()
......@@ -42,4 +42,4 @@ if (CUDA_VERSION VERSION_GREATER "8.0" OR CUDA_VERSION VERSION_EQUAL "8.0")
list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60")
endif()
set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS})
\ No newline at end of file
set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS})
set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go")
file(MAKE_DIRECTORY ${GOPATH})
set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle")
file(MAKE_DIRECTORY ${PADDLE_IN_GOPATH})
function(ExternalGoProject_Add TARG)
add_custom_target(${TARG} env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get ${ARGN})
endfunction(ExternalGoProject_Add)
function(add_go_executable NAME)
file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
-o "${CMAKE_CURRENT_BINARY_DIR}/${NAME}"
${CMAKE_GO_FLAGS} ${GO_SOURCE}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
add_custom_target(${NAME} ALL DEPENDS ${OUTPUT_DIR}/.timestamp ${ARGN})
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${NAME} DESTINATION bin)
endfunction(add_go_executable)
function(ADD_GO_LIBRARY NAME BUILD_TYPE)
function(GO_LIBRARY NAME BUILD_TYPE)
if(BUILD_TYPE STREQUAL "STATIC")
set(BUILD_MODE -buildmode=c-archive)
set(LIB_NAME "lib${NAME}.a")
......@@ -32,15 +17,34 @@ function(ADD_GO_LIBRARY NAME BUILD_TYPE)
endif()
file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
file(RELATIVE_PATH rel ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
# find Paddle directory.
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
get_filename_component(PADDLE_DIR ${PARENT_DIR} DIRECTORY)
# automatically get all dependencies specified in the source code
# for given target.
add_custom_target(goGet env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ${rel}/...)
# make a symlink that references Paddle inside $GOPATH, so go get
# will use the local changes in Paddle rather than checkout Paddle
# in github.
add_custom_target(copyPaddle
COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH})
add_dependencies(goGet copyPaddle)
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
-o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}"
${CMAKE_GO_FLAGS} ${GO_SOURCE}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${NAME} ALL DEPENDS ${OUTPUT_DIR}/.timestamp ${ARGN})
add_dependencies(${NAME} goGet)
if(NOT BUILD_TYPE STREQUAL "STATIC")
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} DESTINATION bin)
endif()
endfunction(ADD_GO_LIBRARY)
endfunction(GO_LIBRARY)
package main
import (
"fmt"
"net"
"net/http"
"net/rpc"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/namsral/flag"
"github.com/PaddlePaddle/Paddle/go/master"
"github.com/PaddlePaddle/recordio"
)
func main() {
port := flag.Int("port", 8080, "port of the master server.")
dataset := flag.String("training_dataset", "", "dataset: comma separated path to RecordIO paths, supports golb patterns.")
faultTolerance := flag.Bool("fault_tolerance", false, "enable fault tolerance (requires etcd).")
taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.")
taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.")
chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.")
flag.Parse()
if *dataset == "" {
panic("no dataset specified.")
}
if *faultTolerance {
panic("fault tolernance not implemented.")
}
var chunks []master.Chunk
var paths []string
ss := strings.Split(*dataset, ",")
fmt.Println(ss)
for _, s := range ss {
match, err := filepath.Glob(s)
if err != nil {
panic(err)
}
paths = append(paths, match...)
}
if len(paths) == 0 {
panic("no valid datset specified.")
}
idx := 0
for _, path := range paths {
f, err := os.Open(path)
if err != nil {
panic(err)
}
index, err := recordio.LoadIndex(f)
if err != nil {
panic(err)
}
f.Close()
count := index.NumChunks()
for i := 0; i < count; i++ {
chunk := master.Chunk{
Idx: idx,
Path: path,
Index: *index.ChunkIndex(i),
}
chunks = append(chunks, chunk)
}
}
s := master.NewService(chunks, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
err := rpc.Register(s)
if err != nil {
panic(err)
}
rpc.HandleHTTP()
l, err := net.Listen("tcp", ":"+strconv.Itoa(*port))
if err != nil {
panic(err)
}
err = http.Serve(l, nil)
if err != nil {
panic(err)
}
}
package main
import (
"flag"
"net"
"net/http"
"net/rpc"
"strconv"
"github.com/PaddlePaddle/Paddle/paddle/go/pserver"
"github.com/namsral/flag"
"github.com/PaddlePaddle/Paddle/go/pserver"
)
func main() {
port := flag.Int("p", 0, "port of the pserver")
port := flag.Int("port", 0, "port of the pserver")
flag.Parse()
s := pserver.NewService()
......
package master
import (
"errors"
"log"
"sync"
"time"
"github.com/PaddlePaddle/recordio"
)
const (
targetTaskCount = 300
)
// errors
var (
ErrNoMoreTask = errors.New("no more task for current pass")
ErrPendingTaskNotFound = errors.New("pending task not found")
)
// Service is the master server service.
type Service struct {
timeoutDur time.Duration
timeoutMax int
mu sync.Mutex
taskQueues taskQueues
}
// Recover recovers service state from etcd.
func Recover() (*Service, error) {
// TODO(helin): recover from snapshot state from etcd.
return nil, nil
}
func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
id := 0
if chunksPerTask <= 0 {
chunksPerTask = 1
}
var result []taskEntry
var cur taskEntry
for i, c := range chunks {
if i%chunksPerTask == 0 && len(cur.Task.Chunks) > 0 {
cur.Task.ID = id
id++
result = append(result, cur)
cur.Task.Chunks = nil
}
cur.Task.Chunks = append(cur.Task.Chunks, c)
}
if len(cur.Task.Chunks) > 0 {
cur.Task.ID = id
id++
result = append(result, cur)
}
return result
}
// NewService creates a new service.
func NewService(chunks []Chunk, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service {
s := &Service{}
s.timeoutDur = timeoutDur
s.timeoutMax = timeoutMax
s.taskQueues = taskQueues{}
s.taskQueues.Pending = make(map[int]taskEntry)
s.taskQueues.Todo = partition(chunks, chunksPerTask)
return s
}
// Chunk is a chunk of data consisted of several data instances.
type Chunk struct {
Idx int // index of the chunk within the file
Path string
Index recordio.Index // block index
}
// Task is the basic unit of data instances assigned to trainers.
type Task struct {
ID int
Chunks []Chunk
}
type taskEntry struct {
Epoch int
NumTimeout int
Task Task
}
type taskQueues struct {
Todo []taskEntry
Pending map[int]taskEntry // map from task ID to task entry
Done []taskEntry
Failed []Task
}
// *must* be called with s.mu being held.
func (s *Service) snapshot() error {
// TODO(helin): snapshot state on etcd.
return nil
}
// GetTask gets a new task from the service.
func (s *Service) GetTask(dummy int, task *Task) error {
s.mu.Lock()
defer s.mu.Unlock()
if len(s.taskQueues.Todo) == 0 {
return ErrNoMoreTask
}
t := s.taskQueues.Todo[0]
t.Epoch++
s.taskQueues.Todo = s.taskQueues.Todo[1:]
s.taskQueues.Pending[t.Task.ID] = t
err := s.snapshot()
if err != nil {
return err
}
time.AfterFunc(s.timeoutDur, func(taskID int, epoch int) func() {
return func() {
s.mu.Lock()
defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID]
if !ok {
return
}
if t.Epoch != epoch {
// new epoch, task launched after the
// schedule of this timeout check.
return
}
defer func() {
err := s.snapshot()
if err != nil {
log.Println(err)
}
}()
delete(s.taskQueues.Pending, t.Task.ID)
t.NumTimeout++
if t.NumTimeout > s.timeoutMax {
s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task)
return
}
s.taskQueues.Todo = append(s.taskQueues.Todo, t)
}
}(t.Task.ID, t.Epoch))
return nil
}
// TaskFinished tell the service that a task is finished.
func (s *Service) TaskFinished(taskID int, dummy *int) error {
s.mu.Lock()
defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID]
if !ok {
return ErrPendingTaskNotFound
}
// task finished, reset timeout
t.NumTimeout = 0
s.taskQueues.Done = append(s.taskQueues.Done, t)
delete(s.taskQueues.Pending, taskID)
return s.snapshot()
}
package master
import "testing"
func TestPartitionCount(t *testing.T) {
cs := make([]Chunk, 100)
ts := partition(cs, 5)
if len(ts) != 20 {
t.Error(len(ts))
}
cs = make([]Chunk, 101)
ts = partition(cs, 5)
if len(ts) != 21 {
t.Error(len(ts))
}
ts = partition(cs, 1)
if len(ts) != 101 {
t.Error(len(ts))
}
ts = partition(cs, 0)
if len(ts) != 101 {
t.Error(len(ts))
}
}
func TestPartionIndex(t *testing.T) {
cs := make([]Chunk, 100)
ts := partition(cs, 20)
for i := range ts {
if ts[i].Task.ID != i {
t.Error(ts[i], i)
}
}
}
cmake_minimum_required(VERSION 3.0)
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
project(cxx_go C Go)
include(golang)
include(flags)
go_library(client STATIC)
add_subdirectory(test)
......@@ -39,10 +39,11 @@ import "C"
import (
"log"
"strings"
"sync"
"unsafe"
"github.com/PaddlePaddle/Paddle/paddle/go/pserver"
"github.com/PaddlePaddle/Paddle/go/pserver"
)
var nullPtr = unsafe.Pointer(uintptr(0))
......@@ -78,34 +79,54 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte {
return nil
}
// create a Go clice backed by a C array,
// reference: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
// create a Go clice backed by a C array, reference:
// https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
//
// Go garbage collector will not interact with this data, need
// to be freed properly.
return (*[1 << 30]byte)(p)[:len:len]
}
type selector bool
func (s selector) Select() bool {
return bool(s)
}
type lister []pserver.Server
func (l lister) List() []pserver.Server {
return l
}
//export paddle_new_pserver_client
func paddle_new_pserver_client(addr *C.char) C.client {
c := pserver.NewClient(C.GoString(addr))
func paddle_new_pserver_client(addrs *C.char, selected int) C.client {
a := C.GoString(addrs)
as := strings.Split(a, ",")
servers := make([]pserver.Server, len(as))
for i := range as {
servers[i].Index = i
servers[i].Addr = as[i]
}
c := pserver.NewClient(lister(servers), len(as), selector(selected != 0))
return add(c)
}
//export paddle_new_etcd_pserver_client
func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.client {
// TODO(helin): fault tolerant pserver client using etcd.
panic("not implemented.")
}
//export paddle_pserver_client_release
func paddle_pserver_client_release(client C.client) {
c := remove(client)
c.Cleanup()
remove(client)
}
//export paddle_begin_init_params
func paddle_begin_init_params(client C.client, pserver_config unsafe.Pointer, config_len C.int) C.int {
func paddle_begin_init_params(client C.client) C.int {
c := get(client)
b := cArrayToSlice(pserver_config, int(config_len))
selected, err := c.BeginInitParams(b)
if err != nil {
log.Println(err)
return -1
}
if selected {
if selected := c.BeginInitParams(); selected {
return 1
}
return 0
......@@ -227,7 +248,7 @@ func paddle_get_params(client C.client, names **C.char, dst **C.paddle_parameter
func paddle_save_model(client C.client, path *C.char) C.int {
p := C.GoString(path)
c := get(client)
err := c.SaveModel(p)
err := c.Save(p)
if err != nil {
log.Println(err)
return -1
......
cmake_minimum_required(VERSION 3.0)
include_directories(${CMAKE_BINARY_DIR})
add_executable(main main.c)
add_dependencies(main client)
if(APPLE)
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
endif()
target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a)
#include "libclient.h"
#include <stdio.h>
//#include "gtest/gtest.h"
#include "libclient.h"
void panic() {
void fail() {
// TODO(helin): fix: gtest using cmake is not working, using this
// hacky way for now.
*(void*)0;
printf("test failed.\n");
exit(-1);
}
int main() {
char addr[] = "localhost:3000";
client c = paddle_new_pserver_client(addr);
client c = paddle_new_pserver_client(addr, 1);
retry:
if (paddle_begin_init_params(c, NULL, 0)) {
if (paddle_begin_init_params(c)) {
paddle_parameter param;
char name_a[] = "param_a";
char name_b[] = "param_b";
char content[] = {0x00, 0x11, 0x22};
unsigned char content[] = {0x00, 0x11, 0x22};
param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
param.name = name_a;
param.content = content;
......@@ -35,34 +36,34 @@ retry:
goto retry;
}
} else {
panic();
fail();
}
char content[] = {0x00, 0x11, 0x22};
unsigned char content[] = {0x00, 0x11, 0x22};
paddle_gradient grads[2] = {
{"param_a", PADDLE_ELEMENT_TYPE_INT32, content, 3},
{"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}};
if (!paddle_send_grads(c, grads, 2)) {
panic();
fail();
}
paddle_parameter* params[2] = {NULL, NULL};
char* names[] = {"param_a", "param_b"};
if (!paddle_get_params(c, names, params, 2)) {
panic();
fail();
}
// get parameters again by reusing the allocated parameter buffers.
if (!paddle_get_params(c, names, params, 2)) {
panic();
fail();
}
paddle_release_param(params[0]);
paddle_release_param(params[1]);
if (!paddle_save_model(c, "/tmp/")) {
panic();
fail();
}
return 0;
......
package pserver
import (
"hash/fnv"
"log"
"sort"
"time"
"github.com/PaddlePaddle/Paddle/go/pserver/internal/connection"
)
// TODO(helin): add RPC call retry logic
// Selector selects if the client should initialize parameter servers.
type Selector interface {
Select() bool
}
// Server is the identification of a parameter Server.
type Server struct {
Index int
Addr string
}
// Lister lists currently available parameter servers.
type Lister interface {
List() []Server
}
// Client is the client to parameter servers.
type Client struct {
sel Selector
pservers []*connection.Conn
}
// NewClient creates a new client.
func NewClient(l Lister, pserverNum int, sel Selector) *Client {
c := &Client{sel: sel}
c.pservers = make([]*connection.Conn, pserverNum)
for i := 0; i < pserverNum; i++ {
c.pservers[i] = connection.New()
}
go c.monitorPservers(l, pserverNum)
return c
}
// monitorPservers monitors pserver addresses, and updates connection
// when the address changes.
func (c *Client) monitorPservers(l Lister, pserverNum int) {
knownServers := make([]Server, pserverNum)
ticker := time.NewTicker(10 * time.Second)
monitor := func() {
curServers := make([]Server, pserverNum)
list := l.List()
for _, l := range list {
curServers[l.Index] = l
}
for i := range knownServers {
if knownServers[i].Addr != curServers[i].Addr {
err := c.pservers[i].Connect(curServers[i].Addr)
if err != nil {
log.Println(err)
// connect to addr failed, set
// to last known addr in order
// to retry next time.
curServers[i].Addr = knownServers[i].Addr
}
}
}
knownServers = curServers
}
monitor()
for _ = range ticker.C {
monitor()
}
}
// BeginInitParams begins to initialize parameters on parameter
// servers.
//
// BeginInitParams will be called from multiple trainers, only one
// trainer will be selected to initialize the parameters on parameter
// servers. Other trainers will be blocked until the initialization is
// done, and they need to get the initialized parameters from
// parameter servers using GetParams.
func (c *Client) BeginInitParams() bool {
return c.sel.Select()
}
// InitParam initializes the parameter on parameter servers.
func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error {
var dummy int
return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, &dummy)
}
// FinishInitParams tells parameter servers client has sent all
// parameters to parameter servers as initialization.
func (c *Client) FinishInitParams() error {
for _, p := range c.pservers {
var dummy int
err := p.Call("Service.FinishInitParams", dummy, &dummy)
if err != nil {
return err
}
}
return nil
}
// SendGrads sends gradients to parameter servers for updating
// parameters.
func (c *Client) SendGrads(grads []Gradient) error {
errCh := make(chan error, len(grads))
for _, g := range grads {
go func(g Gradient) {
var dummy int
err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, &dummy)
errCh <- err
}(g)
}
recv := 0
for err := range errCh {
if err != nil {
return err
}
recv++
if recv == len(grads) {
break
}
}
return nil
}
type result struct {
idx int
param Parameter
err error
}
type results []result
func (r results) Len() int {
return len(r)
}
func (r results) Less(i int, j int) bool {
return r[i].idx < r[j].idx
}
func (r results) Swap(i int, j int) {
r[i], r[j] = r[j], r[i]
}
// GetParams gets parameters from parameter servers.
func (c *Client) GetParams(names []string) ([]Parameter, error) {
rCh := make(chan result, len(names))
for idx, name := range names {
go func(name string, idx int) {
var parameter Parameter
err := c.pservers[c.partition(name)].Call("Service.GetParam", name, &parameter)
rCh <- result{idx: idx, param: parameter, err: err}
}(name, idx)
}
var rs results
recv := 0
for r := range rCh {
if r.err != nil {
return nil, r.err
}
rs = append(rs, r)
recv++
if recv == len(names) {
break
}
}
sort.Sort(rs)
ps := make([]Parameter, len(rs))
for i := range rs {
ps[i] = rs[i].param
}
return ps, nil
}
// Save indicates parameters to save the parameter to the given path.
func (c *Client) Save(path string) error {
errCh := make(chan error, len(c.pservers))
for _, p := range c.pservers {
var dummy int
err := p.Call("Service.Save", path, &dummy)
errCh <- err
}
recv := 0
for err := range errCh {
if err != nil {
return err
}
recv++
if recv == len(c.pservers) {
break
}
}
// TODO(helin): there will be many files under path, need to
// merge them into a single file.
return nil
}
func strHash(s string) uint32 {
h := fnv.New32a()
h.Write([]byte(s))
return h.Sum32()
}
// TODO(helin): now partition only select which parameter server to
// send the entire parameter. We need to partition a parameter into
// small blocks and send to different parameter servers.
func (c *Client) partition(key string) int {
return int(strHash(key) % uint32(len(c.pservers)))
}
package pserver_test
import (
"net"
"net/http"
"net/rpc"
"strconv"
"strings"
"testing"
"github.com/PaddlePaddle/Paddle/go/pserver"
)
const numPserver = 10
var port [numPserver]int
func init() {
for i := 0; i < numPserver; i++ {
l, err := net.Listen("tcp", ":0")
if err != nil {
panic(err)
}
ss := strings.Split(l.Addr().String(), ":")
p, err := strconv.Atoi(ss[len(ss)-1])
if err != nil {
panic(err)
}
port[i] = p
go func(l net.Listener) {
s := pserver.NewService()
server := rpc.NewServer()
err := server.Register(s)
if err != nil {
panic(err)
}
mux := http.NewServeMux()
mux.Handle(rpc.DefaultRPCPath, server)
err = http.Serve(l, mux)
if err != nil {
panic(err)
}
}(l)
}
}
type selector bool
func (s selector) Select() bool {
return bool(s)
}
type lister []pserver.Server
func (l lister) List() []pserver.Server {
return l
}
func TestClientFull(t *testing.T) {
servers := make([]pserver.Server, numPserver)
for i := 0; i < numPserver; i++ {
servers[i] = pserver.Server{Index: i, Addr: ":" + strconv.Itoa(port[i])}
}
c := pserver.NewClient(lister(servers), len(servers), selector(true))
selected := c.BeginInitParams()
if !selected {
t.Fatal("should be selected.")
}
const numParameter = 100
for i := 0; i < numParameter; i++ {
var p pserver.Parameter
p.Name = "p_" + strconv.Itoa(i)
p.ElementType = pserver.Float32
p.Content = make([]byte, (i+1)*100)
err := c.InitParam(pserver.ParameterWithConfig{Param: p})
if err != nil {
t.Fatal(err)
}
}
err := c.FinishInitParams()
if err != nil {
t.Fatal(err)
}
var grads []pserver.Gradient
for i := 0; i < numParameter/2; i++ {
var g pserver.Gradient
g.Name = "p_" + strconv.Itoa(i)
g.ElementType = pserver.Float32
g.Content = make([]byte, (i+1)*100)
grads = append(grads, g)
}
err = c.SendGrads(grads)
if err != nil {
t.Fatal(err)
}
names := make([]string, numParameter)
for i := 0; i < numParameter; i++ {
names[i] = "p_" + strconv.Itoa(i)
}
params, err := c.GetParams(names)
if err != nil {
t.Fatal(err)
}
if len(names) != len(params) {
t.Fatalf("parameter size not match, need: %d, have: %d", len(names), len(params))
}
for i := range params {
if names[i] != params[i].Name {
t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i])
}
}
}
package connection
import (
"errors"
"net/rpc"
"sync"
)
// TODO(helin): add TCP re-connect logic
// Conn is a connection to a parameter server
type Conn struct {
mu sync.Mutex
client *rpc.Client
waitConn chan struct{}
}
// New creates a new connection.
func New() *Conn {
c := &Conn{}
return c
}
// Connect connects the connection to a address.
func (c *Conn) Connect(addr string) error {
c.mu.Lock()
if c.client != nil {
err := c.client.Close()
if err != nil {
c.mu.Unlock()
return err
}
c.client = nil
}
c.mu.Unlock()
client, err := rpc.DialHTTP("tcp", addr)
if err != nil {
return err
}
c.mu.Lock()
defer c.mu.Unlock()
if c.client == nil {
c.client = client
if c.waitConn != nil {
close(c.waitConn)
c.waitConn = nil
}
} else {
return errors.New("client already set from a concurrent goroutine")
}
return nil
}
// Call make a RPC call.
//
// Call will be blocked until the connection to remote RPC service
// being established.
func (c *Conn) Call(serviceMethod string, args interface{}, reply interface{}) error {
c.mu.Lock()
client := c.client
var waitCh chan struct{}
if client == nil {
if c.waitConn != nil {
waitCh = c.waitConn
} else {
waitCh = make(chan struct{})
c.waitConn = waitCh
}
}
c.mu.Unlock()
if waitCh != nil {
// wait until new connection being established
<-waitCh
return c.Call(serviceMethod, args, reply)
}
return client.Call(serviceMethod, args, reply)
}
......@@ -29,11 +29,11 @@ func newOptimizer(t optimizerType, learning_rate float64) *optimizer {
func (o *optimizer) UpdateParameter(p Parameter, g Gradient) error {
if len(p.Content) != len(g.Content) {
return fmt.Errorf("parameter and gradient length not match, parameter: %d, gradient: %d", len(p.Content), len(g.Content))
return fmt.Errorf("Name: %s, parameter and gradient length not match, parameter: %d, gradient: %d", p.Name, len(p.Content), len(g.Content))
}
if p.ElementType != g.ElementType {
return fmt.Errorf("parameter and gradient element type not match, parameter: %v, gradient: %v", p.ElementType, g.ElementType)
return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", p.Name, p.ElementType, g.ElementType)
}
r := C.paddle_update_parameter(o.opt, unsafe.Pointer(&p.Content[0]), C.paddle_element_type(p.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content)))
......
......@@ -49,33 +49,12 @@ type Service struct {
// NewService creates a new service.
func NewService() *Service {
s := &Service{}
s := &Service{opt: newOptimizer(sgd, 0.01)}
s.paramMap = make(map[string]Parameter)
s.initialized = make(chan struct{})
return s
}
// BeginInitParams tells the parameter server that the parameter
// initialization has begun.
func (s *Service) BeginInitParams(config []byte, dummy *int) error {
select {
case <-s.initialized:
return ErrAlreadyInitialized
default:
}
s.mu.Lock()
defer s.mu.Unlock()
if s.opt != nil {
s.opt.Cleanup()
}
// TODO(helin): parse learning rate from config
s.opt = newOptimizer(sgd, 0.01)
return nil
}
// InitParam initializes a parameter.
func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error {
select {
......@@ -109,75 +88,45 @@ func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error {
return nil
}
// SendGrads sends gradients to parameter servers for parameter
// SendGrad sends gradient to parameter servers for parameter
// optimization.
func (s *Service) SendGrads(grads []Gradient, dummy *int) error {
func (s *Service) SendGrad(g Gradient, dummy *int) error {
select {
case <-s.initialized:
default:
return ErrUninitialized
}
count := len(grads)
if count == 0 {
return nil
}
s.mu.Lock()
defer s.mu.Unlock()
for _, g := range grads {
if _, ok := s.paramMap[g.Name]; !ok {
return fmt.Errorf("parameter: %s does not exist", g.Name)
}
}
errCh := make(chan error, count)
for _, g := range grads {
go func(p Parameter, g Gradient) {
err := s.opt.UpdateParameter(p, g)
errCh <- err
}(s.paramMap[g.Name], g)
p, ok := s.paramMap[g.Name]
if !ok {
return fmt.Errorf("parameter: %s does not exist", g.Name)
}
recv := 0
for err := range errCh {
if err != nil {
return err
}
recv++
if recv == count {
break
}
}
return nil
return s.opt.UpdateParameter(p, g)
}
// GetParams gets parameters from the parameter server.
func (s *Service) GetParams(names []string, parameters *[]Parameter) error {
// GetParam gets parameters from the parameter server.
func (s *Service) GetParam(name string, parameter *Parameter) error {
<-s.initialized
s.mu.Lock()
defer s.mu.Unlock()
for _, n := range names {
if _, ok := s.paramMap[n]; !ok {
return fmt.Errorf("parameter: %s does not exist", n)
}
}
*parameters = make([]Parameter, len(names))
for i, n := range names {
// The parameter content (a byte slice) may change
// during RPC serialization due to write from other
// goroutine, we allow it since mini-batch based deep
// learning optimization methods are stochastic in
// nature. This race condition is allowed deliberately
// to save the program from making a copy of the
// paramter content.
(*parameters)[i] = s.paramMap[n]
p, ok := s.paramMap[name]
if !ok {
return fmt.Errorf("parameter: %s does not exist", name)
}
// The parameter content (a byte slice) may change
// during RPC serialization due to write from other
// goroutine, we allow it since mini-batch based deep
// learning optimization methods are stochastic in
// nature. This race condition is allowed deliberately
// to save the program from making a copy of the
// paramter content.
*parameter = p
return nil
}
......
......@@ -4,23 +4,19 @@ import (
"reflect"
"sync"
"testing"
"time"
"github.com/PaddlePaddle/Paddle/paddle/go/pserver"
"github.com/PaddlePaddle/Paddle/go/pserver"
)
func TestFull(t *testing.T) {
s := pserver.NewService()
var dummy int
err := s.BeginInitParams(nil, &dummy)
if err != nil {
t.FailNow()
}
var p pserver.Parameter
p.Name = "param_a"
p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
p.ElementType = pserver.Int32
err = s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy)
var dummy int
err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy)
if err != nil {
t.FailNow()
}
......@@ -39,40 +35,39 @@ func TestFull(t *testing.T) {
t.FailNow()
}
var params []pserver.Parameter
err = s.GetParams([]string{"param_b", "param_a"}, &params)
var param pserver.Parameter
err = s.GetParam("param_b", &param)
if err != nil {
t.FailNow()
}
if len(params) != 2 || !reflect.DeepEqual(params[0], p1) || !reflect.DeepEqual(params[0], p1) {
if !reflect.DeepEqual(param, p1) {
t.FailNow()
}
grads := []pserver.Gradient{pserver.Gradient(p1), pserver.Gradient(p)}
err = s.SendGrads(grads, &dummy)
g1, g2 := pserver.Gradient(p1), pserver.Gradient(p)
err = s.SendGrad(g1, &dummy)
if err != nil {
t.FailNow()
}
err = s.SendGrad(g2, &dummy)
var params1 []pserver.Parameter
err = s.GetParams([]string{"param_b", "param_a"}, &params1)
if err != nil {
t.FailNow()
}
if len(params) != 2 {
var param1 pserver.Parameter
err = s.GetParam("param_a", &param1)
if err != nil {
t.FailNow()
}
// don't compare content, since it's already changed by
// gradient update.
params1[0].Content = nil
params1[0].Content = nil
param1.Content = nil
p.Content = nil
p1.Content = nil
if !reflect.DeepEqual(params1[0], p1) || !reflect.DeepEqual(params1[0], p1) {
if !reflect.DeepEqual(param1, p) {
t.FailNow()
}
}
......@@ -80,19 +75,7 @@ func TestFull(t *testing.T) {
func TestMultipleInit(t *testing.T) {
s := pserver.NewService()
var dummy int
err := s.BeginInitParams(nil, &dummy)
if err != nil {
t.FailNow()
}
// this is fine, it's possible for client to call init
// multiple times.
err = s.BeginInitParams(nil, &dummy)
if err != nil {
t.FailNow()
}
err = s.FinishInitParams(0, &dummy)
err := s.FinishInitParams(0, &dummy)
if err != nil {
t.FailNow()
}
......@@ -101,17 +84,12 @@ func TestMultipleInit(t *testing.T) {
if err != pserver.ErrAlreadyInitialized {
t.FailNow()
}
err = s.BeginInitParams(nil, &dummy)
if err != pserver.ErrAlreadyInitialized {
t.FailNow()
}
}
func TestUninitialized(t *testing.T) {
s := pserver.NewService()
var dummy int
err := s.SendGrads(nil, &dummy)
err := s.SendGrad(pserver.Gradient{}, &dummy)
if err != pserver.ErrUninitialized {
t.FailNow()
}
......@@ -123,8 +101,8 @@ func TestBlockUntilInitialized(t *testing.T) {
var wg sync.WaitGroup
wg.Add(1)
go func() {
var params []pserver.Parameter
err := s.GetParams(nil, &params)
var param pserver.Parameter
err := s.GetParam("param_a", &param)
if err != nil {
t.FailNow()
}
......@@ -143,11 +121,7 @@ func TestBlockUntilInitialized(t *testing.T) {
ch <- struct{}{}
}()
var dummy int
err := s.BeginInitParams(nil, &dummy)
if err != nil {
t.FailNow()
}
time.Sleep(50 * time.Millisecond)
select {
case <-ch:
......@@ -156,6 +130,16 @@ func TestBlockUntilInitialized(t *testing.T) {
default:
}
var p pserver.Parameter
p.Name = "param_a"
p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
p.ElementType = pserver.Int32
var dummy int
err := s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy)
if err != nil {
t.FailNow()
}
err = s.FinishInitParams(0, &dummy)
if err != nil {
t.FailNow()
......
......@@ -8,10 +8,12 @@ add_subdirectory(gserver)
add_subdirectory(pserver)
add_subdirectory(trainer)
add_subdirectory(scripts)
add_subdirectory(strings)
if(CMAKE_Go_COMPILER)
add_subdirectory(go)
endif()
# Do not build go directory until go cmake is working smoothly.
# if(CMAKE_Go_COMPILER)
# add_subdirectory(go)
# endif()
find_package(Boost QUIET)
......
......@@ -41,6 +41,7 @@ SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
paddle_network
paddle_proto
${external_project_dependencies}
${RDMA_LIBS}
)
IF(APPLE)
......@@ -73,6 +74,7 @@ SWIG_LINK_LIBRARIES(swig_paddle
${CMAKE_DL_LIBS}
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${RDMA_LD_FLAGS}
${START_END}
)
......
......@@ -17,10 +17,9 @@ limitations under the License. */
#include <stdio.h>
#include "hl_base.h"
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include "hl_neon_matrix_kernel.cuh"
#else
#include "hl_sse_matrix_kernel.cuh"
#ifndef __CUDA_ARCH__
#include "hl_cpu_matrix_kernel_detail.cuh"
#endif
/**
......@@ -114,35 +113,6 @@ void hl_cpu_apply_quaternary_op(Op op,
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda) {
for (int i = 0; i < dimM; i++) {
real tmp = agg.init();
for (int j = 0; j < dimN; j++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda,
real *B, int ldb) {
for (int i = 0; i < dimM; i++) {
real tmp = agg.init();
for (int j = 0; j < dimN; j++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
}
}
template <class Agg, class Op, class Saver>
void hl_cpu_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
......
......@@ -13,26 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_SSE_MATRIX_KERNEL_CUH_
#define HL_SSE_MATRIX_KERNEL_CUH_
#ifndef HL_MATRIX_KERNEL_DETAIL_CUH_
#define HL_MATRIX_KERNEL_DETAIL_CUH_
#include "hl_matrix_type.cuh"
#define VECTOR_SIZE 16
#ifndef PADDLE_TYPE_DOUBLE
/* number of float in vector */
#define VECTOR_LEN 4
#define VECTOR_SET _mm_set_ps1
#else
#if defined(__APPLE__) || defined(__OSX__)
#define _mm_set_pd1 _mm_set1_pd
#endif
/* number of double in vector */
#define VECTOR_LEN 2
#define VECTOR_SET _mm_set_pd1
#endif
inline bool hl_check_align(size_t size) {
return !(size & (VECTOR_SIZE - 1));
}
......@@ -41,27 +26,63 @@ inline bool hl_check_align(void *ptr) {
return hl_check_align(reinterpret_cast<size_t>(ptr));
}
#ifndef PADDLE_TYPE_DOUBLE
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
__m128 lo = _mm_unpacklo_ps(mm, mm);
__m128 hi = _mm_unpackhi_ps(mm, mm);
__m128 tmp1 = agg.vecOp(lo, hi);
__m128 tmp2 = _mm_movehl_ps(tmp1, tmp1);
__m128 ret = agg.vecOp(tmp1, tmp2);
template <class Agg, class Op, class Saver>
void hl_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda) {
for (int i = 0; i < dimM; i++) {
real tmp = agg.init();
for (int j = 0; j < dimN; j++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
}
}
return _mm_cvtss_f32(ret);
template <class Agg, class Op, class Saver>
void hl_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda,
real *B, int ldb) {
for (int i = 0; i < dimM; i++) {
real tmp = agg.init();
for (int j = 0; j < dimN; j++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
}
}
#else
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
__m128d lo = _mm_unpacklo_pd(mm, mm);
__m128d hi = _mm_unpackhi_pd(mm, mm);
__m128d ret = agg.vecOp(lo, hi);
return _mm_cvtsd_f64(ret);
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
#endif
template <class Agg, class Op, class Saver>
void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv,
......@@ -118,35 +139,6 @@ void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv,
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
/*
* MaxRow greater than or equal dimN
* dimN is multiples of VECTOR_LEN
......@@ -315,4 +307,4 @@ void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
}
}
#endif /* HL_SSE_MATRIX_KERNEL_CUH_ */
#endif /* HL_MATRIX_KERNEL_DETAIL_CUH_ */
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_CPU_SCALAR_CUH_
#define HL_CPU_SCALAR_CUH_
#define VECTOR_SIMD false
#define VECTOR_SET hl_vec_set
#ifndef PADDLE_TYPE_DOUBLE
/* size of float */
#define VECTOR_SIZE 4
#else
/* size of double */
#define VECTOR_SIZE 8
#endif
typedef real vecType;
/* Consider a real as a vector */
#define VECTOR_LEN 1
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
return mm;
}
INLINE real hl_vec_set(const real r) {
return r;
}
INLINE real hl_vec_classification_error(const real a,
const real b,
const real p,
const real r) {
return ((a > p) == (b > p)) ? 0.0f : 1.0f;
}
#endif // HL_CPU_SCALAR_CUH_
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_CPU_SIMD_NEON_CUH_
#define HL_CPU_SIMD_NEON_CUH_
#include <arm_neon.h>
#define VECTOR_SIMD true
#define VECTOR_SIZE 16
#define VECTOR_SET hl_vec_set
#ifndef PADDLE_TYPE_DOUBLE
typedef float32x4_t vecType;
/* number of float in vector */
#define VECTOR_LEN 4
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
float32x4_t rev = vrev64q_f32(mm);
float32x4_t tmp1 = agg.vecOp(rev, rev);
float32x2_t lo = vget_high_f32(rev);
float32x2_t hi = vget_low_f32(rev);
float32x4_t tmp2 = vcombine_f32(hi, lo);
float32x4_t ret = agg.vecOp(tmp1, tmp2);
return vgetq_lane_f32(ret, 0);
}
inline float32x4_t hl_vec_set(const real f) {
return vdupq_n_f32(f);
}
inline float32x4_t hl_vec_classification_error(const float32x4_t a,
const float32x4_t b,
const float32x4_t p,
const float32x4_t r) {
uint32x4_t tmp1 = vcgtq_f32(a, p);
uint32x4_t tmp2 = vcgtq_f32(b, p);
uint32x4_t tmp3 = veorq_u32(tmp1, tmp2);
return vcvtq_f32_u32(vandq_u32(tmp3, vcvtq_u32_f32(r)));
}
#else
#ifdef __aarch64__
typedef float64x2_t vecType;
/* number of float in vector */
#define VECTOR_LEN 2
#define VECTOR_SET vdupq_n_f64
#error To be implemented
#else
#error NEON instructions does not support double precision
#endif // __aarch64__
#endif
#endif // HL_CPU_SIMD_NEON_CUH_
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_CPU_SIMD_SSE_CUH_
#define HL_CPU_SIMD_SSE_CUH_
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#define VECTOR_SIMD true
#define VECTOR_SIZE 16
#define VECTOR_SET hl_vec_set
#ifndef PADDLE_TYPE_DOUBLE
typedef __m128 vecType;
/* number of float in vector */
#define VECTOR_LEN 4
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
__m128 lo = _mm_unpacklo_ps(mm, mm);
__m128 hi = _mm_unpackhi_ps(mm, mm);
__m128 tmp1 = agg.vecOp(lo, hi);
__m128 tmp2 = _mm_movehl_ps(tmp1, tmp1);
__m128 ret = agg.vecOp(tmp1, tmp2);
return _mm_cvtss_f32(ret);
}
inline __m128 hl_vec_set(const real f) {
return _mm_set_ps1(f);
}
inline __m128 hl_vec_classification_error(const __m128 a,
const __m128 b,
const __m128 p,
const __m128 r) {
__m128 tmp1 = _mm_cmpgt_ps(a, p);
__m128 tmp2 = _mm_cmpgt_ps(b, p);
__m128 tmp3 = _mm_xor_ps(tmp1, tmp2);
return _mm_and_ps(tmp3, r);
}
#else
typedef __m128d vecType;
/* number of double in vector */
#define VECTOR_LEN 2
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
__m128d lo = _mm_unpacklo_pd(mm, mm);
__m128d hi = _mm_unpackhi_pd(mm, mm);
__m128d ret = agg.vecOp(lo, hi);
return _mm_cvtsd_f64(ret);
}
inline __m128d hl_vec_set(const real d) {
#if defined(__APPLE__) || defined(__OSX__)
return _mm_set1_pd(d);
#else
return _mm_set_pd1(d);
#endif
}
inline __m128d hl_vec_classification_error(const __m128d a,
const __m128d b,
const __m128d p,
const __m128d r) {
__m128d tmp1 = _mm_cmpgt_pd(a, p);
__m128d tmp2 = _mm_cmpgt_pd(b, p);
__m128d tmp3 = _mm_xor_pd(tmp1, tmp2);
return _mm_and_pd(tmp3, r);
}
#endif
#endif // HL_CPU_SIMD_SSE_CUH_
......@@ -18,26 +18,6 @@ limitations under the License. */
#include "hl_matrix_type.cuh"
#ifdef __CUDA_ARCH__
/**
* CUDA kernel inline function
*/
#define INLINE __device__ inline
#else
/**
* CPP inline function
*/
#define INLINE inline
#endif
#ifndef PADDLE_TYPE_DOUBLE
#define DEVICE_FMAX fmaxf
#define DEVICE_FMIN fminf
#else
#define DEVICE_FMAX fmax
#define DEVICE_FMIN fmin
#endif
class BaseOp {
public:
static const bool sse = false;
......@@ -66,10 +46,8 @@ typedef BaseOp SSESquaredDiff;
typedef BaseOp SSEFirst;
typedef BaseOp SSESecond;
typedef BaseOp SSEClassificationError;
#elif defined(__ARM__NEON__) || defined(__ARM_NEON)
#include "hl_matrix_base_neon.cuh"
#else
#include "hl_matrix_base_sse.cuh"
#include "hl_matrix_base_detail.cuh"
#endif
namespace aggregate {
......@@ -124,7 +102,7 @@ public:
add2(const real s1, const real s2)
: SSEAdd2(s1, s2), p1(s1), p2(s2) {}
INLINE real operator()(const real a, const real b) const {
return p1 * a + p2 * b;
return p1 * a + p2 * b;
}
};
......
......@@ -12,32 +12,34 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_MATRIX_BASE_DETAIL_CUH_
#define HL_MATRIX_BASE_DETAIL_CUH_
#ifndef HL_MATRIX_BASE_NEON_CUH_
#define HL_MATRIX_BASE_NEON_CUH_
#include "hl_matrix_type.cuh"
#include "hl_tensor_ops.h"
namespace aggregate {
class SSESum {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
return vaddq_f32(a, b);
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
return hppl::binary::add<vecType>()(a, b);
}
};
class SSEMax {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
return vmaxq_f32(a, b);
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
return hppl::binary::max<vecType>()(a, b);
}
};
class SSEMin {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
return vminq_f32(a, b);
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
return hppl::binary::min<vecType>()(a, b);
}
};
} // namespace aggregate
......@@ -46,8 +48,8 @@ namespace base {
namespace unary {
class SSEIdentity {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a) const {
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a) const {
return a;
}
};
......@@ -56,106 +58,96 @@ public:
namespace binary {
class SSEAdd {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
return vaddq_f32(a, b);
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
return hppl::binary::add<vecType>()(a, b);
}
};
class SSEAdd2 {
public:
static const bool sse = true;
static const bool sse = VECTOR_SIMD;
const real p1;
const real p2;
float32x4_t mp1;
float32x4_t mp2;
vecType mp1;
vecType mp2;
public:
SSEAdd2(const real s1, const real s2) : p1(s1), p2(s2) {
mp1 = vdupq_n_f32(p1);
mp2 = vdupq_n_f32(p2);
mp1 = hl_vec_set(p1);
mp2 = hl_vec_set(p2);
}
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
float32x4_t tmp1, tmp2;
tmp1 = vmulq_f32(mp1, a);
tmp2 = vmulq_f32(mp2, b);
return vaddq_f32(tmp1, tmp2);
INLINE vecType vecOp(const vecType a, const vecType b) const {
return hppl::binary::add_scale<vecType>(mp1, mp2)(a, b);
}
};
class SSESub {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
return vsubq_f32(a, b);
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
return hppl::binary::sub<vecType>()(a, b);
}
};
class SSEMul {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
return vmulq_f32(a, b);
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
return hppl::binary::mul<vecType>()(a, b);
}
};
class SSEDiv {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
float32x4_t tmp;
tmp = vrecpeq_f32(b);
return vmulq_f32(a, tmp);
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
return hppl::binary::div<vecType>()(a, b);
}
};
class SSESquaredDiff {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
float32x4_t tmp;
tmp = vsubq_f32(a, b);
return vmulq_f32(tmp, tmp);
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
vecType tmp = hppl::binary::sub<vecType>()(a, b);
return hppl::binary::mul<vecType>()(tmp, tmp);
}
};
class SSEFirst {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
return a;
}
};
class SSESecond {
public:
static const bool sse = true;
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
static const bool sse = VECTOR_SIMD;
INLINE vecType vecOp(const vecType a, const vecType b) const {
return b;
}
};
class SSEClassificationError {
public:
static const bool sse = true;
static const bool sse = VECTOR_SIMD;
const real p;
float32x4_t mp;
uint32x4_t result;
vecType mp;
vecType result;
public:
explicit SSEClassificationError(const real s) : p(s) {
mp = vdupq_n_f32(p);
result = vdupq_n_u32(1);
mp = hl_vec_set(p);
result = hl_vec_set(1.0f);
}
// TODO: to be check
INLINE float32x4_t vecOp(const float32x4_t a, const float32x4_t b) const {
uint32x4_t tmp1 = vcgtq_f32(a, mp);
uint32x4_t tmp2 = vcgtq_f32(b, mp);
uint32x4_t tmp3 = veorq_u32(tmp1, tmp2);
return vcvtq_f32_u32(vandq_u32(tmp3, result));
INLINE vecType vecOp(const vecType a, const vecType b) const {
return hl_vec_classification_error(a, b, mp, result);
}
};
} // namespace binary
} // namespace base
#endif /* HL_MATRIX_BASE_NEON_CUH_ */
#endif /* HL_MATRIX_BASE_DETAIL_CUH_ */
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_MATRIX_BASE_SSE_CUH_
#define HL_MATRIX_BASE_SSE_CUH_
namespace aggregate {
class SSESum {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_add_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_add_pd(a, b);
}
};
class SSEMax {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_max_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_max_pd(a, b);
}
};
class SSEMin {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_min_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_min_pd(a, b);
}
};
} // namespace aggregate
namespace base {
namespace unary {
class SSEIdentity {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a) const {
return a;
}
INLINE __m128d vecOp(const __m128d a) const {
return a;
}
};
} // namespace unary
namespace binary {
class SSEAdd {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_add_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_add_pd(a, b);
}
};
class SSEAdd2 {
public:
static const bool sse = true;
const real p1;
const real p2;
union {__m128 f; __m128d d;} mp1;
union {__m128 f; __m128d d;} mp2;
public:
SSEAdd2(const real s1, const real s2) : p1(s1), p2(s2) {
if (sizeof(real) == sizeof(float)) {
mp1.f = _mm_set1_ps(p1);
mp2.f = _mm_set1_ps(p2);
} else {
mp1.d = _mm_set1_pd(p1);
mp2.d = _mm_set1_pd(p2);
}
}
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
__m128 tmp1, tmp2;
tmp1 = _mm_mul_ps(mp1.f, a);
tmp2 = _mm_mul_ps(mp2.f, b);
return _mm_add_ps(tmp1, tmp2);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
__m128d tmp1, tmp2;
tmp1 = _mm_mul_pd(mp1.d, a);
tmp2 = _mm_mul_pd(mp2.d, b);
return _mm_add_pd(tmp1, tmp2);
}
};
class SSESub {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_sub_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_sub_pd(a, b);
}
};
class SSEMul {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_mul_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_mul_pd(a, b);
}
};
class SSEDiv {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_div_ps(a, b);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_div_pd(a, b);
}
};
class SSESquaredDiff {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return _mm_mul_ps(_mm_sub_ps(a, b), _mm_sub_ps(a, b));
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return _mm_mul_pd(_mm_sub_pd(a, b), _mm_sub_pd(a, b));
}
};
class SSEFirst {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return a;
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return a;
}
};
class SSESecond {
public:
static const bool sse = true;
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
return b;
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
return b;
}
};
class SSEClassificationError {
public:
static const bool sse = true;
const real p;
union {__m128 f; __m128d d;} mp;
union {__m128 f; __m128d d;} result;
public:
explicit SSEClassificationError(const real s) : p(s) {
if (sizeof(real) == sizeof(float)) {
mp.f = _mm_set1_ps(p);
result.f = _mm_set1_ps(1.0f);
} else {
mp.d = _mm_set1_pd(p);
result.d = _mm_set1_pd(1.0);
}
}
INLINE __m128 vecOp(const __m128 a, const __m128 b) const {
__m128 tmp1 = _mm_cmpgt_ps(a, mp.f);
__m128 tmp2 = _mm_cmpgt_ps(b, mp.f);
__m128 tmp3 = _mm_xor_ps(tmp1, tmp2);
return _mm_and_ps(tmp3, result.f);
}
INLINE __m128d vecOp(const __m128d a, const __m128d b) const {
__m128d tmp1 = _mm_cmpgt_pd(a, mp.d);
__m128d tmp2 = _mm_cmpgt_pd(b, mp.d);
__m128d tmp3 = _mm_xor_pd(tmp1, tmp2);
return _mm_and_pd(tmp3, result.d);
}
};
} // namespace binary
} // namespace base
#endif /* HL_MATRIX_BASE_SSE_CUH_ */
......@@ -17,35 +17,35 @@ limitations under the License. */
#include "hl_base.h"
#if defined(__CUDA_ARCH__)
#ifdef __CUDA_ARCH__
/**
* CUDA kernel inline function
*/
#define INLINE __device__ inline
#else
/**
* CPP inline function
*/
#define INLINE inline
#endif
#ifdef __CUDA_ARCH__
#include <vector_types.h>
#ifndef PADDLE_TYPE_DOUBLE
typedef float4 vecType;
#else
typedef double2 vecType;
#endif
#elif (defined __ARM_NEON) || (defined __ARM_NEON__)
#include <arm_neon.h>
#ifndef PADDLE_TYPE_DOUBLE
typedef float32x4_t vecType;
#else
#error NEON instructions does not support double precision
#endif
#elif defined(__SSE3__)
#include "hl_cpu_simd_sse.cuh"
#define PADDLE_USE_SSE3
#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(__NVCC__)
// Currently nvcc does not support neon intrinsic.
// TODO: Extract simd intrinsic implementation from .cu files.
#include "hl_cpu_simd_neon.cuh"
#define PADDLE_USE_NEON
#else
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#ifndef PADDLE_TYPE_DOUBLE
typedef __m128 vecType;
#else
typedef __m128d vecType;
#endif
#endif
#ifdef __CUDA_ARCH__
#define INLINE __device__ inline
#else
#define INLINE inline
#include "hl_cpu_scalar.cuh"
#endif
#endif // HL_MATRIX_TYPE_CUH_
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_NEON_MATRIX_KERNEL_CUH_
#define HL_NEON_MATRIX_KERNEL_CUH_
#include "hl_matrix_type.cuh"
#define VECTOR_SIZE 16
/* number of float in vector */
#define VECTOR_LEN 4
#define VECTOR_SET vdupq_n_f32
inline bool hl_check_align(size_t size) {
return !(size & (VECTOR_SIZE - 1));
}
inline bool hl_check_align(void *ptr) {
return hl_check_align(reinterpret_cast<size_t>(ptr));
}
template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) {
float32x4_t rev = vrev64q_f32(mm);
float32x4_t tmp1 = agg.vecOp(rev, rev);
float32x2_t lo = vget_high_f32(rev);
float32x2_t hi = vget_low_f32(rev);
float32x4_t tmp2 = vcombine_f32(hi, lo);
float32x4_t ret = agg.vecOp(tmp1, tmp2);
return vgetq_lane_f32(ret, 0);
}
template <class Agg, class Op, class Saver>
void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda) {
for (int i = 0; i < dimM; i++, A += lda) {
vecType mm = VECTOR_SET(agg.init());
vecType *a = (vecType*)(A);
for (int j = 0; j < dimN / VECTOR_LEN; j++, a++) {
mm = agg.vecOp(mm, op.vecOp(*a));
}
int rem = dimN % VECTOR_LEN;
if (rem) {
real tmp = hl_agg_op(agg, mm);
real *a = A + (dimN / VECTOR_LEN) * VECTOR_LEN;
for (int j = 0; j < rem; j++) {
tmp = agg(tmp, op(a[j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
} else {
dst[i*ld] = sv(dst[i*ld], hl_agg_op(agg, mm));
}
}
}
template <class Agg, class Op, class Saver>
void hl_sse_matrix_row_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst, int ld,
real *A, int lda,
real *B, int ldb) {
for (int i = 0; i < dimM; i++, A += lda, B += ldb) {
vecType mm = VECTOR_SET(agg.init());
vecType *a = (vecType*)(A);
vecType *b = (vecType*)(B);
for (int j = 0; j < dimN / VECTOR_LEN; j++, a++, b++) {
mm = agg.vecOp(mm, op.vecOp(*a, *b));
}
int rem = dimN % VECTOR_LEN;
if (rem) {
real tmp = hl_agg_op(agg, mm);
real *a = A + (dimN / VECTOR_LEN) * VECTOR_LEN;
real *b = B + (dimN / VECTOR_LEN) * VECTOR_LEN;
for (int j = 0; j < rem; j++) {
tmp = agg(tmp, op(a[j], b[j]));
}
dst[i*ld] = sv(dst[i*ld], tmp);
} else {
dst[i*ld] = sv(dst[i*ld], hl_agg_op(agg, mm));
}
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
template <class Agg, class Op, class Saver>
void hl_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
for (int j = 0; j < dimN; j++) {
real tmp = agg.init();
for (int i = 0; i < dimM; i++) {
tmp = agg(tmp, op(A[i * lda + j], B[i * ldb + j]));
}
dst[j] = sv(dst[j], tmp);
}
}
/*
* MaxRow greater than or equal dimN
* dimN is multiples of VECTOR_LEN
* so rem <= MaxRow / VECTOR_LEN
*/
template <int MaxRow, class Agg, class Op, class Saver>
void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
vecType mm[MaxRow / VECTOR_LEN];
for (int n = 0; n < MaxRow / VECTOR_LEN; n++) {
mm[n] = VECTOR_SET(agg.init());
}
for (int i = 0; i < dimM; i++) {
vecType *a = (vecType*)(A + i * lda);
for (int n = 0; n < dimN / VECTOR_LEN; n++) {
mm[n] = agg.vecOp(mm[n], op.vecOp(a[n]));
}
}
vecType *result = (vecType*)(dst);
for (int n = 0; n < dimN / VECTOR_LEN; n++) {
result[n] = sv.vecOp(result[n], mm[n]);
}
int rem = dimN % VECTOR_LEN;
if (rem) {
A += (dimN / VECTOR_LEN) * VECTOR_LEN;
dst += (dimN / VECTOR_LEN) * VECTOR_LEN;
hl_matrix_column_op(agg, op, sv, dimM, rem, dst, A, lda);
}
}
/*
* dimN is multiples of VECTOR_LEN
* dimN greater than Step
*/
template <int Step, class Agg, class Op, class Saver>
void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
for (int j = 0; j < dimN / Step; j++, dst += Step, A += Step) {
vecType mm[Step / VECTOR_LEN];
for (int n = 0; n < Step / VECTOR_LEN; n++) {
mm[n] = VECTOR_SET(agg.init());
}
for (int i = 0; i < dimM; i++) {
vecType *a = (vecType*)(A + i * lda);
for (int n = 0; n < Step / VECTOR_LEN; n++) {
mm[n] = agg.vecOp(mm[n], op.vecOp(a[n]));
}
}
vecType *result = (vecType*)(dst);
for (int n = 0; n < Step / VECTOR_LEN; n++) {
result[n] = sv.vecOp(result[n], mm[n]);
}
}
int remRow = dimN % Step;
if (remRow) {
hl_sse_column_op_with_rem<Step>(agg, op, sv, dimM, remRow, dst, A, lda);
}
}
template <class Agg, class Op, class Saver>
void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda) {
if (dimN <= 16) {
hl_sse_matrix_column_op<16>(agg, op, sv, dimM, dimN, dst, A, lda);
} else if (dimN <= 32) {
hl_sse_matrix_column_op<32>(agg, op, sv, dimM, dimN, dst, A, lda);
} else if (dimN <= 1024 || dimM <= 512) {
hl_sse_matrix_column_op<64>(agg, op, sv, dimM, dimN, dst, A, lda);
} else {
hl_sse_matrix_column_op<1024>(agg, op, sv, dimM, dimN, dst, A, lda);
}
}
template <int MaxRow, class Agg, class Op, class Saver>
void hl_sse_column_op_with_rem(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
vecType mm[MaxRow / VECTOR_LEN];
for (int n = 0; n < MaxRow / VECTOR_LEN; n++) {
mm[n] = VECTOR_SET(agg.init());
}
for (int i = 0; i < dimM; i++) {
vecType *a = (vecType*)(A + i * lda);
vecType *b = (vecType*)(B + i * ldb);
for (int n = 0; n < dimN / VECTOR_LEN; n++) {
mm[n] = agg.vecOp(mm[n], op.vecOp(a[n], b[n]));
}
}
vecType *result = (vecType*)(dst);
for (int n = 0; n < dimN / VECTOR_LEN; n++) {
result[n] = sv.vecOp(result[n], mm[n]);
}
int rem = dimN % VECTOR_LEN;
if (rem) {
A += (dimN / VECTOR_LEN) * VECTOR_LEN;
B += (dimN / VECTOR_LEN) * VECTOR_LEN;
dst += (dimN / VECTOR_LEN) * VECTOR_LEN;
hl_matrix_column_op(agg, op, sv, dimM, rem, dst, A, lda, B, ldb);
}
}
template <int Step, class Agg, class Op, class Saver>
void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
for (int j = 0; j < dimN / Step; j++, dst += Step, A += Step, B += Step) {
vecType mm[Step / VECTOR_LEN];
for (int n = 0; n < Step / VECTOR_LEN; n++) {
mm[n] = VECTOR_SET(agg.init());
}
for (int i = 0; i < dimM; i++) {
vecType *a = (vecType*)(A + i * lda);
vecType *b = (vecType*)(B + i * ldb);
for (int n = 0; n < Step / VECTOR_LEN; n++) {
mm[n] = agg.vecOp(mm[n], op.vecOp(a[n], b[n]));
}
}
vecType *result = (vecType*)(dst);
for (int n = 0; n < Step / VECTOR_LEN; n++) {
result[n] = sv.vecOp(result[n], mm[n]);
}
}
int remRow = dimN % Step;
if (remRow) {
hl_sse_column_op_with_rem<Step>(
agg, op, sv, dimM, remRow, dst, A, lda, B, ldb);
}
}
template <class Agg, class Op, class Saver>
void hl_sse_matrix_column_op(Agg agg, Op op, Saver sv,
int dimM, int dimN,
real *dst,
real *A, int lda,
real *B, int ldb) {
if (dimN <= 16) {
hl_sse_matrix_column_op<16>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb);
} else if (dimN <= 32) {
hl_sse_matrix_column_op<32>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb);
} else if (dimN <= 1024 || dimM <= 512) {
hl_sse_matrix_column_op<64>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb);
} else {
hl_sse_matrix_column_op<1024>(agg, op, sv, dimM, dimN, dst, A, lda, B, ldb);
}
}
#endif /* HL_NEON_MATRIX_KERNEL_CUH_ */
......@@ -328,6 +328,208 @@ public:
INLINE T operator()(const T a, const T b) const { return a < b ? b : a; }
};
#ifdef PADDLE_USE_SSE3
#ifndef PADDLE_TYPE_DOUBLE
template <>
class add<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_add_ps(a, b);
}
};
template <>
class add_scale<__m128> {
private:
const __m128 p1;
const __m128 p2;
public:
INLINE add_scale(const __m128 s1, const __m128 s2) : p1(s1), p2(s2) {}
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_add_ps(_mm_mul_ps(p1, a), _mm_mul_ps(p2, b));
}
};
template <>
class sub<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_sub_ps(a, b);
}
};
template <>
class mul<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_mul_ps(a, b);
}
};
template <>
class div<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_div_ps(a, b);
}
};
template <>
class min<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_min_ps(a, b);
}
};
template <>
class max<__m128> {
public:
INLINE __m128 operator()(const __m128 a, const __m128 b) const {
return _mm_max_ps(a, b);
}
};
#else
template <>
class add<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_add_pd(a, b);
}
};
template <>
class add_scale<__m128d> {
private:
const __m128d p1;
const __m128d p2;
public:
INLINE add_scale(const __m128d s1, const __m128d s2) : p1(s1), p2(s2) {}
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_add_pd(_mm_mul_pd(p1, a), _mm_mul_pd(p2, b));
}
};
template <>
class sub<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_sub_pd(a, b);
}
};
template <>
class mul<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_mul_pd(a, b);
}
};
template <>
class div<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_div_pd(a, b);
}
};
template <>
class min<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_min_pd(a, b);
}
};
template <>
class max<__m128d> {
public:
INLINE __m128d operator()(const __m128d a, const __m128d b) const {
return _mm_max_pd(a, b);
}
};
#endif // PADDLE_TYPE_DOUBLE
#endif // PADDLE_USE_SSE3
#ifdef PADDLE_USE_NEON
#ifndef PADDLE_TYPE_DOUBLE
template <>
class add<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vmulq_f32(a, b);
}
};
template <>
class add_scale<float32x4_t> {
private:
const float32x4_t p1;
const float32x4_t p2;
public:
INLINE add_scale(const float32x4_t s1, const float32x4_t s2)
: p1(s1), p2(s2) {}
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vaddq_f32(vmulq_f32(p1, a), vmulq_f32(p2, b));
}
};
template <>
class sub<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vsubq_f32(a, b);
}
};
template <>
class mul<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vmulq_f32(a, b);
}
};
template <>
class div<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
float32x4_t tmp = vrecpeq_f32(b);
return vmulq_f32(a, tmp);
}
};
template <>
class min<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vminq_f32(a, b);
}
};
template <>
class max<float32x4_t> {
public:
INLINE float32x4_t operator()(const float32x4_t a,
const float32x4_t b) const {
return vmaxq_f32(a, b);
}
};
#else
#error To be implemented
#endif // PADDLE_TYPE_DOUBLE
#endif // PADDLE_USE_NEON
} // namespace binary
} // namespace hppl
......
......@@ -28,6 +28,7 @@ if(WITH_TESTING)
add_simple_unittest(PadOpTest)
add_simple_unittest(MulOpTest)
add_simple_unittest(CosSimOpTest)
add_simple_unittest(RowConvOpTest)
endif()
endif()
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "RowConvOp.h"
#include <iostream>
#include "paddle/math/Vector.h"
namespace paddle {
template <>
void RowConv<DEVICE_TYPE_CPU>(CpuMatrix& out,
const CpuMatrix& in,
const CpuMatrix& filter,
const CpuIVector& seq) {
const int* starts = seq.getData();
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
for (size_t i = 0; i < numSeq; ++i) {
size_t begin = starts[i];
size_t end = starts[i + 1];
for (size_t j = begin; j < end; ++j) {
MatrixPtr x;
MatrixPtr w;
if ((j + contextLength) < end) {
x = (const_cast<CpuMatrix&>(in)).subMatrix(j, contextLength);
w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, contextLength);
} else {
x = (const_cast<CpuMatrix&>(in)).subMatrix(j, end - j);
w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, end - j);
}
MatrixPtr y = out.subMatrix(j, 1);
y->addDotMulVMM(*x, *w);
}
}
}
template <>
void RowConvGrad<DEVICE_TYPE_CPU>(const CpuMatrix& outG,
const CpuMatrix& in,
const CpuMatrix& filter,
CpuMatrix& inG,
CpuMatrix& filterG,
const CpuIVector& seq) {
// gradient w.r.t filter
const int* starts = seq.getData();
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
if (filterG) {
for (size_t i = 0; i < numSeq; ++i) {
size_t begin = starts[i];
size_t end = starts[i + 1];
size_t steps = end - begin;
for (size_t j = 0; j < contextLength && (begin + j) < end; ++j) {
MatrixPtr x =
(const_cast<CpuMatrix&>(in)).subMatrix(begin + j, steps - j);
MatrixPtr dy =
(const_cast<CpuMatrix&>(outG)).subMatrix(begin, steps - j);
MatrixPtr dw = filterG.subMatrix(j, 1);
dw->addDotMulVMM(*dy, *x);
}
}
}
// gradient w.r.t input feature
if (inG) {
for (size_t i = 0; i < numSeq; ++i) {
size_t begin = starts[i];
size_t end = starts[i + 1];
size_t steps = end - begin;
for (size_t j = 0; j < steps; ++j) {
MatrixPtr dx = inG.subMatrix(begin + j, 1);
for (size_t t = 0; t < contextLength; ++t) {
if (int(j - t) >= 0) {
MatrixPtr dy =
(const_cast<CpuMatrix&>(outG)).subMatrix(begin + j - t, 1);
MatrixPtr w = (const_cast<CpuMatrix&>(filter)).subMatrix(t, 1);
dx->addDotMul(*dy, *w, 1.0, 1.0);
}
}
}
}
}
}
/**
* \brief The row convolution is called lookahead convolution. It is firstly
* introduced in deep-speech2 system. The bidirectional RNN that learns
* representation for a sequence by performing a forward and a backward pass
* through the entire sequence. However, unlike unidirectional RNNs,
* bidirectional RNNs are challenging to deploy in an online and low-latency
* setting. The lookahead convolution incorporates information from future
* subsequences in a computationally efficient manner to improve unidirectional
* recurrent neural networks.
*
* The connection of row convolution is different form the 1D sequence
* convolution. Assumed that, the future context-length is k, that is to say,
* it can get the output at timestep t by using the the input feature from t-th
* timestep to (t+k)-th timestep. Assumed that the hidden dim of input
* activations are d, the activations r_t for the new layer at time-step t are:
*
*
* -- k + 1
* r(t,i) = > W(i,j) * h(t+j-1, i), for (1 <= i <= d)
* -- j = 1
*
*
* The weight shape is: (k + 1) x d
* Function Arguments:
*
* \param inputs[0] The input activations.
* \param inputs[0] The filter (or weight) and shape is (k+1) x d.
* \param outputs[1] The output activations.
*
* [1] Dario Amodei, etc. Deep Speech 2 : End-to-End Speech Recognition in
* English
* and Mandarin. https://arxiv.org/abs/1512.02595
*/
template <DeviceType Device>
class RowConvFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
// check
CHECK_EQ(2UL, inputs.size());
CHECK_EQ(1UL, outputs.size());
// TODO(qingqing): support ASSIGN_TO.
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here.";
const auto in = dynamic_cast<const SequenceArg&>(inputs[0]);
auto out = dynamic_cast<const SequenceArg&>(outputs[0]);
auto w = inputs[1];
CHECK(in.data() && out.data() && in.getSequenceId().data());
CHECK_EQ(in.shape().ndims(), 2UL);
CHECK(in.shape() == out.shape());
CHECK_EQ(w.shape()[1], in.shape()[1]);
auto outMat = out.matrix<Device>();
const auto inMat = in.matrix<Device>();
const auto wMat = w.matrix<Device>();
const auto seqId = in.getSequenceId().vector<int, Device>();
RowConv<Device>(outMat, inMat, wMat, seqId);
}
};
/**
* \brief The backward of row convolution function. This function calculated
* the gradient w.r.t filter and the gradient w.r.t input activations(or data).
*
* Argument in this Function:
*
* \param inputs[0] The gradient w.r.t output activations.
* \param inputs[1] The input activations.
* \param inputs[2] The filter (or weight) and shape is (k+1) x d.
* \param outputs[0] The gradient w.r.t input activations.
* \param outputs[1] The gradient w.r.r filter.
*
* Abbreviation:
* w.r.t: with respect to.
*/
template <DeviceType Device>
class RowConvGradFunc : public FunctionBase {
// TODO(qingqing): split into RowConvDataFunc and RowConvWeightFunc
public:
void init(const FuncConfig& config) override {}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
// check
CHECK_EQ(3UL, inputs.size());
CHECK_EQ(2UL, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
CHECK_EQ(outputs[1].getArgType(), ADD_TO);
CHECK(inputs[0].isSequenceArg() && inputs[1].isSequenceArg() &&
outputs[0].isSequenceArg())
<< "SequenceArg required here.";
const auto outGrad = dynamic_cast<const SequenceArg&>(inputs[0]);
const auto in = dynamic_cast<const SequenceArg&>(inputs[1]);
const auto w = inputs[2];
auto inGrad = dynamic_cast<const SequenceArg&>(outputs[0]);
auto wGrad = outputs[1];
CHECK_EQ(in.shape().ndims(), 2UL);
CHECK(in.shape() == inGrad.shape());
CHECK(in.shape() == outGrad.shape());
CHECK_EQ(wGrad.shape()[1], in.shape()[1]);
const auto outGMat = outGrad.matrix<Device>();
const auto inMat = in.matrix<Device>();
const auto wMat = w.matrix<Device>();
auto inGMat = inGrad.data()
? inGrad.matrix<Device>()
: typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
auto wGMat = wGrad.data()
? wGrad.matrix<Device>()
: typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
const auto seqId = in.getSequenceId().vector<int, Device>();
RowConvGrad<Device>(outGMat, inMat, wMat, inGMat, wGMat, seqId);
}
};
REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc);
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc);
#endif
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
namespace paddle {
/**
* \brief The forward of row convolution.
*
* \param[out] out The output data and shape is h x d. h is the sum of
* time steps of all samples in one mini-batch.
* \param[in] in The input data and shape is h x d.
* \param[in] filter The filter and shape is k x d. The lookahead step
* number plus one equals k.
* \param[in] seq The sequence start positions.
*
*/
template <DeviceType DType>
void RowConv(typename Tensor<real, DType>::Matrix& out,
const typename Tensor<real, DType>::Matrix& in,
const typename Tensor<real, DType>::Matrix& filter,
const typename Tensor<int, DType>::Vector& seq);
/**
* \brief The backward of row convolution.
*
* \param[in] outG The gradient w.r.t output data.
* \param[in] in The input data.
* \param[in] filter The filter.
* \param[out] inG The gradient w.r.t input data.
* \param[out] filterG The gradient w.r.t filter.
* \param[in] seq The sequence start positions.
*
*/
template <DeviceType DType>
void RowConvGrad(const typename Tensor<real, DType>::Matrix& outG,
const typename Tensor<real, DType>::Matrix& in,
const typename Tensor<real, DType>::Matrix& filter,
typename Tensor<real, DType>::Matrix& inG,
typename Tensor<real, DType>::Matrix& filterG,
const typename Tensor<int, DType>::Vector& seq);
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "RowConvOp.h"
namespace paddle {
template<int BLOCK_H, int BLOCK_W>
__global__ void KeRowConv(real* y, const real* x, const real* w,
const int* starts, const int height, const int width,
const int numSeq, const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sw[BLOCK_H][BLOCK_W];
for (int i = tidy; i < context; i += blky) {
sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
real sum = 0;
int off = (start + j) * width;
for (int t = 0; t < context; ++t) {
if ((start + j + t) < end) {
int xoff = off + t * width;
real xVal = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0;
sum += sw[t][tidx] * xVal;
}
}
if (gidx + tidx < width) {
y[off + gidx + tidx] += sum;
}
}
}
}
__global__ void KeRowConv2(real* y, const real* x, const real* w,
const int* starts, const int height, const int width,
const int numSeq, const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
int off = (start + j) * width;
real sum = 0;
for (int t = 0; t < context && (start + j + t) < end; ++t) {
int xoff = off + t * width;
real xd = gidx + tidx < width ? x[xoff + gidx + tidx] : 0.0;
real wd = gidx + tidx < width ? w[t * width + gidx + tidx] : 0.0;
sum += wd * xd;
}
if (gidx + tidx < width) {
y[off + gidx + tidx] += sum;
}
}
}
}
template <>
void RowConv<DEVICE_TYPE_GPU>(GpuMatrix& out,
const GpuMatrix& in,
const GpuMatrix& filter,
const GpuIVector& seq) {
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
const size_t height = in.getHeight();
const size_t width = in.getWidth();
real* y = out.getData();
const real* x = in.getData();
const real* w = filter.getData();
const int* starts = seq.getData();
dim3 dimBlock(32, 32);
dim3 dimGrid(DIVUP(width, dimBlock.x), 1);
if (contextLength <= 32) {
KeRowConv<32, 32><<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(y, x, w, starts, height, width, numSeq, contextLength);
} else {
KeRowConv2<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(y, x, w, starts, height, width, numSeq, contextLength);
}
CHECK_SYNC("RowConv");
}
template<int BLOCK_H, int BLOCK_W, int CONTEXT>
__global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sh_x[BLOCK_W][BLOCK_H];
__shared__ real sh_dy[BLOCK_W][BLOCK_H + CONTEXT - 1];
__shared__ real sh_dw[CONTEXT][BLOCK_W];
if (tidy < context) {
sh_dw[tidy][tidx] = 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
const int size = ((steps + BLOCK_H - 1)/BLOCK_H) * BLOCK_H;
for (int j = tidy; j < size; j += BLOCK_H) {
int xoff = gidx + tidx;
int yoff = start + j;
// transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0;
__syncthreads();
if (tidy < (context - 1)) {
yoff = yoff - context + 1;
sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0;
}
__syncthreads();
for (int t = 0; t < context; t++) {
real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx + context - 1 - t];
__syncthreads();
// warp size and blockDim.x is 32.
val += __shfl_down(val, 16);
val += __shfl_down(val, 8);
val += __shfl_down(val, 4);
val += __shfl_down(val, 2);
val += __shfl_down(val, 1);
__syncthreads();
if (tidx == 0) {
sh_dw[t][tidy] += val;
}
__syncthreads();
}
}
}
for (int t = tidy; (t < context) && ((gidx + tidx) < width); t += blky) {
dw[t * width + gidx + tidx] += sh_dw[t][tidx];
}
}
template<int BLOCK_H, int BLOCK_W>
__global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sh_x[BLOCK_H][BLOCK_W];
__shared__ real sh_dy[BLOCK_H][BLOCK_W];
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
const int size = ((steps + BLOCK_H - 1)/BLOCK_H) * BLOCK_H;
for (int j = tidy; j < size; j += BLOCK_H) {
int xoff = gidx + tidx;
int yoff = start + j;
// transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
__syncthreads();
for (int t = 0; t < context; t++) {
sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0;
__syncthreads();
real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx];
__syncthreads();
// warp size and blockDim.x is 32.
val += __shfl_down(val, 16);
val += __shfl_down(val, 8);
val += __shfl_down(val, 4);
val += __shfl_down(val, 2);
val += __shfl_down(val, 1);
__syncthreads();
if (tidx == 0 && (gidx + tidy) < width) {
dw[t*width + gidx + tidy] += val;
}
}
}
}
}
template<int BLOCK_H, int BLOCK_W>
__global__ void KeRowConvBwData(real* dx, const real* w, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
__shared__ real sw[BLOCK_H][BLOCK_W];
for (int i = tidy; i < context; i += blky) {
sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
real sum = 0;
int off = (start + j) * width;
for (int t = 0; t < context && (j - t) >= 0; ++t) {
int dyOff = off - t * width;
real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0;
sum += sw[t][tidx] * dyVal;
}
if (gidx + tidx < width) {
dx[off + gidx + tidx] += sum;
}
}
}
}
__global__ void KeRowConvBwData2(real* dx, const real* w, const real* dy,
const int* starts, const int height, const int width, const int numSeq,
const int context) {
const int tidx = threadIdx.x;
const int tidy = threadIdx.y;
const int blky = blockDim.y;
const int gidx = blockIdx.x * blockDim.x;
for (int i = 0; i < numSeq; ++i) {
const int start = starts[i];
const int end = starts[i + 1];
const int steps = end - start;
for (int j = tidy; j < steps; j += blky) {
real sum = 0;
int off = (start + j) * width;
for (int t = 0; t < context && (j - t) >= 0; ++t) {
int dyOff = off - t * width;
real dyVal = gidx + tidx < width ? dy[dyOff + gidx + tidx] : 0.0;
real wVal = gidx + tidx < width ? w[t * width + gidx + tidx] : 0.0;
sum += wVal * dyVal;
}
if (gidx + tidx < width) {
dx[off + gidx + tidx] += sum;
}
}
}
}
template <>
void RowConvGrad<DEVICE_TYPE_GPU>(const GpuMatrix& outG,
const GpuMatrix& in,
const GpuMatrix& filter,
GpuMatrix& inG,
GpuMatrix& filterG,
const GpuIVector& seq) {
const size_t numSeq = seq.getSize() - 1;
const size_t contextLength = filter.getHeight();
const size_t height = in.getHeight();
const size_t width = in.getWidth();
const real* dy = outG.getData();
const real* x = in.getData();
const real* w = filter.getData();
const int* starts = seq.getData();
if (filterG) {
dim3 dimBlock(32, 32);
dim3 dimGrid(DIVUP(width, dimBlock.x), 1);
real* dw = filterG.getData();
if (contextLength <= 32) {
KeRowConvBwWeight<32, 32, 32>
<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(dw, x, dy, starts, height, width, numSeq, contextLength);
} else {
KeRowConvBwWeight2<32, 32>
<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(dw, x, dy, starts, height, width, numSeq, contextLength);
}
}
if (inG) {
real* dx = inG.getData();
dim3 dimBlock2(32, 32);
dim3 dimGrid2(DIVUP(width, dimBlock2.x), 1);
if (contextLength <= 64) {
KeRowConvBwData<32, 64>
<<<dimGrid2, dimBlock2, 0, STREAM_DEFAULT>>>
(dx, w, dy, starts, height, width, numSeq, contextLength);
} else {
KeRowConvBwData2
<<<dimGrid2, dimBlock2, 0, STREAM_DEFAULT>>>
(dx, w, dy, starts, height, width, numSeq, contextLength);
}
}
CHECK_SYNC("RowConvGrad");
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "FunctionTest.h"
namespace paddle {
void testRowConvFw(size_t batchSize, size_t dim, size_t contextLength) {
FunctionCompare test("RowConv", FuncConfig());
test.addSequence(SequenceIdArg(TensorShape{batchSize}));
test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}));
test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}),
ADD_TO);
test.run();
}
void testRowConvBw(size_t batchSize, size_t dim, size_t contextLength) {
FunctionCompare test("RowConvGrad", FuncConfig());
test.addSequence(SequenceIdArg(TensorShape{batchSize}));
test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}));
test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}));
test.addOutputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batchSize, dim}),
ADD_TO);
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{contextLength, dim}),
ADD_TO);
test.run();
}
TEST(RowConv, real) {
for (size_t numSamples : {17, 129, 2020}) {
for (size_t dim : {16, 512, 2560}) {
for (size_t context : {3, 19, 65}) {
VLOG(3) << " numSamples=" << numSamples << " dim=" << dim
<< " context length=" << context;
testRowConvFw(numSamples, dim, context);
testRowConvBw(numSamples, dim, context);
}
}
}
}
} // namespace paddle
include_directories(${CMAKE_CURRENT_BINARY_DIR})
go_library(adder SRCS adder.go)
cc_test(cgo_test
SRCS
cgo_test.cc
DEPS
adder)
package main
import "C"
//export GoAdder
func GoAdder(x, y int) int {
return x + y
}
func main() {} // Required but ignored
cmake_minimum_required(VERSION 3.0)
if(GTEST_INCLUDE_DIR AND GTEST_LIBRARIES)
message("-- Found gtest (include: ${GTEST_INCLUDE_DIR}, library: ${GTEST_LIBRARIES})")
else()
# find #include <majel/xx.h>
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
include_directories(${PARENT_DIR})
# find cmake directory modules
get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
# enable c++11
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# enable gtest
set(THIRD_PARTY_PATH ./third_party)
set(WITH_TESTING ON)
include(external/gtest)
endif()
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
project(cxx_go CXX C Go)
include(cmake/golang.cmake)
include(cmake/flags.cmake)
ExternalGoProject_Add(pserver github.com/PaddlePaddle/Paddle/paddle/go/pserver)
add_go_library(client STATIC pserver)
add_subdirectory(test)
cmake_minimum_required(VERSION 3.0)
include_directories(/env/gopath/src/github.com/PaddlePaddle/Paddle/paddle/go/cclient/build/)
add_executable(main main.c)
add_dependencies(main client)
set (CMAKE_EXE_LINKER_FLAGS "-pthread")
target_link_libraries(main /env/gopath/src/github.com/PaddlePaddle/Paddle/paddle/go/cclient/build/libclient.a) # ${GTEST_LIBRARIES})
#include <iostream>
#include "gtest/gtest.h"
#include "libadder.h"
TEST(Cgo, Invoke) { EXPECT_EQ(GoAdder(30, 12), 42); }
package pserver
// Client is the client to parameter servers.
type Client struct {
}
// NewClient creates a new client.
func NewClient(addr string) *Client {
return &Client{}
}
// BeginInitParams begins to initialize parameters on parameter
// servers.
//
// BeginInitParams will be called from multiple trainers, only one
// trainer will be selected to initialize the parameters on parameter
// servers. Other trainers will be blocked until the initialization is
// done, and they need to get the initialized parameters from
// parameter servers using GetParams.
func (c *Client) BeginInitParams(pserverConfigProto []byte) (selected bool, err error) {
return true, nil
}
// InitParam initializes the parameter on parameter servers.
func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error {
return nil
}
// FinishInitParams tells parameter servers client has sent all
// parameters to parameter servers as initialization.
func (c *Client) FinishInitParams() error {
return nil
}
// SendGrads sends gradients to parameter servers for updating
// parameters.
func (c *Client) SendGrads(grads []Gradient) error {
return nil
}
// GetParams gets parameters from parameter servers.
func (c *Client) GetParams(names []string) ([]Parameter, error) {
return nil, nil
}
// SaveModel indicates parameters to save the parameter to the given
// path.
func (c *Client) SaveModel(path string) error {
return nil
}
// Cleanup cleans up the client states.
func (c *Client) Cleanup() {
}
# RecordIO
## Write
```go
f, e := os.Create("a_file.recordio")
w := recordio.NewWriter(f)
w.Write([]byte("Hello"))
w.Write([]byte("World!"))
w.Close()
```
## Read
1. Load chunk index:
```go
f, e := os.Open("a_file.recordio")
idx, e := recordio.LoadIndex(f)
fmt.Println("Total records: ", idx.Len())
```
2. Create one or more scanner to read a range of records. The
following example reads the range
[1, 3), i.e., the second and the third records:
```go
f, e := os.Open("a_file.recordio")
s := recrodio.NewScanner(f, idx, 1, 3)
for s.Scan() {
fmt.Println(string(s.Record()))
}
if s.Err() != nil && s.Err() != io.EOF {
log.Fatalf("Something wrong with scanning: %v", e)
}
```
package recordio
import (
"bytes"
"compress/gzip"
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"github.com/golang/snappy"
)
// A Chunk contains the Header and optionally compressed records. To
// create a chunk, just use ch := &Chunk{}.
type Chunk struct {
records [][]byte
numBytes int // sum of record lengths.
}
func (ch *Chunk) add(record []byte) {
ch.records = append(ch.records, record)
ch.numBytes += len(record)
}
// dump the chunk into w, and clears the chunk and makes it ready for
// the next add invocation.
func (ch *Chunk) dump(w io.Writer, compressorIndex int) error {
// NOTE: don't check ch.numBytes instead, because empty
// records are allowed.
if len(ch.records) == 0 {
return nil
}
// Write raw records and their lengths into data buffer.
var data bytes.Buffer
for _, r := range ch.records {
var rs [4]byte
binary.LittleEndian.PutUint32(rs[:], uint32(len(r)))
if _, e := data.Write(rs[:]); e != nil {
return fmt.Errorf("Failed to write record length: %v", e)
}
if _, e := data.Write(r); e != nil {
return fmt.Errorf("Failed to write record: %v", e)
}
}
compressed, e := compressData(&data, compressorIndex)
if e != nil {
return e
}
// Write chunk header and compressed data.
hdr := &Header{
checkSum: crc32.ChecksumIEEE(compressed.Bytes()),
compressor: uint32(compressorIndex),
compressedSize: uint32(compressed.Len()),
numRecords: uint32(len(ch.records)),
}
if _, e := hdr.write(w); e != nil {
return fmt.Errorf("Failed to write chunk header: %v", e)
}
if _, e := w.Write(compressed.Bytes()); e != nil {
return fmt.Errorf("Failed to write chunk data: %v", e)
}
// Clear the current chunk.
ch.records = nil
ch.numBytes = 0
return nil
}
type noopCompressor struct {
*bytes.Buffer
}
func (c *noopCompressor) Close() error {
return nil
}
func compressData(src io.Reader, compressorIndex int) (*bytes.Buffer, error) {
compressed := new(bytes.Buffer)
var compressor io.WriteCloser
switch compressorIndex {
case NoCompression:
compressor = &noopCompressor{compressed}
case Snappy:
compressor = snappy.NewBufferedWriter(compressed)
case Gzip:
compressor = gzip.NewWriter(compressed)
default:
return nil, fmt.Errorf("Unknown compression algorithm: %d", compressorIndex)
}
if _, e := io.Copy(compressor, src); e != nil {
return nil, fmt.Errorf("Failed to compress chunk data: %v", e)
}
compressor.Close()
return compressed, nil
}
// parse the specified chunk from r.
func parseChunk(r io.ReadSeeker, chunkOffset int64) (*Chunk, error) {
var e error
var hdr *Header
if _, e = r.Seek(chunkOffset, io.SeekStart); e != nil {
return nil, fmt.Errorf("Failed to seek chunk: %v", e)
}
hdr, e = parseHeader(r)
if e != nil {
return nil, fmt.Errorf("Failed to parse chunk header: %v", e)
}
var buf bytes.Buffer
if _, e = io.CopyN(&buf, r, int64(hdr.compressedSize)); e != nil {
return nil, fmt.Errorf("Failed to read chunk data: %v", e)
}
if hdr.checkSum != crc32.ChecksumIEEE(buf.Bytes()) {
return nil, fmt.Errorf("Checksum checking failed.")
}
deflated, e := deflateData(&buf, int(hdr.compressor))
if e != nil {
return nil, e
}
ch := &Chunk{}
for i := 0; i < int(hdr.numRecords); i++ {
var rs [4]byte
if _, e = deflated.Read(rs[:]); e != nil {
return nil, fmt.Errorf("Failed to read record length: %v", e)
}
r := make([]byte, binary.LittleEndian.Uint32(rs[:]))
if _, e = deflated.Read(r); e != nil {
return nil, fmt.Errorf("Failed to read a record: %v", e)
}
ch.records = append(ch.records, r)
ch.numBytes += len(r)
}
return ch, nil
}
func deflateData(src io.Reader, compressorIndex int) (*bytes.Buffer, error) {
var e error
var deflator io.Reader
switch compressorIndex {
case NoCompression:
deflator = src
case Snappy:
deflator = snappy.NewReader(src)
case Gzip:
deflator, e = gzip.NewReader(src)
if e != nil {
return nil, fmt.Errorf("Failed to create gzip reader: %v", e)
}
default:
return nil, fmt.Errorf("Unknown compression algorithm: %d", compressorIndex)
}
deflated := new(bytes.Buffer)
if _, e = io.Copy(deflated, deflator); e != nil {
return nil, fmt.Errorf("Failed to deflate chunk data: %v", e)
}
return deflated, nil
}
package recordio
import (
"encoding/binary"
"fmt"
"io"
)
const (
// NoCompression means writing raw chunk data into files.
// With other choices, chunks are compressed before written.
NoCompression = iota
// Snappy had been the default compressing algorithm widely
// used in Google. It compromises between speech and
// compression ratio.
Snappy
// Gzip is a well-known compression algorithm. It is
// recommmended only you are looking for compression ratio.
Gzip
magicNumber uint32 = 0x01020304
defaultCompressor = Snappy
)
// Header is the metadata of Chunk.
type Header struct {
checkSum uint32
compressor uint32
compressedSize uint32
numRecords uint32
}
func (c *Header) write(w io.Writer) (int, error) {
var buf [20]byte
binary.LittleEndian.PutUint32(buf[0:4], magicNumber)
binary.LittleEndian.PutUint32(buf[4:8], c.checkSum)
binary.LittleEndian.PutUint32(buf[8:12], c.compressor)
binary.LittleEndian.PutUint32(buf[12:16], c.compressedSize)
binary.LittleEndian.PutUint32(buf[16:20], c.numRecords)
return w.Write(buf[:])
}
func parseHeader(r io.Reader) (*Header, error) {
var buf [20]byte
if _, e := r.Read(buf[:]); e != nil {
return nil, e
}
if v := binary.LittleEndian.Uint32(buf[0:4]); v != magicNumber {
return nil, fmt.Errorf("Failed to parse magic number")
}
return &Header{
checkSum: binary.LittleEndian.Uint32(buf[4:8]),
compressor: binary.LittleEndian.Uint32(buf[8:12]),
compressedSize: binary.LittleEndian.Uint32(buf[12:16]),
numRecords: binary.LittleEndian.Uint32(buf[16:20]),
}, nil
}
package recordio
import "io"
// Index consists offsets and sizes of the consequetive chunks in a RecordIO file.
type Index struct {
chunkOffsets []int64
chunkLens []uint32
numRecords int // the number of all records in a file.
chunkRecords []int // the number of records in chunks.
}
// LoadIndex scans the file and parse chunkOffsets, chunkLens, and len.
func LoadIndex(r io.ReadSeeker) (*Index, error) {
f := &Index{}
offset := int64(0)
var e error
var hdr *Header
for {
hdr, e = parseHeader(r)
if e != nil {
break
}
f.chunkOffsets = append(f.chunkOffsets, offset)
f.chunkLens = append(f.chunkLens, hdr.numRecords)
f.chunkRecords = append(f.chunkRecords, int(hdr.numRecords))
f.numRecords += int(hdr.numRecords)
offset, e = r.Seek(int64(hdr.compressedSize), io.SeekCurrent)
if e != nil {
break
}
}
if e == io.EOF {
return f, nil
}
return nil, e
}
// NumRecords returns the total number of records in a RecordIO file.
func (r *Index) NumRecords() int {
return r.numRecords
}
// NumChunks returns the total number of chunks in a RecordIO file.
func (r *Index) NumChunks() int {
return len(r.chunkLens)
}
// ChunkIndex return the Index of i-th Chunk.
func (r *Index) ChunkIndex(i int) *Index {
idx := &Index{}
idx.chunkOffsets = []int64{r.chunkOffsets[i]}
idx.chunkLens = []uint32{r.chunkLens[i]}
idx.chunkRecords = []int{r.chunkRecords[i]}
idx.numRecords = idx.chunkRecords[0]
return idx
}
// Locate returns the index of chunk that contains the given record,
// and the record index within the chunk. It returns (-1, -1) if the
// record is out of range.
func (r *Index) Locate(recordIndex int) (int, int) {
sum := 0
for i, l := range r.chunkLens {
sum += int(l)
if recordIndex < sum {
return i, recordIndex - sum + int(l)
}
}
return -1, -1
}
// Scanner scans records in a specified range within [0, numRecords).
type Scanner struct {
reader io.ReadSeeker
index *Index
start, end, cur int
chunkIndex int
chunk *Chunk
err error
}
// NewScanner creates a scanner that sequencially reads records in the
// range [start, start+len). If start < 0, it scans from the
// beginning. If len < 0, it scans till the end of file.
func NewScanner(r io.ReadSeeker, index *Index, start, len int) *Scanner {
if start < 0 {
start = 0
}
if len < 0 || start+len >= index.NumRecords() {
len = index.NumRecords() - start
}
return &Scanner{
reader: r,
index: index,
start: start,
end: start + len,
cur: start - 1, // The intial status required by Scan.
chunkIndex: -1,
chunk: &Chunk{},
}
}
// Scan moves the cursor forward for one record and loads the chunk
// containing the record if not yet.
func (s *Scanner) Scan() bool {
s.cur++
if s.cur >= s.end {
s.err = io.EOF
} else {
if ci, _ := s.index.Locate(s.cur); s.chunkIndex != ci {
s.chunkIndex = ci
s.chunk, s.err = parseChunk(s.reader, s.index.chunkOffsets[ci])
}
}
return s.err == nil
}
// Record returns the record under the current cursor.
func (s *Scanner) Record() []byte {
_, ri := s.index.Locate(s.cur)
return s.chunk.records[ri]
}
// Error returns the error that stopped Scan.
func (s *Scanner) Error() error {
return s.err
}
package recordio
import (
"bytes"
"testing"
"unsafe"
"github.com/stretchr/testify/assert"
)
func TestChunkHead(t *testing.T) {
assert := assert.New(t)
c := &Header{
checkSum: 123,
compressor: 456,
compressedSize: 789,
}
var buf bytes.Buffer
_, e := c.write(&buf)
assert.Nil(e)
cc, e := parseHeader(&buf)
assert.Nil(e)
assert.Equal(c, cc)
}
func TestWriteAndRead(t *testing.T) {
assert := assert.New(t)
data := []string{
"12345",
"1234",
"12"}
var buf bytes.Buffer
w := NewWriter(&buf, 10, NoCompression) // use a small maxChunkSize.
n, e := w.Write([]byte(data[0])) // not exceed chunk size.
assert.Nil(e)
assert.Equal(5, n)
n, e = w.Write([]byte(data[1])) // not exceed chunk size.
assert.Nil(e)
assert.Equal(4, n)
n, e = w.Write([]byte(data[2])) // exeeds chunk size, dump and create a new chunk.
assert.Nil(e)
assert.Equal(n, 2)
assert.Nil(w.Close()) // flush the second chunk.
assert.Nil(w.Writer)
n, e = w.Write([]byte("anything")) // not effective after close.
assert.NotNil(e)
assert.Equal(n, 0)
idx, e := LoadIndex(bytes.NewReader(buf.Bytes()))
assert.Nil(e)
assert.Equal([]uint32{2, 1}, idx.chunkLens)
assert.Equal(
[]int64{0,
int64(4 + // magic number
unsafe.Sizeof(Header{}) +
5 + // first record
4 + // second record
2*4)}, // two record legnths
idx.chunkOffsets)
s := NewScanner(bytes.NewReader(buf.Bytes()), idx, -1, -1)
i := 0
for s.Scan() {
assert.Equal(data[i], string(s.Record()))
i++
}
}
func TestWriteEmptyFile(t *testing.T) {
assert := assert.New(t)
var buf bytes.Buffer
w := NewWriter(&buf, 10, NoCompression) // use a small maxChunkSize.
assert.Nil(w.Close())
assert.Equal(0, buf.Len())
idx, e := LoadIndex(bytes.NewReader(buf.Bytes()))
assert.Nil(e)
assert.Equal(0, idx.NumRecords())
}
package recordio_test
import (
"bytes"
"reflect"
"testing"
"github.com/PaddlePaddle/Paddle/paddle/go/recordio"
)
func TestWriteRead(t *testing.T) {
const total = 1000
var buf bytes.Buffer
w := recordio.NewWriter(&buf, 0, -1)
for i := 0; i < total; i++ {
_, err := w.Write(make([]byte, i))
if err != nil {
t.Fatal(err)
}
}
w.Close()
idx, err := recordio.LoadIndex(bytes.NewReader(buf.Bytes()))
if err != nil {
t.Fatal(err)
}
if idx.NumRecords() != total {
t.Fatal("num record does not match:", idx.NumRecords(), total)
}
s := recordio.NewScanner(bytes.NewReader(buf.Bytes()), idx, -1, -1)
i := 0
for s.Scan() {
if !reflect.DeepEqual(s.Record(), make([]byte, i)) {
t.Fatal("not equal:", len(s.Record()), len(make([]byte, i)))
}
i++
}
if i != total {
t.Fatal("total count not match:", i, total)
}
}
func TestChunkIndex(t *testing.T) {
const total = 1000
var buf bytes.Buffer
w := recordio.NewWriter(&buf, 0, -1)
for i := 0; i < total; i++ {
_, err := w.Write(make([]byte, i))
if err != nil {
t.Fatal(err)
}
}
w.Close()
idx, err := recordio.LoadIndex(bytes.NewReader(buf.Bytes()))
if err != nil {
t.Fatal(err)
}
if idx.NumChunks() != total {
t.Fatal("unexpected chunk num:", idx.NumChunks(), total)
}
for i := 0; i < total; i++ {
newIdx := idx.ChunkIndex(i)
s := recordio.NewScanner(bytes.NewReader(buf.Bytes()), newIdx, -1, -1)
j := 0
for s.Scan() {
if !reflect.DeepEqual(s.Record(), make([]byte, i)) {
t.Fatal("not equal:", len(s.Record()), len(make([]byte, i)))
}
j++
}
if j != 1 {
t.Fatal("unexpected record per chunk:", j)
}
}
}
package recordio
import (
"fmt"
"io"
)
const (
defaultMaxChunkSize = 32 * 1024 * 1024
)
// Writer creates a RecordIO file.
type Writer struct {
io.Writer // Set to nil to mark a closed writer.
chunk *Chunk
maxChunkSize int // total records size, excluding metadata, before compression.
compressor int
}
// NewWriter creates a RecordIO file writer. Each chunk is compressed
// using the deflate algorithm given compression level. Note that
// level 0 means no compression and -1 means default compression.
func NewWriter(w io.Writer, maxChunkSize, compressor int) *Writer {
if maxChunkSize < 0 {
maxChunkSize = defaultMaxChunkSize
}
if compressor < 0 {
compressor = defaultCompressor
}
return &Writer{
Writer: w,
chunk: &Chunk{},
maxChunkSize: maxChunkSize,
compressor: compressor}
}
// Writes a record. It returns an error if Close has been called.
func (w *Writer) Write(record []byte) (int, error) {
if w.Writer == nil {
return 0, fmt.Errorf("Cannot write since writer had been closed")
}
if w.chunk.numBytes+len(record) > w.maxChunkSize {
if e := w.chunk.dump(w.Writer, w.compressor); e != nil {
return 0, e
}
}
w.chunk.add(record)
return len(record), nil
}
// Close flushes the current chunk and makes the writer invalid.
func (w *Writer) Close() error {
e := w.chunk.dump(w.Writer, w.compressor)
w.Writer = nil
return e
}
......@@ -217,10 +217,10 @@ void SmoothL1CostLayer::forwardImp(Matrix& output,
targetCpu->copyFrom(target);
outputCpu->copyFrom(output);
labelCpu->copyFrom(*label.value);
targetCpu->smoothL1(*outputCpu, *labelCpu);
targetCpu->smoothL1(*outputCpu, *labelCpu, 1.0);
target.copyFrom(*targetCpu);
} else {
target.smoothL1(output, *label.value);
target.smoothL1(output, *label.value, 1.0);
}
}
......@@ -238,10 +238,10 @@ void SmoothL1CostLayer::backwardImp(Matrix& output,
outputGCpu->copyFrom(outputG);
outputCpu->copyFrom(output);
labelCpu->copyFrom(*label.value);
outputGCpu->smoothL1Bp(*outputCpu, *labelCpu);
outputGCpu->smoothL1Bp(*outputCpu, *labelCpu, 1.0);
outputG.copyFrom(*outputGCpu);
} else {
outputG.smoothL1Bp(output, *label.value);
outputG.smoothL1Bp(output, *label.value, 1.0);
}
}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "RowConvLayer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
REGISTER_LAYER(row_conv, RowConvLayer);
bool RowConvLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
contexLength_ = config_.inputs(0).row_conv_conf().context_length();
CHECK_EQ(inputLayers_.size(), 1UL);
weight_.reset(new Weight(contexLength_, getSize(), parameters_[0]));
createFunction(forward_, "RowConv", FuncConfig());
createFunction(backward_, "RowConvGrad", FuncConfig());
return true;
}
void RowConvLayer::forward(PassType passType) {
Layer::forward(passType);
MatrixPtr input = getInputValue(0);
size_t height = input->getHeight();
size_t width = input->getWidth();
CHECK_EQ(width, getSize());
resetOutput(height, width);
const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);
MatrixPtr w = weight_->getW();
wDims_ = TensorShape({w->getHeight(), w->getWidth()});
MatrixPtr outV = getOutputValue();
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getInputValue(0), *startPos);
inputs.addArg(*w, wDims_);
outputs.addArg(*getOutputValue(), *startPos, ADD_TO);
{
REGISTER_TIMER_INFO("RowConvForward", getName().c_str());
forward_[0]->calc(inputs, outputs);
}
/* activation */ {
REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
forwardActivation();
}
}
void RowConvLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
backwardActivation();
}
const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getOutputGrad(), *startPos);
inputs.addArg(*getInputValue(0), *startPos);
inputs.addArg(*weight_->getW(), wDims_);
MatrixPtr inGrad = getInputGrad(0);
MatrixPtr wGrad = weight_->getWGrad();
size_t h = getInputValue(0)->getHeight();
size_t w = getInputValue(0)->getWidth();
outputs.addArg(
inGrad ? (*inGrad) : *(Matrix::create(nullptr, h, w, false, useGpu_)),
*startPos,
ADD_TO);
outputs.addArg(
wGrad ? (*wGrad)
: *(Matrix::create(nullptr, contexLength_, w, false, useGpu_)),
wDims_,
ADD_TO);
{
REGISTER_TIMER_INFO("RowConvBackward", getName().c_str());
backward_[0]->calc(inputs, outputs);
}
{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
weight_->getParameterPtr()->incUpdate(callback);
}
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
namespace paddle {
/**
* \brief Row Convolution Layer.
*/
class RowConvLayer : public Layer {
public:
explicit RowConvLayer(const LayerConfig& config) : Layer(config) {}
~RowConvLayer() {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
protected:
// Row convolution weight, context_lenght_ * fan_out.
// fan_out is the size of output feature.
std::unique_ptr<Weight> weight_;
// The step number to look ahead plus one equals contexLength_.
size_t contexLength_;
TensorShape wDims_;
};
} // namespace paddle
......@@ -59,7 +59,7 @@ lstm_nest_group = recurrent_group(
input=SubsequenceInput(emb_group), step=lstm_group, name="lstm_nest_group")
# hasSubseq ->(seqlastins) seq
lstm_last = last_seq(
input=lstm_nest_group, agg_level=AggregateLevel.EACH_SEQUENCE)
input=lstm_nest_group, agg_level=AggregateLevel.TO_SEQUENCE)
# seq ->(expand) hasSubseq
lstm_expand = expand_layer(
......@@ -71,7 +71,7 @@ lstm_expand = expand_layer(
lstm_average = pooling_layer(
input=lstm_expand,
pooling_type=AvgPooling(),
agg_level=AggregateLevel.EACH_SEQUENCE)
agg_level=AggregateLevel.TO_SEQUENCE)
with mixed_layer(
size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
......
......@@ -1705,6 +1705,26 @@ TEST(Layer, TransLayer) {
}
}
TEST(Layer, RowConvLayer) {
const int context = 3;
const int size = 512;
TestConfig config;
config.layerConfig.set_type("row_conv");
config.layerConfig.set_size(size);
config.layerConfig.set_active_type("sigmoid");
config.inputDefs.push_back(
{INPUT_SEQUENCE_DATA, "layer_0", size, context * size});
LayerInputConfig* input = config.layerConfig.add_inputs();
RowConvConfig* conv = input->mutable_row_conv_conf();
conv->set_context_length(context);
for (auto useGpu : {false, true}) {
testLayerGrad(config, "row_conv", 100, false, useGpu, false);
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
......
cc_library(place SRCS place.cc)
cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
if(WITH_TESTING)
add_subdirectory(test)
endif()
nv_test(cuda_test SRCS cuda_test.cu)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
#include "paddle/majel/place.h"
#include <sstream>
#include "gtest/gtest.h"
#include "paddle/utils/Logging.h"
TEST(Place, Equality) {
majel::CpuPlace cpu;
......@@ -38,5 +37,4 @@ TEST(Place, Print) {
ss << majel::CpuPlace();
EXPECT_EQ("CpuPlace", ss.str());
}
LOG(INFO) << "\n[----------] Done \n";
}
cc_test(place_test
SRCS place_test.cc
DEPS place)
cc_test(ddim_test
SRCS ddim_test.cc
DEPS ddim)
if(WITH_GPU)
nv_test(cuda_test SRCS cuda_test.cu)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
endif()
......@@ -758,7 +758,7 @@ public:
T p3); // decayRate
/// apply L1/L2 to *this*
void applyL1(T learningRate, T decayRate);
virtual void applyL1(T learningRate, T decayRate);
void applyL1(BaseMatrixT& lr, T learningRate, T decayRate);
void applyL2(T learningRate, T decayRate);
void applyL2(BaseMatrixT& lr, T learningRate, T decayRate);
......
......@@ -3606,7 +3606,7 @@ void CpuMatrix::sumOfSquaresBp(Matrix& output, Matrix& label) {
}
}
void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
void CpuMatrix::smoothL1(Matrix& output, Matrix& label, real destScale) {
CHECK(output.useGpu_ == false && label.useGpu_ == false)
<< "Matrix type are not equal";
......@@ -3624,6 +3624,7 @@ void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) {
for (size_t j = 0; j < dim; ++j) {
real absVal = std::fabs(out[j] - lbl[j]);
cost[i] *= destScale;
if (absVal < 1.0)
cost[i] += 0.5 * absVal * absVal;
else
......@@ -3632,7 +3633,7 @@ void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
}
}
void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label, real destScale) {
CHECK(output.useGpu_ == false && label.useGpu_ == false)
<< "Matrix type are not equal";
......@@ -3650,6 +3651,7 @@ void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim, lbl += dim) {
for (size_t j = 0; j < dim; ++j) {
real val = out[j] - lbl[j];
grad[j] *= destScale;
if (std::fabs(val) < 1) {
grad[j] += val;
} else {
......
......@@ -789,11 +789,11 @@ public:
LOG(FATAL) << "Not implemented";
}
virtual void smoothL1(Matrix& output, Matrix& label) {
virtual void smoothL1(Matrix& output, Matrix& label, real destScale) {
LOG(FATAL) << "Not implemented";
}
virtual void smoothL1Bp(Matrix& outputV, Matrix& label) {
virtual void smoothL1Bp(Matrix& outputV, Matrix& label, real destScale) {
LOG(FATAL) << "Not implemented";
}
......@@ -1736,8 +1736,8 @@ public:
/// gradient of sumOfSquares.
void sumOfSquaresBp(Matrix& outputV, Matrix& label);
void smoothL1(Matrix& output, Matrix& label);
void smoothL1Bp(Matrix& output, Matrix& label);
void smoothL1(Matrix& output, Matrix& label, real destScale);
void smoothL1Bp(Matrix& output, Matrix& label, real destScale);
void tanh(Matrix& output);
void tanhDerivative(Matrix& output);
......
......@@ -54,7 +54,7 @@ void SparseRowCpuMatrix::zeroMem() {
clearRows();
}
void SparseRowCpuMatrix::applyL1Decay(real learningRate, real decayRate) {
void SparseRowCpuMatrix::applyL1(real learningRate, real decayRate) {
apply([=](real* buf, size_t len) {
CpuVector value(0, nullptr);
value.subVecFrom(buf, 0, len);
......
......@@ -94,7 +94,7 @@ public:
/**
* apply L1 to all sparse rows, should be apply after indices ready.
*/
void applyL1Decay(real learningRate, real decayRate);
virtual void applyL1(real learningRate, real decayRate);
void clearIndices() { clearRows(); }
void zeroMemThread(size_t tid, size_t numThreads);
......
......@@ -632,7 +632,7 @@ void Argument::printValueString(std::ostream& stream,
const std::string& prefix) const {
std::unordered_map<std::string, std::string> out;
getValueString(&out);
for (auto field : {"value", "id", "sequence pos", "sub-sequence pos"}) {
for (auto field : {"value", "ids", "sequence pos", "sub-sequence pos"}) {
auto it = out.find(field);
if (it != out.end()) {
stream << prefix << field << ":\n" << it->second;
......
......@@ -20,6 +20,7 @@ limitations under the License. */
#include "OptimizerFunctions.h"
#include "OptimizerWithRegularizer.h"
#include "ParameterUpdateFunctions.h"
#include "ThreadLocalBuffer.h"
#include "hl_gpu.h"
#include "paddle/math/CpuSparseMatrix.h"
#include "paddle/math/MathUtils.h"
......@@ -262,15 +263,6 @@ void Parameter::setMat(ParameterType pType, int matType) {
}
}
SparsePrefetchRowCpuMatrix* Parameter::getPrefetchMatrix() {
MatrixPtr mat = mats_[PARAMETER_VALUE];
if (mat) {
return dynamic_cast<SparsePrefetchRowCpuMatrix*>(mat.get());
}
return nullptr;
}
void Parameter::incUpdate(const UpdateCallback& callback) {
// Static parameter is fixed, and does not need to be updated
if (isStatic()) {
......@@ -422,37 +414,4 @@ bool Parameter::load(std::istream& s) {
return true;
}
ThreadLocal<std::vector<VectorPtr>> Parameter::tlsTempBufs_;
VectorPtr* Parameter::getTlsTempBufs() {
std::vector<VectorPtr>& bufs = *tlsTempBufs_;
if (bufs.empty()) {
bufs.resize(NUM_PARAMETER_TYPES);
for (auto& vec : bufs) {
vec.reset(new CpuVector(0, nullptr));
}
}
return bufs.data();
}
void Parameter::exec(ExecFunc func) {
auto execFunc = [this, func](int tid, size_t numThreads) {
if (numThreads == 1) { // single thread
func(this->getBufs());
} else { // multi thread
VectorPtr* vecs = Parameter::getTlsTempBufs();
auto interval = calcSplitArrayInterval(
this->getSize(), (size_t)tid, numThreads, 8LU /*for avx*/);
for (size_t i = 0; i < (size_t)NUM_PARAMETER_TYPES; ++i) {
if (bufs_[i]) {
vecs[i]->subVecFrom(*bufs_[i], interval);
}
}
func(vecs);
}
};
getBuf(PARAMETER_VALUE)->exec(execFunc);
}
} // namespace paddle
......@@ -40,17 +40,6 @@ class Parameter;
typedef std::function<void(Parameter* param)> UpdateCallback;
typedef std::function<void(int paramId, Parameter* param)> ParamInitCallback;
struct Segment {
int64_t beginDim;
int64_t endDim;
// We allow the possibility that the parameters are not stored at contiguous
// memory locations for speed reason (i.e. data alignemnt)
// This means that the dimenstion is not same as the position in the memroy
// buffer.
int64_t beginPos; // beginning position in the local value or grad buffer
};
class Parameter;
typedef std::shared_ptr<Parameter> ParameterPtr;
......@@ -167,13 +156,6 @@ public:
}
}
void enableSharedType(ParameterType type, VectorPtr vec, MatType matType) {
if (!bufs_[type]) {
bufs_[type] = vec;
setMat(type, matType);
}
}
/// for batchGradientMachine: blockNum is number of partitions of the matrix.
bool isGradShared(size_t* blockNum = NULL);
......@@ -203,20 +185,6 @@ public:
const MatrixPtr& getMat(ParameterType pType) const { return mats_[pType]; }
const IVectorPtr& getIntBuf(ParameterType pType) { return intBufs_[pType]; }
void setIntBuf(ParameterType pType, const IVectorPtr& iVec) {
intBufs_[pType] = iVec;
}
SparsePrefetchRowCpuMatrix* getPrefetchMatrix();
float getLearnRate() const { return config_.learning_rate(); }
float getInitMean() const { return config_.initial_mean(); }
float getInitStandardDeviation() const { return config_.initial_std(); }
void setValueUpdated() { updated_ = true; }
void clearValueUpdated() { updated_ = false; }
......@@ -243,8 +211,6 @@ public:
*/
bool load(std::istream& is);
std::vector<Segment>& getGradientSegments() { return gradSegments_; }
void incShared() { sharedCount_++; }
/**
......@@ -351,35 +317,22 @@ protected:
int sharedCount_;
int updateCounter_;
std::vector<Segment> gradSegments_; // segments of non-zero gradient
bool updated_;
SparseFormat format_;
static ThreadLocal<std::vector<VectorPtr>> tlsTempBufs_;
std::vector<std::shared_ptr<IParameterUpdaterHook>> updaterHooks_;
public:
void setSharedCount(int cnt) { sharedCount_ = cnt; }
int getSharedCount() { return sharedCount_; }
void singleUpdate(void* data);
bool isSparse() { return config_.is_sparse(); }
SparseFormat getFormat() { return format_; }
static const std::string kMissParameterFail;
static const std::string kMissParameterRand;
static const std::string kMissParameterZero;
static VectorPtr* getTlsTempBufs();
/**
* exec a func in single/multi thread.
* vecs is bufs_ of Parameter, as input of ExecFunc.
*/
typedef std::function<void(const VectorPtr vecs[])> ExecFunc;
void exec(ExecFunc func);
};
typedef std::map<std::string, ParameterPtr> ParameterMap;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ThreadLocalBuffer.h"
#include "Parameter.h"
namespace paddle {
namespace parameter {
static ThreadLocal<std::vector<VectorPtr>> tlsTempBufs_;
VectorPtr* getThreadLocalBuffer() {
std::vector<VectorPtr>& bufs = *tlsTempBufs_;
if (bufs.empty()) {
bufs.resize(NUM_PARAMETER_TYPES);
for (auto& vec : bufs) {
vec.reset(new CpuVector(0, nullptr));
}
}
return bufs.data();
}
} // namespace parameter
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/math/Vector.h"
namespace paddle {
namespace parameter {
extern VectorPtr* getThreadLocalBuffer();
} // namespace parameter
} // namespace paddle
......@@ -42,7 +42,7 @@ TEST(Argument, poolSequenceWithStride) {
CHECK_EQ(outStart[3], 4);
CHECK_EQ(outStart[4], 7);
CHECK_EQ(stridePositions->getSize(), 8);
CHECK_EQ(stridePositions->getSize(), 8UL);
auto result = reversed ? strideResultReversed : strideResult;
for (int i = 0; i < 8; i++) {
CHECK_EQ(stridePositions->getData()[i], result[i]);
......
......@@ -383,20 +383,23 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) {
setOption(sockfd);
/// Now connect to the server
int retry_second = 0;
int error = 0;
int retry_count = 0;
do {
error = connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr));
if (error == ECONNREFUSED) {
if (connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) == 0) {
break;
}
if (errno == ECONNREFUSED) {
LOG(WARNING) << "connection refused by pserver, try again!";
if (retry_second++ >= 7) {
if (retry_count++ >= 7) {
LOG(FATAL) << "connection refused by pserver, maybe pserver failed!";
}
std::this_thread::sleep_for(std::chrono::seconds(1));
} else {
PCHECK(error >= 0) << "ERROR connecting to " << serverAddr;
PCHECK(errno != 0) << "ERROR connecting to " << serverAddr << ":"
<< serverPort << "errorno: " << errno;
}
} while (error == ECONNREFUSED);
} while (errno == ECONNREFUSED);
channel_.reset(new SocketChannel(sockfd, serverAddr));
tcpRdma_ = F_TCP;
......
......@@ -243,7 +243,8 @@ void ParameterClient2::prepareSendData(
CHECK_GE(blockSize, 1LU) << "blockSize should > 0 " << blockSize;
const auto paraSize = parameter->getSize();
if (sparseUpdate) {
const auto prefetchMat = parameter->getPrefetchMatrix();
auto prefetchMat = std::dynamic_pointer_cast<SparsePrefetchRowCpuMatrix>(
parameter->getMat(PARAMETER_VALUE));
CHECK(prefetchMat != nullptr) << "prefetchMat is nullptr";
auto sendMat = dynamic_cast<SparseRowCpuMatrix*>(
parameter->getMat(parameterType).get());
......
......@@ -18,7 +18,6 @@ limitations under the License. */
#include <fstream>
#include "paddle/math/SIMDFunctions.h"
#include "paddle/parameter/AverageOptimizer.h"
#include "paddle/parameter/FirstOrderOptimizer.h"
#include "paddle/parameter/OptimizerFunctions.h"
......@@ -26,6 +25,7 @@ limitations under the License. */
#include "paddle/parameter/ParameterOptimizer.h"
#include "paddle/parameter/ParameterUpdateFunctions.h"
#include "paddle/parameter/Regularizer.h"
#include "paddle/parameter/ThreadLocalBuffer.h"
#include "paddle/utils/Flags.h"
#include "paddle/utils/GlobalConstants.h"
#include "paddle/utils/Stat.h"
......@@ -618,7 +618,7 @@ void ParameterServer2::asyncSGD(const SendParameterRequest& request,
bool commitGradient = asyncGrdientCommitCheckAndStat(request);
VectorPtr* vecs = Parameter::getTlsTempBufs();
VectorPtr* vecs = parameter::getThreadLocalBuffer();
size_t bufferIndex = 0;
for (const auto& block : request.blocks()) {
int64_t offset = getBlockOffset(block);
......@@ -1051,15 +1051,15 @@ void ParameterServer2::clearUnusedSegments(CpuVector* vec) {
}
void ParameterServer2::parallelExecForEachBlock(ExecFunc func) {
SyncThreadPool::execHelper(syncThreadPool_.get(),
[&](int tid, size_t numThreads) {
int64_t numBlocks = blockIdMap_.size();
VectorPtr* vecs = Parameter::getTlsTempBufs();
for (int64_t blockId = tid; blockId < numBlocks;
blockId += numThreads) {
func(blockId, vecs);
}
});
SyncThreadPool::execHelper(
syncThreadPool_.get(), [&](int tid, size_t numThreads) {
int64_t numBlocks = blockIdMap_.size();
VectorPtr* vecs = parameter::getThreadLocalBuffer();
for (int64_t blockId = tid; blockId < numBlocks;
blockId += numThreads) {
func(blockId, vecs);
}
});
}
void ParameterServer2::blockTraverse(
......
......@@ -17,6 +17,7 @@ import collections
import swig_paddle
import numpy
import itertools
from functools import reduce
__all__ = ['DataProviderConverter']
......@@ -65,6 +66,8 @@ class IScanner(object):
:param argument: Output arguments object.
:type argument: swig_paddle.Arguments
:param dat: Output arguments object.
:type dat: The Python object, numpy.array or List.
:return:
"""
pass
......@@ -95,17 +98,35 @@ class DenseScanner(IScanner):
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__mat__ = None
self.__shape__ = None
self.__height__ = 0
self.__dim__ = 0
def pre_scan(self, dat):
self.__height__ += 1
if self.__shape__ is None:
self.__shape__ = numpy.array(dat).shape
if len(self.__shape__) > 3:
raise ValueError(
"The dimension of input cannot be greater than 3.")
self.__dim__ = reduce(lambda x, y: x * y, self.__shape__)
if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim:
raise ValueError(
"The data size must be equal to it in data layer.")
else:
if self.__shape__ != numpy.array(dat).shape:
raise ValueError(
"The data shape must be same in one mini-batch.")
def finish_pre_scan(self, argument):
self.__mat__ = numpy.ndarray(
shape=(self.__height__, self.input_type.dim), dtype=numpy.float32)
shape=(self.__height__, self.__dim__), dtype=numpy.float32)
self.__height__ = 0
def scan(self, dat):
# It's better to use NumPy array for speed.
dat = numpy.array(dat)
dat = dat.flatten()
self.__mat__[self.__height__] = dat
self.__height__ += 1
......@@ -116,6 +137,14 @@ class DenseScanner(IScanner):
m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True,
self.data_in_gpu)
argument.setSlotValue(self.pos, m)
if len(self.__shape__) > 1:
# The last-two dimenstions are the frame height and width.
# For example, the layout is CHW for 3-D feature of image.
# The H and W are the fram height and width.
h, w = self.__shape__[-2:]
argument.setSlotFrameHeight(self.pos, h)
argument.setSlotFrameWidth(self.pos, w)
self.__shape__ = None
class SparseBinaryScanner(IScanner):
......
......@@ -21,6 +21,8 @@ cd /paddle/build
# build script will not fail if *.deb does not exist
rm *.deb 2>/dev/null || true
# delete previous built whl packages
rm -rf /paddle/paddle/dist 2>/dev/null || true
cat <<EOF
========================================
......@@ -56,7 +58,7 @@ EOF
make -j `nproc`
if [ ${WITH_TESTING:-OFF} == "ON" ] && [ ${RUN_TEST:-OFF} == "ON" ] ; then
pip uninstall -y py-paddle paddle || true
ctest -V
ctest --output-on-failure
fi
......
......@@ -24,12 +24,21 @@ PYTHON=$1; shift
if [ $USE_VIRTUALENV_FOR_TEST -ne 0 ]; then
rm -rf .test_env
virtualenv .test_env
unset PYTHONHOME
unset PYTHONPATH
source .test_env/bin/activate
PYTHON=python
fi
export PYTHONPATH=$SCRIPTPATH/../../python/
$PYTHON -m pip install $SCRIPTPATH/../dist/*.whl requests matplotlib opencv-python ipython==5.3
$PYTHON -m pip install $SCRIPTPATH/../dist/*.whl
if [ "X${PADDLE_PACKAGE_DIR}" != "X" ]; then
$PYTHON -m pip install ${PADDLE_PACKAGE_DIR}/*.whl
else
export PYTHONPATH=$SCRIPTPATH/../../python/
fi
$PYTHON -m pip install ipython==5.3
for fn in "$@"
do
......
cc_library(stringpiece SRCS stringpiece.cc)
cc_test(stringpiece_test SRCS stringpiece_test.cc DEPS stringpiece glog gflags)
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "paddle/strings/stringpiece.h"
#include <string.h>
#include <algorithm>
#include <iosfwd>
#include <stdexcept>
namespace paddle {
StringPiece::StringPiece() : data_(NULL), size_(0) {}
StringPiece::StringPiece(const char* d, size_t n) : data_(d), size_(n) {
if (d == NULL && n != 0)
throw std::invalid_argument(
"StringPiece requires len to be 0 for NULL data");
}
StringPiece::StringPiece(const char* s) : data_(s) {
size_ = (s == NULL) ? 0 : strlen(s);
}
StringPiece::StringPiece(const std::string& s)
: data_(s.data()), size_(s.size()) {}
char StringPiece::operator[](size_t n) const {
if (n >= len())
throw std::invalid_argument("index out of StringPiece length");
return data_[n];
}
int Compare(StringPiece a, StringPiece b) {
const size_t min_len = (a.len() < b.len()) ? a.len() : b.len();
int r = memcmp(a.data(), b.data(), min_len);
if (r == 0) {
if (a.len() < b.len())
return -1;
else if (a.len() > b.len())
return 1;
}
return r;
}
bool operator==(StringPiece x, StringPiece y) {
return ((x.len() == y.len()) &&
(x.data() == y.data() || memcmp(x.data(), y.data(), x.len()) == 0));
}
bool operator!=(StringPiece x, StringPiece y) { return !(x == y); }
bool operator<(StringPiece x, StringPiece y) { return Compare(x, y) < 0; }
bool operator>(StringPiece x, StringPiece y) { return Compare(x, y) > 0; }
bool operator<=(StringPiece x, StringPiece y) { return Compare(x, y) <= 0; }
bool operator>=(StringPiece x, StringPiece y) { return Compare(x, y) >= 0; }
bool HasPrefix(StringPiece s, StringPiece x) {
return ((s.len() >= x.len()) && (memcmp(s.data(), x.data(), x.len()) == 0));
}
bool HasSuffix(StringPiece s, StringPiece x) {
return ((s.len() >= x.len()) &&
(memcmp(s.data() + (s.len() - x.len()), x.data(), x.len()) == 0));
}
StringPiece SkipPrefix(StringPiece s, size_t n) {
if (n > s.len())
throw std::invalid_argument("Skip distance larger than StringPiece length");
return StringPiece(s.data() + n, s.len() - n);
}
StringPiece SkipSuffix(StringPiece s, size_t n) {
if (n > s.len())
throw std::invalid_argument("Skip distance larger than StringPiece length");
return StringPiece(s.data(), s.len() - n);
}
StringPiece TrimPrefix(StringPiece s, StringPiece x) {
return HasPrefix(s, x) ? SkipPrefix(s, x.len()) : s;
}
StringPiece TrimSuffix(StringPiece s, StringPiece x) {
return HasSuffix(s, x) ? SkipSuffix(s, x.len()) : s;
}
bool Contains(StringPiece s, StringPiece sub) {
return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end();
}
size_t Index(StringPiece s, StringPiece sub) {
auto e = std::search(s.begin(), s.end(), sub.begin(), sub.end());
return e != s.end() ? e - s.data() : StringPiece::npos;
}
size_t Find(StringPiece s, char c, size_t pos) {
if (pos >= s.len()) {
return StringPiece::npos;
}
const char* result =
reinterpret_cast<const char*>(memchr(s.data() + pos, c, s.len() - pos));
return result != nullptr ? result - s.data() : StringPiece::npos;
}
size_t RFind(StringPiece s, char c, size_t pos) {
if (s.len() == 0) return StringPiece::npos;
for (const char* p = s.data() + std::min(pos, s.len() - 1); p >= s.data();
p--) {
if (*p == c) {
return p - s.data();
}
}
return StringPiece::npos;
}
StringPiece SubStr(StringPiece s, size_t pos, size_t n) {
if (pos > s.len()) pos = s.len();
if (n > s.len() - pos) n = s.len() - pos;
return StringPiece(s.data() + pos, n);
}
std::ostream& operator<<(std::ostream& o, StringPiece piece) {
return o << piece.ToString();
}
} // namespace paddle
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#pragma once
#include <ostream>
#include <string>
namespace paddle {
// StringPiece points into a std::string object but doesn't own the
// string. It is for efficient access to strings. Like Go's string
// type. Not that StringPiece doesn't mutate the underlying string,
// so it is thread-safe given that the underlying string doesn't
// change. Because StringPiece contains a little data members, and
// its syntax is simple as it doesn't own/manage the string, it is
// cheap to construct StringPieces and pass them around.
class StringPiece {
public:
static const size_t npos = static_cast<size_t>(-1);
// We provide non-explicit singleton constructors so users can
// pass in a "const char*" or a "string" wherever a "StringPiece"
// is expected. These contructors ensure that if data_ is NULL,
// size_ is 0.
StringPiece();
StringPiece(const char* d, size_t n);
StringPiece(const char* d);
StringPiece(const std::string& s);
const char* data() const { return data_; }
size_t len() const { return size_; }
char operator[](size_t n) const;
// StringPiece doesn't own the string, so both iterator and const
// iterator are const char* indeed.
typedef const char* const_iterator;
typedef const char* iterator;
iterator begin() const { return data_; }
iterator end() const { return data_ + size_; }
// Return a string that contains the copy of the referenced data.
std::string ToString() const { return std::string(data_, size_); }
private:
const char* data_;
size_t size_;
// Intentionally copyable
};
int Compare(StringPiece a, StringPiece b);
bool operator==(StringPiece x, StringPiece y);
bool operator!=(StringPiece x, StringPiece y);
bool operator<(StringPiece x, StringPiece y);
bool operator>(StringPiece x, StringPiece y);
bool operator<=(StringPiece x, StringPiece y);
bool operator>=(StringPiece x, StringPiece y);
bool HasPrefix(StringPiece s, StringPiece prefix);
bool HasSuffix(StringPiece s, StringPiece suffix);
StringPiece SkipPrefix(StringPiece s, size_t n);
StringPiece SkipSuffix(StringPiece s, size_t n);
// Skip the prefix (or suffix) if it matches with the string.
StringPiece TrimPrefix(StringPiece s, StringPiece prefix);
StringPiece TrimSuffix(StringPiece s, StringPiece suffix);
// Returns if s contains sub. Any s except for empty s contains an
// empty sub.
bool Contains(StringPiece s, StringPiece sub);
// Return the first occurrence of sub in s, or npos. If both s and
// sub is empty, it returns npos; otherwise, if only sub is empty, it
// returns 0.
size_t Index(StringPiece s, StringPiece sub);
// Return the first occurrence of c in s[pos:end], or npos.
size_t Find(StringPiece s, char c, size_t pos);
// Search range is [0..pos] inclusive. If pos == npos, search everything.
size_t RFind(StringPiece s, char c, size_t pos);
StringPiece SubStr(StringPiece s, size_t pos, size_t n);
// allow StringPiece to be logged
std::ostream& operator<<(std::ostream& o, StringPiece piece);
} // namespace paddle
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "paddle/strings/stringpiece.h"
#include <sstream>
#include "gtest/gtest.h"
TEST(StringPiece, Construct) {
{
paddle::StringPiece s;
EXPECT_EQ(NULL, s.data());
EXPECT_EQ(0U, s.len());
}
{ EXPECT_THROW(paddle::StringPiece s(NULL, 10000U), std::invalid_argument); }
{
paddle::StringPiece s(NULL);
EXPECT_EQ(0U, s.len());
}
{
std::string a;
EXPECT_EQ(0U, a.size());
paddle::StringPiece s(a);
EXPECT_EQ(0U, s.len());
}
}
TEST(StringPiece, CopyAndAssign) {
paddle::StringPiece empty;
EXPECT_EQ(0U, empty.len());
paddle::StringPiece a("hello");
paddle::StringPiece b = a;
EXPECT_EQ(b.len(), strlen("hello"));
EXPECT_EQ(a, b);
std::string storage("hello");
paddle::StringPiece c(storage);
EXPECT_EQ(a, c);
EXPECT_NE(a.data(), c.data());
}
TEST(StringPiece, Compare) {
{
paddle::StringPiece a("hello");
paddle::StringPiece b("world");
EXPECT_TRUE(a != b);
EXPECT_FALSE(a == b);
EXPECT_TRUE(a < b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a > b);
EXPECT_FALSE(a >= b);
EXPECT_LT(Compare(a, b), 0);
EXPECT_GT(Compare(b, a), 0);
}
{
paddle::StringPiece a, b;
EXPECT_TRUE(a == b);
EXPECT_FALSE(a != b);
EXPECT_FALSE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_TRUE(a >= b);
EXPECT_EQ(0, Compare(a, b));
EXPECT_EQ(0, Compare(b, a));
}
}
TEST(StringPiece, ToString) {
{
paddle::StringPiece s;
EXPECT_EQ(std::string(""), s.ToString());
}
{
paddle::StringPiece s(NULL);
EXPECT_EQ(std::string(""), s.ToString());
}
{
paddle::StringPiece s("hello");
EXPECT_EQ(std::string("hello"), s.ToString());
}
}
TEST(StringPiece, HasPrefixSuffix) {
using paddle::HasPrefix;
using paddle::HasSuffix;
{
paddle::StringPiece s;
EXPECT_FALSE(HasPrefix(s, "something"));
EXPECT_TRUE(HasPrefix(s, ""));
EXPECT_FALSE(HasSuffix(s, "something"));
EXPECT_TRUE(HasSuffix(s, ""));
}
{
paddle::StringPiece s("app");
EXPECT_TRUE(HasPrefix(s, ""));
EXPECT_TRUE(HasPrefix(s, "a"));
EXPECT_TRUE(HasPrefix(s, "ap"));
EXPECT_TRUE(HasPrefix(s, "app"));
EXPECT_TRUE(HasSuffix(s, ""));
EXPECT_TRUE(HasSuffix(s, "p"));
EXPECT_TRUE(HasSuffix(s, "pp"));
EXPECT_TRUE(HasSuffix(s, "app"));
}
}
TEST(StringPiece, SkipPrefixSuffix) {
using paddle::SkipPrefix;
using paddle::SkipSuffix;
{
paddle::StringPiece s;
EXPECT_EQ("", SkipPrefix(s, 0));
EXPECT_THROW(SkipPrefix(s, 1), std::invalid_argument);
EXPECT_EQ("", SkipSuffix(s, 0));
EXPECT_THROW(SkipSuffix(s, 1), std::invalid_argument);
}
{
paddle::StringPiece s("app");
EXPECT_EQ("app", SkipPrefix(s, 0));
EXPECT_EQ("pp", SkipPrefix(s, 1));
EXPECT_EQ("p", SkipPrefix(s, 2));
EXPECT_EQ("", SkipPrefix(s, 3));
EXPECT_THROW(SkipPrefix(s, 4), std::invalid_argument);
EXPECT_EQ("app", SkipSuffix(s, 0));
EXPECT_EQ("ap", SkipSuffix(s, 1));
EXPECT_EQ("a", SkipSuffix(s, 2));
EXPECT_EQ("", SkipSuffix(s, 3));
EXPECT_THROW(SkipSuffix(s, 4), std::invalid_argument);
}
}
TEST(StringPiece, TrimPrefixSuffix) {
using paddle::TrimPrefix;
using paddle::TrimSuffix;
{
paddle::StringPiece s;
EXPECT_EQ("", TrimPrefix(s, ""));
EXPECT_EQ("", TrimPrefix(s, "something"));
EXPECT_EQ("", TrimSuffix(s, ""));
EXPECT_EQ("", TrimSuffix(s, "something"));
}
{
paddle::StringPiece s("app");
EXPECT_EQ("app", TrimPrefix(s, ""));
EXPECT_EQ("pp", TrimPrefix(s, "a"));
EXPECT_EQ("p", TrimPrefix(s, "ap"));
EXPECT_EQ("", TrimPrefix(s, "app"));
EXPECT_EQ("app", TrimPrefix(s, "something"));
EXPECT_EQ("app", TrimSuffix(s, ""));
EXPECT_EQ("ap", TrimSuffix(s, "p"));
EXPECT_EQ("a", TrimSuffix(s, "pp"));
EXPECT_EQ("", TrimSuffix(s, "app"));
EXPECT_EQ("app", TrimSuffix(s, "something"));
}
}
TEST(StringPiece, Contains) {
using paddle::Contains;
{
paddle::StringPiece s;
EXPECT_FALSE(Contains(s, ""));
EXPECT_FALSE(Contains(s, "something"));
}
{
paddle::StringPiece s("app");
EXPECT_TRUE(Contains(s, ""));
EXPECT_TRUE(Contains(s, "a"));
EXPECT_TRUE(Contains(s, "p"));
EXPECT_TRUE(Contains(s, "ap"));
EXPECT_TRUE(Contains(s, "pp"));
EXPECT_TRUE(Contains(s, "app"));
EXPECT_FALSE(Contains(s, "something"));
}
}
TEST(StringPiece, Index) {
using paddle::Index;
auto npos = paddle::StringPiece::npos;
{
paddle::StringPiece s;
EXPECT_EQ(npos, Index(s, ""));
EXPECT_EQ(npos, Index(s, "something"));
}
{
paddle::StringPiece s("app");
EXPECT_EQ(0U, Index(s, ""));
EXPECT_EQ(0U, Index(s, "a"));
EXPECT_EQ(1U, Index(s, "p"));
EXPECT_EQ(0U, Index(s, "ap"));
EXPECT_EQ(1U, Index(s, "pp"));
EXPECT_EQ(0U, Index(s, "app"));
EXPECT_EQ(npos, Index(s, "something"));
}
}
TEST(StringPiece, Find) {
using paddle::Find;
auto npos = paddle::StringPiece::npos;
{
paddle::StringPiece s;
EXPECT_EQ(npos, Find(s, 'a', 0U));
}
{
paddle::StringPiece s("app");
EXPECT_EQ(0U, Find(s, 'a', 0U));
EXPECT_EQ(1U, Find(s, 'p', 0U));
EXPECT_EQ(1U, Find(s, 'p', 1U));
EXPECT_EQ(2U, Find(s, 'p', 2U));
EXPECT_EQ(npos, Find(s, 'z', 2U));
}
}
TEST(StringPiece, RFind) {
using paddle::RFind;
auto npos = paddle::StringPiece::npos;
{
paddle::StringPiece s;
EXPECT_EQ(npos, RFind(s, 'a', 0U));
}
{
paddle::StringPiece s("app");
EXPECT_EQ(2U, RFind(s, 'p', 2U));
EXPECT_EQ(0U, RFind(s, 'a', 2U));
EXPECT_EQ(1U, RFind(s, 'p', 1U));
EXPECT_EQ(0U, RFind(s, 'a', 0));
EXPECT_EQ(npos, RFind(s, 'z', 2U));
}
}
TEST(StringPiece, SubStr) {
using paddle::SubStr;
{
paddle::StringPiece s;
EXPECT_EQ("", SubStr(s, 0, 0));
EXPECT_EQ("", SubStr(s, 0, 1));
EXPECT_EQ("", SubStr(s, 1, 0));
}
{
paddle::StringPiece s("app");
EXPECT_EQ("", SubStr(s, 0, 0));
EXPECT_EQ("", SubStr(s, 1, 0));
EXPECT_EQ("", SubStr(s, 2, 0));
EXPECT_EQ("", SubStr(s, 3, 0));
EXPECT_EQ("a", SubStr(s, 0, 1));
EXPECT_EQ("p", SubStr(s, 1, 1));
EXPECT_EQ("p", SubStr(s, 2, 1));
EXPECT_EQ("", SubStr(s, 3, 1));
EXPECT_EQ("ap", SubStr(s, 0, 2));
EXPECT_EQ("pp", SubStr(s, 1, 2));
EXPECT_EQ("p", SubStr(s, 2, 2));
EXPECT_EQ("", SubStr(s, 3, 2));
EXPECT_EQ("app", SubStr(s, 0, 3));
EXPECT_EQ("pp", SubStr(s, 1, 3));
EXPECT_EQ("p", SubStr(s, 2, 3));
EXPECT_EQ("", SubStr(s, 3, 3));
}
}
TEST(StringPiece, StreamOutput) {
using paddle::StringPiece;
std::stringstream o;
o << StringPiece();
EXPECT_EQ("", o.str());
o << StringPiece("hello");
EXPECT_EQ("hello", o.str());
o << StringPiece();
EXPECT_EQ("hello", o.str());
}
......@@ -747,28 +747,32 @@ void SparseRemoteParameterUpdater::getParametersRemote(bool fullSize,
bool apply) {
ParameterType sendBackParameterType =
(useApplyInPserver_ && apply) ? PARAMETER_APPLY : PARAMETER_VALUE;
std::function<void()> getParams;
std::function<void(Parameter&, real)> applyL1;
if (fullSize) {
parameterClient_->getParameter(
/* recvParameterType= */ PARAMETER_VALUE, sendBackParameterType);
if (config_.shrink_parameter_value() > 0) {
for (auto& para : parameters_) {
if (para->getConfig().decay_rate_l1() > 0) {
para->getBuf(PARAMETER_VALUE)
->applyL1(1.0f, // learningRate
config_.shrink_parameter_value()); // decayRate
}
}
}
getParams = [&] {
parameterClient_->getParameter(
/* recvParameterType= */ PARAMETER_VALUE, sendBackParameterType);
};
applyL1 = [](Parameter& para, real decayRate) {
para.getBuf(PARAMETER_VALUE)->applyL1(/*lr=*/1.0f, decayRate);
};
} else {
REGISTER_TIMER("getParamSparse");
parameterClient_->getParameterSparse(
/* recvParameterType= */ PARAMETER_VALUE, sendBackParameterType);
getParams = [&] {
parameterClient_->getParameterSparse(
/* recvParameterType= */ PARAMETER_VALUE, sendBackParameterType);
};
applyL1 = [](Parameter& para, real decayRate) {
para.getMat(PARAMETER_VALUE)->applyL1(/*lr=*/1.0f, decayRate);
};
}
{
REGISTER_TIMER("getParamDenseAndSparse");
getParams();
if (config_.shrink_parameter_value() > 0) {
for (auto& para : parameters_) {
if (para->getConfig().decay_rate_l1() > 0) {
para->getPrefetchMatrix()->applyL1Decay(
1.0f, // learningRate
config_.shrink_parameter_value()); // decayRate
applyL1(*para, config_.shrink_parameter_value());
}
}
}
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/utils/Logging.h"
#include "paddle/math/SparseRowMatrix.h"
#include "paddle/parameter/ThreadLocalBuffer.h"
#include "paddle/utils/Thread.h"
DECLARE_int32(trainer_count);
......@@ -98,7 +99,7 @@ void SgdThreadUpdater::threadTraverse(
int tid,
size_t numThreads,
Parameter* para) {
VectorPtr* vecs = Parameter::getTlsTempBufs();
VectorPtr* vecs = parameter::getThreadLocalBuffer();
if (para->isGradSparseUpdate()) {
size_t height = para->getConfig().dims(0);
size_t width = para->getConfig().dims(1);
......@@ -214,7 +215,7 @@ void SgdThreadUpdater::threadUpdateSparse(int tid,
Parameter* para) {
int pid = para->getID();
ParameterOptimizer* optimizer = optimizers_[pid].get();
VectorPtr* vecs = Parameter::getTlsTempBufs();
VectorPtr* vecs = parameter::getThreadLocalBuffer();
size_t height = para->getConfig().dims(0);
size_t width = para->getConfig().dims(1);
......@@ -286,7 +287,7 @@ void SgdThreadUpdater::threadUpdateDense(int tid,
Parameter* para) {
int pid = para->getID();
ParameterOptimizer* optimizer = optimizers_[pid].get();
VectorPtr* vecs = Parameter::getTlsTempBufs();
VectorPtr* vecs = parameter::getThreadLocalBuffer();
auto interval = calcSplitArrayInterval(
para->getSize(), (size_t)tid, numThreads, 8LU /*for avx*/);
......
......@@ -21,7 +21,7 @@ limitations under the License. */
#else
#if !defined(__arm__)
#if !defined(__arm__) && !defined(__aarch64__)
#include <cpuid.h>
/// for GCC/Clang
#define CPUID(info, x) __cpuid_count(x, 0, info[0], info[1], info[2], info[3])
......@@ -32,7 +32,7 @@ limitations under the License. */
namespace paddle {
SIMDFlags::SIMDFlags() {
#if defined(__arm__)
#if defined(__arm__) || defined(__aarch64__)
simd_flags_ = SIMD_NEON;
#else
unsigned int cpuInfo[4];
......
......@@ -55,8 +55,11 @@ public:
};
#else
// clang-format off
#include <cstddef>
#include <atomic>
// clang-format on
class SpinLockPrivate {
public:
inline void lock() {
......
......@@ -19,7 +19,7 @@ using namespace paddle; // NOLINT
TEST(SIMDFlags, gccTest) {
#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)) && \
!defined(__arm__)
!defined(__arm__) && !defined(__aarch64__)
// clang-format off
CHECK(!__builtin_cpu_supports("sse") != HAS_SSE);
CHECK(!__builtin_cpu_supports("sse2") != HAS_SSE2);
......
......@@ -194,6 +194,10 @@ message MaxOutConfig {
required uint32 groups = 2;
}
message RowConvConfig {
required uint32 context_length = 1;
}
message ProjectionConfig {
required string type = 1;
required string name = 2;
......@@ -279,6 +283,7 @@ message LayerInputConfig {
optional SppConfig spp_conf = 12;
optional PriorBoxConfig priorbox_conf = 13;
optional PadConfig pad_conf = 14;
optional RowConvConfig row_conv_conf = 15;
}
message LayerConfig {
......
......@@ -23,14 +23,17 @@ add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp)
add_subdirectory(paddle/trainer_config_helpers/tests)
if (WITH_SWIG_PY)
# enable v2 API unittest only when paddle swig api is compiled
add_subdirectory(paddle/v2/tests)
add_subdirectory(paddle/v2/reader/tests)
add_subdirectory(paddle/v2/plot/tests)
endif()
set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/
if (WITH_TESTING)
add_subdirectory(paddle/trainer_config_helpers/tests)
if (WITH_SWIG_PY)
# enable v2 API unittest only when paddle swig api is compiled
add_subdirectory(paddle/v2/tests)
add_subdirectory(paddle/v2/reader/tests)
add_subdirectory(paddle/v2/plot/tests)
endif()
endif()
install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
DESTINATION opt/paddle/share/wheels
)
......@@ -72,9 +72,16 @@ class InputType(object):
def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Dense Vector. It means the input feature is dense float vector. For example,
if the input is an image with 28*28 pixels, the input of Paddle neural
network should be a dense vector with dimension 784.
Dense Array. It means the input feature is dense array with float type.
For example, if the input is an image with 28*28 pixels, the input of
Paddle neural network could be a dense vector with dimension 784 or a
numpy array with shape (28, 28).
For the 2-D convolution operation, each sample in one mini-batch must have
the similarly size in PaddlePaddle now. But, it supports variable-dimension
feature across mini-batch. For the variable-dimension, the param dim is not
used. While the data reader must yield numpy array and the data feeder will
set the data shape correctly.
:param dim: dimension of this vector.
:type dim: int
......@@ -135,6 +142,10 @@ sparse_binary_vector = sparse_non_value_slot
sparse_vector = sparse_value_slot
integer_value = index_slot
# dense_array can be used for variable-length input feature.
# Each feature is not a vector, but a multi-dimensional array.
dense_array = dense_slot
def dense_vector_sequence(dim):
"""
......
......@@ -73,7 +73,6 @@ To use this from paddle_trainer, paddle_trainer should be called with
--config_args=extension_module_name=[MODULE_NAME]
'''
import copy
import logging
import os
......@@ -1731,9 +1730,10 @@ class ParameterReluLayer(LayerBase):
def __init__(self, name, inputs, partial_sum=1, **args):
super(ParameterReluLayer, self).__init__(
name, self.layer_type, 0, inputs=inputs, **args)
config_assert(len(self.inputs) == 1)
config_assert(self.input_layer.size % partial_sum == 0)
input_layer = self.get_input_layer(0)
config_assert(len(self.inputs) == 1, "prelu layer has only one input.")
config_assert(input_layer.size % partial_sum == 0,
"a wrong setting for partial_sum")
self.set_layer_size(input_layer.size)
self.create_input_parameter(0, input_layer.size / partial_sum)
......@@ -2081,6 +2081,23 @@ class MaxOutLayer(LayerBase):
g_layer_map[input_layer.name].width, out_channels)
@config_layer('row_conv')
class RowConvLayer(LayerBase):
def __init__(self, name, inputs, context_length, **xargs):
super(RowConvLayer, self).__init__(
name, 'maxout', 0, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1,
'TransLayer must have one and only one input')
input_layer = self.get_input_layer(0)
row_conv_conf = self.config.inputs[0].row_conv_conf
row_conv_conf.context_length = context_length
self.set_layer_size(input_layer.size)
psize = context_length * input_layer.size
dims = [context_length, input_layer.size]
self.create_input_parameter(0, psize, dims)
# key: cost type
# value: cost class
g_cost_map = {}
......@@ -3371,7 +3388,7 @@ def make_importer(config_dir, config_args):
return Import
settings = dict(
DEFAULT_SETTING = dict(
batch_size=None,
mini_batch_size=None,
algorithm='async_sgd',
......@@ -3404,6 +3421,8 @@ settings = dict(
adam_beta2=0.999,
adam_epsilon=1e-8, )
settings = copy.deepcopy(DEFAULT_SETTING)
settings_deprecated = dict(usage_ratio=1., )
trainer_settings = dict(
......@@ -3544,23 +3563,32 @@ def update_g_config():
return g_config
def parse_config(trainer_config, config_arg_str):
'''
@param trainer_config: can be a string of config file name or a function name
with config logic
@param config_arg_str: a string of the form var1=val1,var2=val2. It will be
passed to config script as a dictionary CONFIG_ARGS
'''
def begin_parse():
init_config_environment()
for hook in _parse_config_hooks:
hook()
config_args = {}
logger.findCaller = find_caller
logger.fatal = my_fatal
g_config.model_config.type = "nn"
global g_current_submodel, g_root_submodel
g_root_submodel = g_config.model_config.sub_models.add()
g_root_submodel.name = 'root'
g_root_submodel.is_recurrent_layer_group = False
g_current_submodel = g_root_submodel
def parse_config(trainer_config, config_arg_str):
'''
@param config_arg_str: a string of the form var1=val1,var2=val2. It will be
passed to config script as a dictionary CONFIG_ARGS
'''
begin_parse()
config_args = {}
if config_arg_str:
config_args = dict([f.split('=') for f in config_arg_str.split(',')])
......@@ -3573,14 +3601,6 @@ def parse_config(trainer_config, config_arg_str):
extension_module = importlib(extension_module_name)
g_extended_config_funcs = extension_module.get_config_funcs(g_config)
g_config.model_config.type = 'nn'
global g_current_submodel, g_root_submodel
g_root_submodel = g_config.model_config.sub_models.add()
g_root_submodel.name = 'root'
g_root_submodel.is_recurrent_layer_group = False
g_current_submodel = g_root_submodel
if hasattr(trainer_config, '__call__'):
trainer_config.func_globals.update(
make_config_environment("", config_args))
......
......@@ -110,15 +110,16 @@ class ParameterAttribute(object):
momentum=None,
gradient_clipping_threshold=None,
sparse_update=False):
# initialize strategy.
self.attr = {}
if is_static:
self.attr = {'is_static': True}
elif initial_std is None and initial_mean is None and initial_max \
self.attr['is_static'] = True
if initial_std is None and initial_mean is None and initial_max \
is None and initial_min is None:
self.attr = {'initial_smart': True}
self.attr['initial_smart'] = True
elif is_compatible_with(initial_std, float) or \
is_compatible_with(initial_mean, float):
self.attr = dict()
if initial_std is not None:
self.attr['initial_std'] = initial_std
if initial_mean is not None:
......@@ -131,7 +132,6 @@ class ParameterAttribute(object):
assert initial_min < initial_max
initial_mean = (initial_max + initial_min) / 2
initial_std = initial_mean - initial_min
self.attr = dict()
self.attr['initial_mean'] = initial_mean
self.attr['initial_std'] = initial_std
self.attr['initial_strategy'] = 1 # Uniform Random
......
......@@ -12,15 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import paddle.trainer.config_parser as config_parser
from paddle.proto.TrainerConfig_pb2 import OptimizationConfig
'''
This file is a wrapper of formal config_parser. The main idea of this file is to
This file is a wrapper of formal config_parser. The main idea of this file is to
separete different config logic into different function, such as network configuration
and optimizer configuration.
'''
__all__ = [
"parse_trainer_config", "parse_network_config", "parse_optimizer_config"
"parse_trainer_config", "parse_network_config", "parse_optimizer_config",
"reset_parser"
]
......@@ -34,5 +37,15 @@ def parse_network_config(network_conf, config_arg_str=''):
def parse_optimizer_config(optimizer_conf, config_arg_str=''):
config = config_parser.parse_config(optimizer_conf, config_arg_str)
return config.opt_config
config_parser.settings = copy.deepcopy(config_parser.DEFAULT_SETTING)
optimizer_conf()
opt_config = OptimizationConfig()
for k, v in config_parser.settings.iteritems():
if v is None:
continue
opt_config.__setattr__(k, v)
return opt_config
def reset_parser():
config_parser.begin_parse()
......@@ -31,31 +31,31 @@ except ImportError:
import copy
__all__ = [
"full_matrix_projection",
"AggregateLevel",
"ExpandLevel",
"identity_projection",
"dotmul_projection",
"dotmul_operator",
"repeat_layer",
"seq_reshape_layer",
"table_projection",
"mixed_layer",
"data_layer",
"embedding_layer",
"fc_layer",
"grumemory",
"pooling_layer",
"lstmemory",
"last_seq",
"first_seq",
"cos_sim",
"hsigmoid",
"conv_projection",
"mse_cost",
"regression_cost",
'full_matrix_projection',
'AggregateLevel',
'ExpandLevel',
'identity_projection',
'dotmul_projection',
'dotmul_operator',
'repeat_layer',
'seq_reshape_layer',
'table_projection',
'mixed_layer',
'data_layer',
'embedding_layer',
'fc_layer',
'grumemory',
'pooling_layer',
'lstmemory',
'last_seq',
'first_seq',
'cos_sim',
'hsigmoid',
'conv_projection',
'mse_cost',
'regression_cost',
'classification_cost',
"LayerOutput",
'LayerOutput',
'img_conv_layer',
'img_pool_layer',
'batch_norm_layer',
......@@ -111,6 +111,7 @@ __all__ = [
'block_expand_layer',
'maxout_layer',
'out_prod_layer',
'printer_layer',
'print_layer',
'priorbox_layer',
'cross_channel_norm_layer',
......@@ -119,6 +120,10 @@ __all__ = [
'eos_layer',
'smooth_l1_cost',
'layer_support',
'multiplex_layer',
'row_conv_layer',
'dropout_layer',
'prelu_layer',
]
......@@ -127,26 +132,26 @@ class LayerType(object):
Layer type enumerations.
"""
DATA = "data"
MIXED_LAYER = "mixed"
LSTMEMORY = "lstmemory"
GRUMEMORY = "gated_recurrent"
SEQUENCE_LAST_INSTANCE = "seqlastins"
SEQUENCE_FIRST_INSTANCE = "seqfirstins"
SEQUENCE_RESHAPE = "seqreshape"
POOLING_MAX = "max"
DATA = 'data'
MIXED_LAYER = 'mixed'
LSTMEMORY = 'lstmemory'
GRUMEMORY = 'gated_recurrent'
SEQUENCE_LAST_INSTANCE = 'seqlastins'
SEQUENCE_FIRST_INSTANCE = 'seqfirstins'
SEQUENCE_RESHAPE = 'seqreshape'
POOLING_MAX = 'max'
POOLING_AVG = 'average'
FC_LAYER = "fc"
FC_LAYER = 'fc'
COST = 'cost'
COSINE_SIM_VEC = 'cos_vm'
COSINE_SIM = 'cos'
HSIGMOID = 'hsigmoid'
CONV_LAYER = "conv"
CONVTRANS_LAYER = "convt"
EXCONV_LAYER = "exconv"
EXCONVTRANS_LAYER = "exconvt"
CUDNNCONV_LAYER = "cudnn_conv"
POOL_LAYER = "pool"
CONV_LAYER = 'conv'
CONVTRANS_LAYER = 'convt'
EXCONV_LAYER = 'exconv'
EXCONVTRANS_LAYER = 'exconvt'
CUDNNCONV_LAYER = 'cudnn_conv'
POOL_LAYER = 'pool'
BATCH_NORM_LAYER = 'batch_norm'
NORM_LAYER = 'norm'
SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm'
......@@ -185,25 +190,29 @@ class LayerType(object):
MAXOUT = "maxout"
SPP_LAYER = "spp"
PAD_LAYER = "pad"
MULTIPLEX_LAYER = "multiplex"
ROW_CONV_LAYER = "row_conv"
PRINT_LAYER = "print"
PRIORBOX_LAYER = "priorbox"
PRINT_LAYER = 'print'
PRIORBOX_LAYER = 'priorbox'
CTC_LAYER = "ctc"
WARP_CTC_LAYER = "warp_ctc"
CRF_LAYER = "crf"
CRF_DECODING_LAYER = "crf_decoding"
CTC_LAYER = 'ctc'
WARP_CTC_LAYER = 'warp_ctc'
CRF_LAYER = 'crf'
CRF_DECODING_LAYER = 'crf_decoding'
NCE_LAYER = 'nce'
RANK_COST = "rank-cost"
LAMBDA_COST = "lambda_cost"
HUBER = "huber"
CROSS_ENTROPY = "multi-class-cross-entropy"
CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm"
SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
SUM_COST = "sum_cost"
SMOOTH_L1 = "smooth_l1"
RANK_COST = 'rank-cost'
LAMBDA_COST = 'lambda_cost'
HUBER = 'huber'
CROSS_ENTROPY = 'multi-class-cross-entropy'
CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy'
MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy'
SUM_COST = 'sum_cost'
SMOOTH_L1 = 'smooth_l1'
PRELU = 'prelu'
@staticmethod
def is_layer_type(type_name):
......@@ -235,16 +244,19 @@ class AggregateLevel(object):
Accordingly, AggregateLevel supports two modes:
- :code:`AggregateLevel.EACH_TIMESTEP` means the aggregation acts on each
- :code:`AggregateLevel.TO_NO_SEQUENCE` means the aggregation acts on each
timestep of a sequence, both :code:`SUB_SEQUENCE` and :code:`SEQUENCE` will
be aggregated to :code:`NO_SEQUENCE`.
- :code:`AggregateLevel.EACH_SEQUENCE` means the aggregation acts on each
- :code:`AggregateLevel.TO_SEQUENCE` means the aggregation acts on each
sequence of a nested sequence, :code:`SUB_SEQUENCE` will be aggregated to
:code:`SEQUENCE`.
"""
EACH_TIMESTEP = 'non-seq'
EACH_SEQUENCE = 'seq'
TO_NO_SEQUENCE = 'non-seq'
TO_SEQUENCE = 'seq'
# compatible with previous configuration
EACH_TIMESTEP = TO_NO_SEQUENCE
EACH_SEQUENCE = TO_SEQUENCE
class LayerOutput(object):
......@@ -285,6 +297,7 @@ class LayerOutput(object):
assert size is not None
assert LayerType.is_layer_type(layer_type)
self.name = name
self.full_name = MakeLayerNameInSubmodel(name)
self.layer_type = layer_type
if parents is not None and type(parents) != list:
parents = [parents]
......@@ -479,7 +492,7 @@ def table_projection(input, size=0, param_attr=None):
return proj
def identity_projection(input, offset=None):
def identity_projection(input, offset=None, size=None):
"""
1. IdentityProjection if offset=None. It performs:
......@@ -520,8 +533,10 @@ def identity_projection(input, offset=None):
proj = IdentityProjection(input_layer_name=input.name)
proj.origin = input
else:
if size is None:
size = input.size - offset
proj = IdentityOffsetProjection(
input_layer_name=input.name, offset=offset)
input_layer_name=input.name, offset=offset, size=size)
proj.origin = input
return proj
......@@ -961,7 +976,7 @@ def fc_layer(input,
@wrap_name_default("print")
def print_layer(input, name=None):
def printer_layer(input, name=None):
"""
Print the output value of input layers. This layer is useful for debugging.
......@@ -983,6 +998,13 @@ def print_layer(input, name=None):
inputs=[l.name for l in input], )
# this layer don't return anything, can not be input of other layer.
# Keep print_layer for compatibility with V1 API.
# 'print_layer' does not work for V2 API because it will be changed to
# 'print' for V2 API. But 'print' is a reserved key word in python.
print_layer = printer_layer
@wrap_name_default("priorbox")
def priorbox_layer(input,
......@@ -1078,7 +1100,7 @@ def pooling_layer(input,
pooling_type=None,
name=None,
bias_attr=None,
agg_level=AggregateLevel.EACH_TIMESTEP,
agg_level=AggregateLevel.TO_NO_SEQUENCE,
layer_attr=None):
"""
Pooling layer for sequence inputs, not used for Image.
......@@ -1089,10 +1111,10 @@ def pooling_layer(input,
seq_pool = pooling_layer(input=layer,
pooling_type=AvgPooling(),
agg_level=AggregateLevel.EACH_SEQUENCE)
agg_level=AggregateLevel.TO_NO_SEQUENCE)
:param agg_level: AggregateLevel.EACH_TIMESTEP or
AggregateLevel.EACH_SEQUENCE
:param agg_level: AggregateLevel.TO_NO_SEQUENCE or
AggregateLevel.TO_SEQUENCE
:type agg_level: AggregateLevel
:param name: layer name.
:type name: basestring
......@@ -1362,7 +1384,7 @@ def grumemory(input,
@layer_support()
def last_seq(input,
name=None,
agg_level=AggregateLevel.EACH_TIMESTEP,
agg_level=AggregateLevel.TO_NO_SEQUENCE,
stride=-1,
layer_attr=None):
"""
......@@ -1397,7 +1419,7 @@ def last_seq(input,
" series information at all. Maybe you want to use"
" first_seq instead.")
if agg_level == AggregateLevel.EACH_SEQUENCE:
if agg_level == AggregateLevel.TO_SEQUENCE:
assert stride == -1
Layer(
......@@ -1418,7 +1440,7 @@ def last_seq(input,
@layer_support()
def first_seq(input,
name=None,
agg_level=AggregateLevel.EACH_TIMESTEP,
agg_level=AggregateLevel.TO_NO_SEQUENCE,
stride=-1,
layer_attr=None):
"""
......@@ -1454,7 +1476,7 @@ def first_seq(input,
' time series information at all. Maybe you want to use'
' last_seq instead.')
if agg_level == AggregateLevel.EACH_SEQUENCE:
if agg_level == AggregateLevel.TO_SEQUENCE:
assert stride == -1
Layer(
......@@ -1477,16 +1499,18 @@ class ExpandLevel(object):
ExpandLevel supports two modes:
- :code:`ExpandLevel.FROM_TIMESTEP` means the expandation acts on each
timestep of a sequence, :code:`NO_SEQUENCE` will be expanded to
- :code:`ExpandLevel.FROM_NO_SEQUENCE` means the expansion acts on
:code:`NO_SEQUENCE`, which will be expanded to
:code:`SEQUENCE` or :code:`SUB_SEQUENCE`.
- :code:`ExpandLevel.FROM_SEQUENCE` means the expandation acts on each
sequence of a nested sequence, :code:`SEQUENCE` will be expanded to
- :code:`ExpandLevel.FROM_SEQUENCE` means the expansion acts on
:code:`SEQUENCE`, which will be expanded to
:code:`SUB_SEQUENCE`.
"""
FROM_TIMESTEP = AggregateLevel.EACH_TIMESTEP
FROM_SEQUENCE = AggregateLevel.EACH_SEQUENCE
FROM_NO_SEQUENCE = AggregateLevel.TO_NO_SEQUENCE
FROM_SEQUENCE = AggregateLevel.TO_SEQUENCE
# compatible with previous configuration
FROM_TIMESTEP = FROM_NO_SEQUENCE
@wrap_name_default()
......@@ -1495,7 +1519,7 @@ def expand_layer(input,
expand_as,
name=None,
bias_attr=False,
expand_level=ExpandLevel.FROM_TIMESTEP,
expand_level=ExpandLevel.FROM_NO_SEQUENCE,
layer_attr=None):
"""
A layer for "Expand Dense data or (sequence data where the length of each
......@@ -1507,7 +1531,7 @@ def expand_layer(input,
expand = expand_layer(input=layer1,
expand_as=layer2,
expand_level=ExpandLevel.FROM_TIMESTEP)
expand_level=ExpandLevel.FROM_NO_SEQUENCE)
:param input: Input layer
:type input: LayerOutput
......@@ -2794,7 +2818,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
if layer_type == LayerType.CONCAT_LAYER:
assert not bias_attr
Layer(
layer = Layer(
name=name,
type=layer_type,
inputs=[x.name for x in input] if is_concat_layer else input,
......@@ -2802,13 +2826,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
bias=ParamAttr.to_bias(bias_attr),
**ExtraLayerAttribute.to_kwargs(layer_attr))
sz = 0
for each_input in input:
if each_input.size is not None:
sz += each_input.size
else:
sz = None
break
sz = layer.config.size
return LayerOutput(
name,
......@@ -2912,11 +2930,11 @@ def memory(name,
to specify the layer needs to be remembered as the following:
.. code-block:: python
mem = memory(size=256)
state = fc_layer(input=mem, size=256)
mem.set_input(mem)
:param name: the name of the layer which this memory remembers.
If name is None, user should call set_input() to specify the
name of the layer which this memory remembers.
......@@ -2976,7 +2994,7 @@ def memory(name,
@layer_support()
def lstm_step_layer(input,
state,
size,
size=None,
act=None,
name=None,
gate_act=None,
......@@ -3042,6 +3060,9 @@ def lstm_step_layer(input,
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert size is None or state.size == size
size = state.size
Layer(
name=name,
type=LayerType.LSTM_STEP_LAYER,
......@@ -3049,7 +3070,7 @@ def lstm_step_layer(input,
active_gate_type=gate_act.name,
active_state_type=state_act.name,
bias=ParamAttr.to_bias(bias_attr),
size=size,
size=state.size,
inputs=[input.name, state.name],
**ExtraLayerAttribute.to_kwargs(layer_attr))
......@@ -3400,7 +3421,7 @@ def recurrent_group(step,
else, for training or testing, one of the input type must
be LayerOutput.
: type is_generating: bool
:type is_generating: bool
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -3489,6 +3510,11 @@ def recurrent_group(step,
RecurrentLayerGroupEnd(name=name)
for layer_out in layer_outs:
# Thee previous full_name is the name is the rnn group
# We need a full_name outside the rnn group
layer_out.full_name = MakeLayerNameInSubmodel(layer_out.name)
if len(layer_outs) == 1:
return layer_outs[0]
else:
......@@ -3748,7 +3774,6 @@ def beam_search(step,
assert generated_input_index != -1
gipt = input[generated_input_index]
assert isinstance(gipt, BaseGeneratedInput)
gipt.bos_id = bos_id
gipt.eos_id = eos_id
......@@ -3768,7 +3793,6 @@ def beam_search(step,
predict = gipt.after_real_step(step(*args))
eos_layer(input=predict, eos_id=eos_id, name=eos_name)
return predict
tmp = recurrent_group(
......@@ -3802,7 +3826,7 @@ def mse_cost(input, label, weight=None, name=None, coeff=1.0, layer_attr=None):
.. math::
\frac{1}{N}\sum_{i=1}^N(t_i-y_i)^2
\\frac{1}{N}\sum_{i=1}^N(t_i-y_i)^2
:param name: layer name.
:type name: basestring
......@@ -3840,7 +3864,6 @@ def classification_cost(input,
label,
weight=None,
name=None,
top_k=None,
evaluator=classification_error_evaluator,
layer_attr=None):
"""
......@@ -3855,8 +3878,6 @@ def classification_cost(input,
:param weight: The weight affects the cost, namely the scale of cost.
It is an optional argument.
:type weight: LayerOutput
:param top_k: number k in top-k error rate
:type top_k: int
:param evaluator: Evaluator method.
:param layer_attr: layer's extra attribute.
:type layer_attr: ExtraLayerAttribute
......@@ -3884,7 +3905,7 @@ def classification_cost(input,
assert isinstance(e.for_classification, bool)
assert e.for_classification
e(name=e.__name__, input=input, label=label, weight=weight, top_k=top_k)
e(name=e.__name__, input=input, label=label, weight=weight)
if not isinstance(evaluator, collections.Sequence):
evaluator = [evaluator]
......@@ -4705,7 +4726,7 @@ def ctc_layer(input,
fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer
should also be num_classes + 1.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -4757,27 +4778,42 @@ def warp_ctc_layer(input,
layer_attr=None):
"""
A layer intergrating the open-source `warp-ctc
<https://github.com/baidu-research/warp-ctc>` library, which is used in
<https://github.com/baidu-research/warp-ctc>`_ library, which is used in
`Deep Speech 2: End-toEnd Speech Recognition in English and Mandarin
<https://arxiv.org/pdf/1512.02595v1.pdf>`, to compute Connectionist Temporal
Classification (CTC) loss.
<https://arxiv.org/pdf/1512.02595v1.pdf>`_, to compute Connectionist Temporal
Classification (CTC) loss. Besides, another `warp-ctc
<https://github.com/gangliao/warp-ctc>`_ repository, which is forked from
the official one, is maintained to enable more compiling options. During the
building process, PaddlePaddle will clone the source codes, build and
install it to :code:`third_party/install/warpctc` directory.
To use warp_ctc layer, you need to specify the path of :code:`libwarpctc.so`,
using following methods:
1. Set it in :code:`paddle.init` (python api) or :code:`paddle_init` (c api),
such as :code:`paddle.init(use_gpu=True,
warpctc_dir=your_paddle_source_dir/third_party/install/warpctc/lib)`.
2. Set environment variable LD_LIBRARY_PATH on Linux or DYLD_LIBRARY_PATH
on Mac OS. For instance, :code:`export
LD_LIBRARY_PATH=your_paddle_source_dir/third_party/install/warpctc/lib:$LD_LIBRARY_PATH`.
More details of CTC can be found by referring to `Connectionist Temporal
Classification: Labelling Unsegmented Sequence Data with Recurrent
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/
icml2006_GravesFGS06.pdf>`_
icml2006_GravesFGS06.pdf>`_.
Note:
- Let num_classes represent the category number. Considering the 'blank'
label needed by CTC, you need to use (num_classes + 1) as the input
size. Thus, the size of both warp_ctc_layer and 'input' layer should
be set to num_classes + 1.
label needed by CTC, you need to use (num_classes + 1) as the input size.
Thus, the size of both warp_ctc layer and 'input' layer should be set to
num_classes + 1.
- You can set 'blank' to any value ranged in [0, num_classes], which
should be consistent as that used in your labels.
- As a native 'softmax' activation is interated to the warp-ctc library,
'linear' activation is expected instead in the 'input' layer.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -4838,7 +4874,7 @@ def crf_layer(input,
A layer for calculating the cost of sequential conditional random
field model.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -4912,7 +4948,7 @@ def crf_decoding_layer(input,
this layer will also calculate error. output.value[i] is 1 for incorrect
decoding or 0 for correct decoding.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -5105,7 +5141,7 @@ def rank_cost(left,
- :math:`o_i` and :math:`o_j`: the left output and right output.
Their dimension is one.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -5162,7 +5198,7 @@ def lambda_cost(input,
"""
lambdaCost for lambdaRank LTR approach.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -5220,6 +5256,8 @@ def cross_entropy(input,
"""
A loss layer for multi class entropy.
The example usage is:
.. code-block:: python
cost = cross_entropy(input=input_layer,
......@@ -5266,6 +5304,8 @@ def cross_entropy_with_selfnorm(input,
A loss layer for multi class entropy with selfnorm.
Input should be a vector of positive numbers, without normalization.
The example usage is:
.. code-block:: python
cost = cross_entropy_with_selfnorm(input=input_layer,
......@@ -5307,6 +5347,8 @@ def sum_cost(input, name=None, layer_attr=None):
"""
A loss layer which calculate the sum of the input as loss
The example usage is:
.. code-block:: python
cost = sum_cost(input=input_layer)
......@@ -5336,6 +5378,8 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
"""
A loss layer for huber loss.
The example usage is:
.. code-block:: python
cost = huber_cost(input=input_layer,
......@@ -5376,6 +5420,8 @@ def multi_binary_label_cross_entropy(input,
"""
A loss layer for multi binary label cross entropy.
The example usage is:
.. code-block:: python
cost = multi_binary_label_cross_entropy(input=input_layer,
......@@ -5435,6 +5481,8 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
More details can be found by referring to `Fast R-CNN
<https://arxiv.org/pdf/1504.08083v2.pdf>`_
The example usage is:
.. code-block:: python
cost = smooth_l1_cost(input=input_layer,
......@@ -5465,3 +5513,208 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.SMOOTH_L1, parents=[input, label], size=1)
@wrap_name_default()
def multiplex_layer(input, name=None, layer_attr=None):
"""
This layer multiplex multiple layers according to the index,
which is provided by the first input layer.
inputs[0]: the index of the layer to output of size batchSize.
inputs[1:N]; the candidate output data.
For each index i from 0 to batchSize -1, the output is the i-th row of the
(index[i] + 1)-th layer.
For each i-th row of output:
.. math::
y[i][j] = x_{x_{0}[i] + 1}[i][j], j = 0,1, ... , (x_{1}.width - 1)
where, y is output. :math:`x_{k}` is the k-th input layer and
:math:`k = x_{0}[i] + 1`.
The example usage is:
.. code-block:: python
maxid = multiplex_layer(input=layers)
:param input: Input layers.
:type input: list of LayerOutput
:param name: Layer name.
:type name: basestring
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, collections.Sequence)
assert len(input) > 2, 'multiplex_layer should have more than 2 inputs'
for i in range(1, len(input)):
assert isinstance(input[i], LayerOutput)
assert input[i].size == input[1].size, \
"All the input layers except the first one should have the same size"
l = Layer(
name=name,
type='multiplex',
inputs=[x.name for x in input],
size=input[1].size,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name=name,
layer_type=LayerType.MULTIPLEX_LAYER,
parents=input,
size=l.config.size)
@wrap_name_default("dropout")
def dropout_layer(input, dropout_rate, name=None):
"""
@TODO(yuyang18): Add comments.
:param name:
:param input:
:param dropout_rate:
:return:
"""
return addto_layer(
name=name,
input=input,
act=LinearActivation(),
bias_attr=False,
layer_attr=ExtraAttr(drop_rate=dropout_rate))
@wrap_name_default()
@wrap_act_default(act=LinearActivation())
@wrap_param_attr_default()
@layer_support(DROPOUT)
def row_conv_layer(input,
context_len,
act=None,
name=None,
param_attr=None,
layer_attr=None):
"""
The row convolution is called lookahead convolution. It is firstly
introduced in paper of `Deep Speech 2: End-toEnd Speech Recognition
in English and Mandarin <https://arxiv.org/pdf/1512.02595v1.pdf>`_ .
The bidirectional RNN that learns representation for a sequence by
performing a forward and a backward pass through the entire sequence.
However, unlike unidirectional RNNs, bidirectional RNNs are challenging
to deploy in an online and low-latency setting. The lookahead convolution
incorporates information from future subsequences in a computationally
efficient manner to improve unidirectional recurrent neural networks.
The connection of row convolution is different form the 1D sequence
convolution. Assumed that, the future context-length is k, that is to say,
it can get the output at timestep t by using the the input feature from t-th
timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
activations are d, the activations r_t for the new layer at time-step t are:
.. math::
r_{t,r} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
\quad \text{for} \quad (1 \leq i \leq d)
Note:
The `context_len` is `k + 1`. That is to say, the lookahead step
number plus one equals context_len.
.. code-block:: python
row_conv = row_conv_layer(input=input_layer, context_len=3)
:param input: The input layer.
:type input: LayerOutput
:param context_len: The context length equals the lookahead step number
plus one.
:type context_len: int
:param act: Activation Type. Default is linear activation.
:type act: BaseActivation
:param param_attr: The Parameter Attribute. If None, the parameter will be
initialized smartly. It's better set it by yourself.
:type param_attr: ParameterAttribute
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput)
assert context_len > 0, "the context_len must be greatet than 0."
Layer(
inputs=[Input(input.name, **param_attr.attr)],
name=name,
context_length=context_len,
type=LayerType.ROW_CONV_LAYER,
active_type=act.name,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.ROW_CONV_LAYER, input, activation=act, size=input.size)
@layer_support()
@wrap_name_default()
@wrap_param_attr_default()
def prelu_layer(input,
name=None,
partial_sum=1,
param_attr=None,
layer_attr=None):
"""
The Parameter Relu activation that actives outputs with a learnable weight.
Reference:
Delving Deep into Rectifiers: Surpassing Human-Level Performance on
ImageNet Classification http://arxiv.org/pdf/1502.01852v1.pdf
.. math::
z_i &\\quad if \\quad z_i > 0 \\\\
a_i * z_i &\\quad \\mathrm{otherwise}
The example usage is:
.. code-block:: python
prelu = prelu_layer(input=layers, partial_sum=1)
:param name: Name of this layer.
:type name: basestring
:param input: The input layer.
:type input: LayerOutput
:param partial_sum: this parameter makes a group of inputs share a same weight.
- partial_sum = 1, indicates the element-wise activation: each element has a weight.
- partial_sum = number of elements in one channel, indicates the channel-wise activation, elements in a channel share a same weight.
- partial_sum = number of outputs, indicates all elements share a same weight.
:type partial_sum: int
:param param_attr: The parameter attribute. See ParameterAttribute for details.
:type param_attr: ParameterAttribute|None
:param layer_attr: Extra layer configurations. Default is None.
:type layer_attr: ExtraLayerAttribute|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput), 'prelu_layer only accepts one input'
assert isinstance(param_attr, ParameterAttribute)
l = Layer(
name=name,
type=LayerType.PRELU,
inputs=Input(input.name, **param_attr.attr),
partial_sum=partial_sum,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name=name,
layer_type=LayerType.PRELU,
parents=input,
size=l.config.size)
......@@ -26,10 +26,10 @@ from paddle.trainer.config_parser import *
__all__ = [
'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
"img_conv_bn_pool", 'dropout_layer', 'lstmemory_group', 'lstmemory_unit',
'small_vgg', 'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group',
'simple_gru', 'simple_attention', 'simple_gru2', 'bidirectional_gru',
'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs'
"img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg',
'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group', 'simple_gru',
'simple_attention', 'simple_gru2', 'bidirectional_gru', 'text_conv_pool',
'bidirectional_lstm', 'inputs', 'outputs'
]
######################################################
......@@ -1366,29 +1366,6 @@ def simple_attention(encoded_sequence,
input=scaled, pooling_type=SumPooling(), name="%s_pooling" % name)
############################################################################
# Miscs #
############################################################################
@wrap_name_default("dropout")
def dropout_layer(input, dropout_rate, name=None):
"""
@TODO(yuyang18): Add comments.
:param name:
:param input:
:param dropout_rate:
:return:
"""
return addto_layer(
name=name,
input=input,
act=LinearActivation(),
bias_attr=False,
layer_attr=ExtraAttr(drop_rate=dropout_rate))
def inputs(layers, *args):
"""
Declare the inputs of network. The order of input should be as same as
......
......@@ -5,6 +5,7 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1)
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv)
export whole_configs=(test_split_datasource)
......@@ -6,7 +6,7 @@ din = data_layer(name='data', size=30)
seq_op = [first_seq, last_seq]
agg_level = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]
agg_level = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE]
opts = []
......@@ -15,6 +15,7 @@ for op in seq_op:
opts.append(op(input=din, agg_level=al))
for op in seq_op:
opts.append(op(input=din, agg_level=AggregateLevel.EACH_TIMESTEP, stride=5))
opts.append(
op(input=din, agg_level=AggregateLevel.TO_NO_SEQUENCE, stride=5))
outputs(opts)
type: "nn"
layers {
name: "index"
type: "data"
size: 1
active_type: ""
}
layers {
name: "data1"
type: "data"
size: 30
active_type: ""
}
layers {
name: "data2"
type: "data"
size: 30
active_type: ""
}
layers {
name: "data3"
type: "data"
size: 30
active_type: ""
}
layers {
name: "__multiplex_layer_0__"
type: "multiplex"
size: 30
active_type: ""
inputs {
input_layer_name: "index"
}
inputs {
input_layer_name: "data1"
}
inputs {
input_layer_name: "data2"
}
inputs {
input_layer_name: "data3"
}
}
input_layer_names: "index"
input_layer_names: "data1"
input_layer_names: "data2"
input_layer_names: "data3"
output_layer_names: "__multiplex_layer_0__"
sub_models {
name: "root"
layer_names: "index"
layer_names: "data1"
layer_names: "data2"
layer_names: "data3"
layer_names: "__multiplex_layer_0__"
input_layer_names: "index"
input_layer_names: "data1"
input_layer_names: "data2"
input_layer_names: "data3"
output_layer_names: "__multiplex_layer_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__prelu_layer_0__"
type: "prelu"
size: 300
active_type: ""
inputs {
input_layer_name: "input"
input_parameter_name: "___prelu_layer_0__.w0"
}
}
parameters {
name: "___prelu_layer_0__.w0"
size: 300
initial_mean: 0.0
initial_std: 0.057735026919
initial_strategy: 0
initial_smart: true
}
input_layer_names: "input"
output_layer_names: "__prelu_layer_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__prelu_layer_0__"
input_layer_names: "input"
output_layer_names: "__prelu_layer_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "data"
type: "data"
size: 2560
active_type: ""
}
layers {
name: "__row_conv_layer_0__"
type: "maxout"
size: 2560
active_type: "relu"
inputs {
input_layer_name: "data"
input_parameter_name: "___row_conv_layer_0__.w0"
row_conv_conf {
context_length: 19
}
}
}
parameters {
name: "___row_conv_layer_0__.w0"
size: 48640
initial_mean: 0.0
initial_std: 0.229415733871
dims: 19
dims: 2560
initial_strategy: 0
initial_smart: true
}
input_layer_names: "data"
output_layer_names: "__row_conv_layer_0__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__row_conv_layer_0__"
input_layer_names: "data"
output_layer_names: "__row_conv_layer_0__"
is_recurrent_layer_group: false
}
......@@ -9,4 +9,6 @@ outputs(
expand_layer(
input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_SEQUENCE),
expand_layer(
input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_TIMESTEP))
input=din,
expand_as=data_seq,
expand_level=ExpandLevel.FROM_NO_SEQUENCE))
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
index = data_layer(name='index', size=1)
din1 = data_layer(name='data1', size=30)
din2 = data_layer(name='data2', size=30)
din3 = data_layer(name='data3', size=30)
dout = multiplex_layer([index, din1, din2, din3])
outputs(dout)
from paddle.trainer_config_helpers import *
data = data_layer(name='input', size=300)
prelu = prelu_layer(input=data)
outputs(prelu)
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
data = data_layer(name='data', size=2560)
row_conv = row_conv_layer(input=data, context_len=19, act=ReluActivation())
outputs(row_conv)
......@@ -6,7 +6,7 @@ din = data_layer(name='dat_in', size=100)
POOL_TYPE = [MaxPooling, AvgPooling, SumPooling]
AGG_LEVEL = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]
AGG_LEVEL = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE]
opts = []
......
......@@ -12,7 +12,7 @@ from paddle.trainer.config_parser import logger
try:
import cv2
except ImportError:
logger.warning("OpenCV2 is not installed, using PIL to prcoess")
logger.warning("OpenCV2 is not installed, using PIL to process")
cv2 = None
__all__ = ["CvTransformer", "PILTransformer", "MultiProcessImageTransformer"]
......
......@@ -39,6 +39,10 @@ def make_layer_label(layer_config):
def make_diagram(config_file, dot_file, config_arg_str):
config = parse_config(config_file, config_arg_str)
make_diagram_from_proto(config.model_config, dot_file)
def make_diagram_from_proto(model_config, dot_file):
# print >> sys.stderr, config
name2id = {}
f = open(dot_file, 'w')
......@@ -59,12 +63,12 @@ def make_diagram(config_file, dot_file, config_arg_str):
print >> f, 'digraph graphname {'
print >> f, 'node [width=0.375,height=0.25];'
for i in xrange(len(config.model_config.layers)):
l = config.model_config.layers[i]
for i in xrange(len(model_config.layers)):
l = model_config.layers[i]
name2id[l.name] = i
i = 0
for sub_model in config.model_config.sub_models:
for sub_model in model_config.sub_models:
if sub_model.name == 'root':
continue
print >> f, 'subgraph cluster_%s {' % i
......@@ -78,18 +82,18 @@ def make_diagram(config_file, dot_file, config_arg_str):
for layer_name in sub_model.layer_names:
submodel_layers.add(layer_name)
lid = name2id[layer_name]
layer_config = config.model_config.layers[lid]
layer_config = model_config.layers[lid]
label = make_layer_label(layer_config)
print >> f, 'l%s [label="%s", shape=box];' % (lid, label)
print >> f, '}'
for i in xrange(len(config.model_config.layers)):
l = config.model_config.layers[i]
for i in xrange(len(model_config.layers)):
l = model_config.layers[i]
if l.name not in submodel_layers:
label = make_layer_label(l)
print >> f, 'l%s [label="%s", shape=box];' % (i, label)
for sub_model in config.model_config.sub_models:
for sub_model in model_config.sub_models:
if sub_model.name == 'root':
continue
for link in sub_model.in_links:
......@@ -99,8 +103,8 @@ def make_diagram(config_file, dot_file, config_arg_str):
for mem in sub_model.memories:
print >> f, make_mem(mem)
for i in xrange(len(config.model_config.layers)):
for l in config.model_config.layers[i].inputs:
for i in xrange(len(model_config.layers)):
for l in model_config.layers[i].inputs:
print >> f, 'l%s -> l%s [label="%s"];' % (
name2id[l.input_layer_name], i, l.input_parameter_name)
......
......@@ -27,6 +27,7 @@ from . import dataset
from . import reader
from . import plot
import attr
import op
import pooling
import inference
import networks
......
......@@ -14,206 +14,55 @@
import collections
import re
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
import paddle.trainer_config_helpers as conf_helps
from topology import Topology
class LayerType(type):
def __new__(cls, name, bases, attrs):
method_name = attrs.get('METHOD_NAME', None)
if method_name is not None:
method = getattr(conf_helps, method_name)
if method.__doc__ is not None:
mapper = attrs.get("__map_docstr__", None)
if mapper is not None:
attrs['__doc__'] = LayerType.__map_docstr__(
mapper(method.__doc__),
method_name=method_name,
name=name)
else:
attrs['__doc__'] = LayerType.__map_docstr__(
method.__doc__, method_name=method_name, name=name)
return super(LayerType, cls).__new__(cls, name, bases, attrs)
@staticmethod
def __map_docstr__(doc, name, method_name):
assert isinstance(doc, basestring)
# replace LayerOutput to paddle.v2.config_base.Layer
doc = doc.replace("LayerOutput", "paddle.v2.config_base.Layer")
doc = doc.replace('ParameterAttribute',
'paddle.v2.attr.ParameterAttribute')
doc = re.sub(r'ExtraLayerAttribute[^\s]?',
'paddle.v2.attr.ExtraAttribute', doc)
# xxx_layer to xxx
doc = re.sub(r"(?P<name>[a-z]+)_layer", r"\g<name>", doc)
# XxxxActivation to paddle.v2.Activation.Xxxx
doc = re.sub(r"(?P<name>[A-Z][a-zA-Z]+)Activation",
r"paddle.v2.Activation.\g<name>", doc)
# TODO(yuyang18): Add more rules if needed.
__layer_map__ = {}
def __map_docstr__(doc, name):
if doc is None:
return doc
assert isinstance(doc, basestring)
# replace LayerOutput to paddle.v2.config_base.Layer
doc = doc.replace("LayerOutput", "paddle.v2.config_base.Layer")
doc = doc.replace('ParameterAttribute', 'paddle.v2.attr.ParameterAttribute')
doc = re.sub(r'ExtraLayerAttribute[^\s]?', 'paddle.v2.attr.ExtraAttribute',
doc)
# xxx_layer to xxx
doc = re.sub(r"(?P<name>[a-z]+)_layer", r"\g<name>", doc)
# XxxxActivation to paddle.v2.activation.Xxxx
doc = re.sub(r"(?P<name>[A-Z][a-zA-Z]+)Activation",
r"paddle.v2.activation.\g<name>", doc)
# xxx_evaluator to paddle.v2.evaluator.xxx
doc = re.sub(r"(?P<name>[a-z]+)_evaluator", r"evaluator.\g<name>", doc)
# TODO(yuyang18): Add more rules if needed.
return doc
def __convert_to_v2__(f, name, module):
def wrapped(*args, **xargs):
out = f(*args, **xargs)
outs = out
if not isinstance(out, collections.Sequence):
outs = [out]
for l in outs:
if isinstance(l, conf_helps.LayerOutput):
__layer_map__[l.full_name] = l
return out
wrapped.__doc__ = __map_docstr__(f.__doc__, name)
wrapped.__name__ = name
wrapped.__module__ = module
return wrapped
class Layer(object):
__metaclass__ = LayerType
def __init__(self, name=None, parent_layers=None):
assert isinstance(parent_layers, dict)
self.name = name
self.__context__ = {}
self.__parent_layers__ = parent_layers
# some layer may have some extra parent layer
self.__extra_parent__ = []
# used for evaluator.
self.__children_layers__ = []
def extra_parent(self):
return self.__extra_parent__
def append_extra_parent(self, parent):
self.__extra_parent__.append(parent)
def append_child(self, layer, parent_names):
self.__children_layers__.append((layer, parent_names))
def to_proto(self, context):
"""
function to set proto attribute
"""
self.__context__ = context
# STEP: short cut if this layer is parsed before.
if self.context_name() in context:
if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
# STEP: parse extra_parent that is not used by this layer but must
# be parsed before this layer.
for p in self.__extra_parent__:
p.to_proto(context=context)
# STEP: parse parent that is used by this layer, get the result and
# insert into kwargs of the next layer's to_proto_impl method.
kwargs = dict()
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
collections.Sequence):
v1_layer = self.__parent_layers__[layer_name].to_proto(
context=context)
else:
v1_layer = map(lambda x: x.to_proto(context=context),
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
# STEP: parse myself and add myself into context.
ret_val = self.to_proto_impl(**kwargs)
if self.context_name() is not None \
and self.context_name() not in context:
context[self.context_name()] = ret_val
# STEP: parse children that should be pased after this layer.
for layer, pnames in self.__children_layers__:
drop = False
# child will only be parsed if all parents are in context.
for pname in pnames:
if pname not in context:
drop = True
break
if drop:
continue
layer.to_proto(context=context)
# STEP: return v1 layer result
if self.context_name() is None:
return ret_val
elif self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
def to_proto_impl(self, **kwargs):
raise NotImplementedError()
def context_name(self):
"""
Context name means the context which stores `to_proto_impl` result.
If multiple layer share same context_name, the `to_proto_impl` of them
will be invoked only once.
"""
return self.name
def use_context_name(self):
return False
def calculate_size(self):
"""
lazy calculate size of the layer, should be called when to_proto_impl of
this layer is called.
:return:
"""
return self.__context__[self.context_name()].size
def attr(self):
topo = Topology(self)
return topo.get_layer_proto(self.name)
def __convert_to_v2__(method_name,
parent_names,
is_default_name=True,
attach_parent=False):
if is_default_name:
wrapper = wrap_name_default(name_prefix=method_name)
else:
wrapper = None
class V2LayerImpl(Layer):
METHOD_NAME = method_name
def __init__(self, **kwargs):
parent_layers = dict()
other_kwargs = dict()
for pname in parent_names:
if pname in kwargs:
parent_layers[pname] = kwargs[pname]
if attach_parent:
pnames = [x.context_name() for x in parent_layers.values()]
for pname in parent_layers:
layers = kwargs[pname]
if not isinstance(layers, collections.Sequence):
layers = [layers]
for layer in layers:
layer.append_child(self, pnames)
for key in kwargs.keys():
if key not in parent_names:
other_kwargs[key] = kwargs[key]
name = kwargs.get('name', None)
super(V2LayerImpl, self).__init__(name, parent_layers)
self.__other_kwargs__ = other_kwargs
if wrapper is not None:
__init__ = wrapper(__init__)
def to_proto_impl(self, **kwargs):
args = dict()
for each in kwargs:
args[each] = kwargs[each]
for each in self.__other_kwargs__:
args[each] = self.__other_kwargs__[each]
return getattr(conf_helps, method_name)(**args)
return V2LayerImpl
Layer = conf_helps.LayerOutput
......@@ -16,7 +16,8 @@ import paddle.trainer.PyDataProvider2 as pydp2
import_list = [
nm for nm in dir(pydp2)
if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm)
if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm or
'array' in nm)
]
import_list.extend(['InputType'])
......
......@@ -24,8 +24,9 @@ import conll05
import uci_housing
import sentiment
import wmt14
import mq2007
__all__ = [
'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment'
'uci_housing', 'wmt14'
'uci_housing', 'wmt14', 'mq2007'
]
......@@ -19,8 +19,10 @@ import shutil
import sys
import importlib
import paddle.v2.dataset
import cPickle
import glob
__all__ = ['DATA_HOME', 'download', 'md5file']
__all__ = ['DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader']
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
......@@ -74,3 +76,130 @@ def fetch_all():
getattr(
importlib.import_module("paddle.v2.dataset.%s" % module_name),
"fetch")()
def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump):
"""
you can call the function as:
split(paddle.v2.dataset.cifar.train10(), line_count=1000,
suffix="imikolov-train-%05d.pickle")
the output files as:
|-imikolov-train-00000.pickle
|-imikolov-train-00001.pickle
|- ...
|-imikolov-train-00480.pickle
:param reader: is a reader creator
:param line_count: line count for each file
:param suffix: the suffix for the output files, should contain "%d"
means the id for each file. Default is "%05d.pickle"
:param dumper: is a callable function that dump object to file, this
function will be called as dumper(obj, f) and obj is the object
will be dumped, f is a file object. Default is cPickle.dump.
"""
if not callable(dumper):
raise TypeError("dumper should be callable.")
lines = []
indx_f = 0
for i, d in enumerate(reader()):
lines.append(d)
if i >= line_count and i % line_count == 0:
with open(suffix % indx_f, "w") as f:
dumper(lines, f)
lines = []
indx_f += 1
if lines:
with open(suffix % indx_f, "w") as f:
dumper(lines, f)
def cluster_files_reader(files_pattern,
trainer_count,
trainer_id,
loader=cPickle.load):
"""
Create a reader that yield element from the given files, select
a file set according trainer count and trainer_id
:param files_pattern: the files which generating by split(...)
:param trainer_count: total trainer count
:param trainer_id: the trainer rank id
:param loader: is a callable function that load object from file, this
function will be called as loader(f) and f is a file object.
Default is cPickle.load
"""
def reader():
if not callable(loader):
raise TypeError("loader should be callable.")
file_list = glob.glob(files_pattern)
file_list.sort()
my_file_list = []
for idx, fn in enumerate(file_list):
if idx % trainer_count == trainer_id:
print "append file: %s" % fn
my_file_list.append(fn)
for fn in my_file_list:
with open(fn, "r") as f:
lines = loader(f)
for line in lines:
yield line
return reader
def convert(output_path,
reader,
num_shards,
name_prefix,
max_lines_to_shuffle=1000):
import recordio
import cPickle as pickle
import random
"""
Convert data from reader to recordio format files.
:param output_path: directory in which output files will be saved.
:param reader: a data reader, from which the convert program will read data instances.
:param num_shards: the number of shards that the dataset will be partitioned into.
:param name_prefix: the name prefix of generated files.
:param max_lines_to_shuffle: the max lines numbers to shuffle before writing.
"""
assert num_shards >= 1
assert max_lines_to_shuffle >= 1
def open_writers():
w = []
for i in range(0, num_shards):
n = "%s/%s-%05d-of-%05d" % (output_path, name_prefix, i,
num_shards - 1)
w.append(recordio.writer(n))
return w
def close_writers(w):
for i in range(0, num_shards):
w[i].close()
def write_data(w, lines):
random.shuffle(lines)
for i, d in enumerate(lines):
d = pickle.dumps(d, pickle.HIGHEST_PROTOCOL)
w[i % num_shards].write(d)
w = open_writers()
lines = []
for i, d in enumerate(reader()):
lines.append(d)
if i % max_lines_to_shuffle == 0 and i >= max_lines_to_shuffle:
write_data(w, lines)
lines = []
continue
write_data(w, lines)
close_writers(w)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module will download dataset from
http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html
and parse train/test set intopaddle reader creators.
This set contains images of flowers belonging to 102 different categories.
The images were acquired by searching the web and taking pictures. There are a
minimum of 40 images for each category.
The database was used in:
Nilsback, M-E. and Zisserman, A. Automated flower classification over a large
number of classes.Proceedings of the Indian Conference on Computer Vision,
Graphics and Image Processing (2008)
http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}.
"""
import cPickle
import itertools
from common import download
import tarfile
import scipy.io as scio
from paddle.v2.image import *
import os
import numpy as np
import paddle.v2 as paddle
from multiprocessing import cpu_count
__all__ = ['train', 'test', 'valid']
DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz'
LABEL_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat'
SETID_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat'
DATA_MD5 = '52808999861908f626f3c1f4e79d11fa'
LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d'
SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c'
def default_mapper(sample):
'''
map image bytes data to type needed by model input layer
'''
img, label = sample
img = paddle.image.load_image_bytes(img)
img = paddle.image.simple_transform(img, 256, 224, True)
return img.flatten().astype('float32'), label
def reader_creator(data_file,
label_file,
setid_file,
dataset_name,
mapper=default_mapper,
buffered_size=1024):
'''
1. read images from tar file and
merge images into batch files in 102flowers.tgz_batch/
2. get a reader to read sample from batch file
:param data_file: downloaded data file
:type data_file: string
:param label_file: downloaded label file
:type label_file: string
:param setid_file: downloaded setid file containing information
about how to split dataset
:type setid_file: string
:param dataset_name: data set name (tstid|trnid|valid)
:type dataset_name: string
:param mapper: a function to map image bytes data to type
needed by model input layer
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:return: data reader
:rtype: callable
'''
labels = scio.loadmat(label_file)['labels'][0]
indexes = scio.loadmat(setid_file)[dataset_name][0]
img2label = {}
for i in indexes:
img = "jpg/image_%05d.jpg" % i
img2label[img] = labels[i - 1]
file_list = batch_images_from_tar(data_file, dataset_name, img2label)
def reader():
for file in open(file_list):
file = file.strip()
batch = None
with open(file, 'r') as f:
batch = cPickle.load(f)
data = batch['data']
labels = batch['label']
for sample, label in itertools.izip(data, batch['label']):
yield sample, int(label)
return paddle.reader.xmap_readers(mapper, reader,
cpu_count(), buffered_size)
def train(mapper=default_mapper, buffered_size=1024):
'''
Create flowers training set reader.
It returns a reader, each sample in the reader is
image pixels in [0, 1] and label in [1, 102]
translated from original color image by steps:
1. resize to 256*256
2. random crop to 224*224
3. flatten
:param mapper: a function to map sample.
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:return: train data reader
:rtype: callable
'''
return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), 'trnid', mapper,
buffered_size)
def test(mapper=default_mapper, buffered_size=1024):
'''
Create flowers test set reader.
It returns a reader, each sample in the reader is
image pixels in [0, 1] and label in [1, 102]
translated from original color image by steps:
1. resize to 256*256
2. random crop to 224*224
3. flatten
:param mapper: a function to map sample.
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:return: test data reader
:rtype: callable
'''
return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), 'tstid', mapper,
buffered_size)
def valid(mapper=default_mapper, buffered_size=1024):
'''
Create flowers validation set reader.
It returns a reader, each sample in the reader is
image pixels in [0, 1] and label in [1, 102]
translated from original color image by steps:
1. resize to 256*256
2. random crop to 224*224
3. flatten
:param mapper: a function to map sample.
:type mapper: callable
:param buffered_size: the size of buffer used to process images
:type buffered_size: int
:return: test data reader
:rtype: callable
'''
return reader_creator(
download(DATA_URL, 'flowers', DATA_MD5),
download(LABEL_URL, 'flowers', LABEL_MD5),
download(SETID_URL, 'flowers', SETID_MD5), 'valid', mapper,
buffered_size)
def fetch():
download(DATA_URL, 'flowers', DATA_MD5)
download(LABEL_URL, 'flowers', LABEL_MD5)
download(SETID_URL, 'flowers', SETID_MD5)
......@@ -15,6 +15,7 @@
import paddle.v2.dataset.common
import unittest
import tempfile
import glob
class TestCommon(unittest.TestCase):
......@@ -32,6 +33,62 @@ class TestCommon(unittest.TestCase):
paddle.v2.dataset.common.download(
yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d'))
def test_split(self):
def test_reader():
def reader():
for x in xrange(10):
yield x
return reader
_, temp_path = tempfile.mkstemp()
paddle.v2.dataset.common.split(
test_reader(), 4, suffix=temp_path + '/test-%05d.pickle')
files = glob.glob(temp_path + '/test-%05d.pickle')
self.assertEqual(len(files), 3)
def test_cluster_file_reader(self):
_, temp_path = tempfile.mkstemp()
for x in xrange(5):
with open(temp_path + '/%05d.test' % x) as f:
f.write('%d\n' % x)
reader = paddle.v2.dataset.common.cluster_files_reader(
temp_path + '/*.test', 5, 0)
for idx, e in enumerate(reader()):
self.assertEqual(e, str("0"))
def test_convert(self):
record_num = 10
num_shards = 4
def test_reader():
def reader():
for x in xrange(record_num):
yield x
return reader
path = tempfile.mkdtemp()
paddle.v2.dataset.common.convert(path,
test_reader(), num_shards,
'random_images')
files = glob.glob(path + '/random_images-*')
self.assertEqual(len(files), num_shards)
recs = []
for i in range(0, num_shards):
n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1)
r = recordio.reader(n)
while True:
d = r.read()
if d is None:
break
recs.append(d)
recs.sort()
self.assertEqual(total, record_num)
if __name__ == '__main__':
unittest.main()
......@@ -12,27 +12,40 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.trainer.config_parser as config_parser
'''
This file is a wrapper of formal config_parser. The main idea of this file is to
separete different config logic into different function, such as network configuration
and optimizer configuration.
'''
__all__ = [
"parse_trainer_config", "parse_network_config", "parse_optimizer_config"
]
def parse_trainer_config(trainer_conf, config_arg_str):
return config_parser.parse_config(trainer_conf, config_arg_str)
def parse_network_config(network_conf):
config = config_parser.parse_config(network_conf, '')
return config.model_config
def parse_optimizer_config(optimizer_conf):
config = config_parser.parse_config(optimizer_conf, '')
return config.opt_config
import paddle.v2.dataset.flowers
import unittest
class TestFlowers(unittest.TestCase):
def check_reader(self, reader):
sum = 0
label = 0
size = 224 * 224 * 3
for l in reader():
self.assertEqual(l[0].size, size)
if l[1] > label:
label = l[1]
sum += 1
return sum, label
def test_train(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.flowers.train())
self.assertEqual(instances, 1020)
self.assertEqual(max_label_value, 102)
def test_test(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.flowers.test())
self.assertEqual(instances, 6149)
self.assertEqual(max_label_value, 102)
def test_valid(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.flowers.valid())
self.assertEqual(instances, 1020)
self.assertEqual(max_label_value, 102)
if __name__ == '__main__':
unittest.main()
......@@ -13,8 +13,8 @@
# limitations under the License.
import paddle.trainer_config_helpers.evaluators as evs
import inspect
from config_base import __convert_to_v2__
import inspect
__all__ = []
......@@ -25,21 +25,10 @@ def initialize():
for __ev_name__ in filter(lambda x: x.endswith('_evaluator'), evs.__all__):
__ev__ = getattr(evs, __ev_name__)
if hasattr(__ev__, 'argspec'):
argspec = __ev__.argspec
else:
argspec = inspect.getargspec(__ev__)
parent_names = filter(lambda x: x in ['input', 'label', 'weight'],
argspec.args)
v2_ev = __convert_to_v2__(
__ev_name__,
parent_names=parent_names,
is_default_name='name' in argspec.args,
attach_parent=True)
__new_name__ = convert_to_new_name(__ev_name__)
globals()[__new_name__] = v2_ev
globals()[__new_name__] = __convert_to_v2__(__ev__, __new_name__,
__name__)
globals()[__new_name__].__name__ = __new_name__
__all__.append(__new_name__)
......
import numpy as np
try:
import cv2
except:
print(
"import cv2 error, please install opencv-python: pip install opencv-python"
)
except ImportError:
cv2 = None
import os
import tarfile
import cPickle
__all__ = [
"load_image", "resize_short", "to_chw", "center_crop", "random_crop",
"left_right_flip", "simple_transform", "load_and_transform"
"load_image_bytes", "load_image", "resize_short", "to_chw", "center_crop",
"random_crop", "left_right_flip", "simple_transform", "load_and_transform",
"batch_images_from_tar"
]
"""
This file contains some common interfaces for image preprocess.
......@@ -28,6 +30,90 @@ the image layout as follows.
"""
def batch_images_from_tar(data_file,
dataset_name,
img2label,
num_per_batch=1024):
"""
Read images from tar file and batch them into batch file.
param data_file: path of image tar file
type data_file: string
param dataset_name: 'train','test' or 'valid'
type dataset_name: string
param img2label: a dic with image file name as key
and image's label as value
type img2label: dic
param num_per_batch: image number per batch file
type num_per_batch: int
return: path of list file containing paths of batch file
rtype: string
"""
batch_dir = data_file + "_batch"
out_path = "%s/%s" % (batch_dir, dataset_name)
meta_file = "%s/%s.txt" % (batch_dir, dataset_name)
if os.path.exists(out_path):
return meta_file
else:
os.makedirs(out_path)
tf = tarfile.open(data_file)
mems = tf.getmembers()
data = []
labels = []
file_id = 0
for mem in mems:
if mem.name in img2label:
data.append(tf.extractfile(mem).read())
labels.append(img2label[mem.name])
if len(data) == num_per_batch:
output = {}
output['label'] = labels
output['data'] = data
cPickle.dump(
output,
open('%s/batch_%d' % (out_path, file_id), 'w'),
protocol=cPickle.HIGHEST_PROTOCOL)
file_id += 1
data = []
labels = []
if len(data) > 0:
output = {}
output['label'] = labels
output['data'] = data
cPickle.dump(
output,
open('%s/batch_%d' % (out_path, file_id), 'w'),
protocol=cPickle.HIGHEST_PROTOCOL)
with open(meta_file, 'a') as meta:
for file in os.listdir(out_path):
meta.write(os.path.abspath("%s/%s" % (out_path, file)) + "\n")
return meta_file
def load_image_bytes(bytes, is_color=True):
"""
Load an color or gray image from bytes array.
Example usage:
.. code-block:: python
with open('cat.jpg') as f:
im = load_image_bytes(f.read())
:param bytes: the input image bytes array.
:type file: str
:param is_color: If set is_color True, it will load and
return a color image. Otherwise, it will
load and return a gray image.
"""
flag = 1 if is_color else 0
file_bytes = np.asarray(bytearray(bytes), dtype=np.uint8)
img = cv2.imdecode(file_bytes, flag)
return img
def load_image(file, is_color=True):
"""
Load an color or gray image from the file path.
......
......@@ -12,9 +12,9 @@ class Inference(object):
"""
Inference combines neural network output and parameters together
to do inference.
.. code-block:: python
inferer = Inference(output_layer=prediction, parameters=parameters)
for data_batch in batches:
print inferer.infer(data_batch)
......@@ -92,8 +92,8 @@ def infer(output_layer, parameters, input, feeding=None, field='value'):
.. code-block:: python
result = paddle.infer(output_layer=prediction,
parameters=parameters,
result = paddle.infer(output_layer=prediction,
parameters=parameters,
input=SomeData)
print result
......@@ -101,14 +101,14 @@ def infer(output_layer, parameters, input, feeding=None, field='value'):
.. code-block:: python
result = paddle.infer(output_layer=[prediction1, prediction2],
parameters=parameters,
result = paddle.infer(output_layer=[prediction1, prediction2],
parameters=parameters,
input=SomeData,
field=[id, value]])
print result
:param output_layer: output of the neural network that would be inferred
:type output_layer: paddle.v2.config_base.Layer or a list of
:type output_layer: paddle.v2.config_base.Layer or a list of
paddle.v2.config_base.Layer
:param parameters: parameters of the neural network.
:type parameters: paddle.v2.parameters.Parameters
......@@ -117,14 +117,14 @@ def infer(output_layer, parameters, input, feeding=None, field='value'):
:type input: collections.Iterable
:param feeding: Reader dictionary. Default could generate from input
value.
:param field: The prediction field. It should in [`value`, `id`, `prob`].
`value` and `prob` mean return the prediction probabilities,
:param field: The prediction field. It should in [`value`, `id`, `prob`].
`value` and `prob` mean return the prediction probabilities,
`id` means return the prediction labels. Default is `value`.
Note that `prob` only used when output_layer is beam_search
Note that `prob` only used when output_layer is beam_search
or max_id.
:type field: str
:return: The prediction result. If there are multiple outout_layers and fields,
the return order is outout_layer1.field1, outout_layer2.field1, ...,
:return: The prediction result. If there are multiple outout_layers and fields,
the return order is outout_layer1.field1, outout_layer2.field1, ...,
outout_layer1.field2, outout_layer2.field2 ...
:rtype: numpy.ndarray
"""
......
......@@ -13,7 +13,7 @@
# limitations under the License.
"""
`paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2,
we want to make Paddle a plain Python package. The model config package defined
we want to make Paddle a plain Python package. The model config package defines
the way how to configure a neural network topology in Paddle Python code.
The primary usage shows below.
......@@ -30,392 +30,33 @@ The primary usage shows below.
# use prediction instance where needed.
parameters = paddle.parameters.create(cost)
"""
import collections
import inspect
import copy
import re
import paddle.trainer_config_helpers.layers as v1_layers
import paddle.trainer.config_parser as cp
from paddle.proto.ModelConfig_pb2 import ModelConfig, SubModelConfig
from config_base import __convert_to_v2__
import config_base
import paddle.trainer_config_helpers as conf_helps
from paddle.trainer.config_parser import \
RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \
RecurrentLayerGroupEnd, model_type
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as __parse__
from paddle.trainer_config_helpers.default_decorators import wrap_act_default
from paddle.trainer_config_helpers.default_decorators import \
wrap_bias_attr_default
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.layers import RecurrentLayerGroupSetGenerator, Generator
from paddle.trainer_config_helpers.layers import layer_support
import activation
import attr
import data_type
from config_base import Layer, __convert_to_v2__
__all__ = ['data', 'parse_network']
__all__ = ['parse_network', 'data']
def __need_to_keep__(name):
return name in [
'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
'layer_support'
]
def parse_network(output_layers, extra_layers=None):
"""
Parse all layers in the neural network graph and
then generate a ModelConfig object.
.. note::
This function is used internally in paddle.v2 module. User should never
invoke this method.
:param output_layers: Output layers.
:type output_layers: Layer
:param extra_layers: Some layers in the neural network graph are not in the
path of output_layers.
:type extra_layers: Layer
:return: A ModelConfig object instance.
:rtype: ModelConfig
"""
if not isinstance(output_layers, collections.Sequence):
output_layers = [output_layers]
if extra_layers is not None and not isinstance(extra_layers,
collections.Sequence):
extra_layers = [extra_layers]
def __real_func__():
"""
__real_func__ is the function that config_parser.parse invoked. It is
the plain old paddle configuration function.
"""
context = dict()
real_output = [each.to_proto(context=context) for each in output_layers]
if extra_layers is not None:
extra_output = [
each.to_proto(context=context) for each in extra_layers
]
conf_helps.outputs(real_output)
return __parse__(__real_func__)
def __need_to_wrap__(name):
return name not in ['AggregateLevel', 'ExpandLevel']
"""
Some layer may need some special config, and can not use __convert_to_v2__ to convert.
So we also need to implement some special LayerV2.
"""
class DataLayerV2(Layer):
METHOD_NAME = 'data_layer'
def __init__(self, name, type, **kwargs):
assert isinstance(type, data_type.InputType)
self.type = type
self.__method_name__ = 'data_layer'
self.__kwargs__ = kwargs
super(DataLayerV2, self).__init__(name=name, parent_layers=dict())
def to_proto_impl(self, **kwargs):
args = dict()
args['size'] = self.type.dim
for each in kwargs:
args[each] = kwargs[each]
for each in self.__kwargs__:
args[each] = self.__kwargs__[each]
return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
def __map_docstr__(doc):
doc = re.sub(r'(data = [^\)]+)\).*',
"data = paddle.layer.data(name=\"input\", "
"type=paddle.data_type.dense_vector(1000))", doc)
doc = re.sub(r':param size:.*',
':param type: Data type of this data layer', doc)
doc = re.sub(r':type size:.*',
":type size: paddle.v2.data_type.InputType", doc)
return doc
class MemoryV2(Layer):
def __init__(self, name, extra_input=None, **kwargs):
"""
Init memory object, if memory is inited inside recurrent_group step
function, it may depend on a boot_layer that should be initialized
outside recurrent_group, so we:
1. add RecurrentLayerInput to extra_parent of self.
2. add boot_layer to the extra_parent of RecurrentLayerInput.
:param extra_input: list of RecurrentLayerInput
:type extra_input: [RecurrentLayerInput]
"""
self.name = name
super(MemoryV2, self).__init__(name=name, parent_layers=dict())
self.__kwargs__ = kwargs
self.__boot_layer_name__ = None
if 'boot_layer' in kwargs:
begin_of_current_rnn = []
# TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
# function inside step.
st = inspect.stack()
for i in xrange(len(st)):
locs = inspect.stack()[i][0].f_locals
keys = locs.keys()
for key in keys:
val = locs[key]
if isinstance(val, RecurrentLayerInput):
begin_of_current_rnn.append(val)
elif isinstance(val, collections.Sequence):
for v in val:
if isinstance(v, RecurrentLayerInput):
begin_of_current_rnn.append(v)
if begin_of_current_rnn:
break
assert begin_of_current_rnn is not None
for extra in begin_of_current_rnn:
self.append_extra_parent(extra)
extra.append_extra_parent(kwargs['boot_layer'])
self.__boot_layer_name__ = kwargs['boot_layer'].name
def to_proto_impl(self, **kwargs):
args = dict()
for each in kwargs:
args[each] = kwargs[each]
for each in self.__kwargs__:
args[each] = self.__kwargs__[each]
if self.__boot_layer_name__ is not None:
args['boot_layer'] = self.__context__[self.__boot_layer_name__]
size = args.get('size', None)
if size is not None:
if callable(size):
real_size = size()
else:
real_size = size
args['size'] = real_size
return conf_helps.memory(name=self.name, **args)
def context_name(self):
return self.name + "#memory"
def use_context_name(self):
"""
memory layer will have the same name with some layer
:return:
"""
return True
class StaticInputV2(object):
def __init__(self, input, is_seq=False, size=None):
assert isinstance(input, LayerV2)
self.name = input.name
self.input = input
self.is_seq = is_seq
self.size = size
# TODO(add size check)
# assert input.size is not None or size is not None
class BaseGeneratedInputV2(object):
def __init__(self):
self.bos_id = None
self.eos_id = None
def before_real_step(self):
raise NotImplementedError()
def after_real_step(self, *args):
raise NotImplementedError()
class GeneratedInputV2(BaseGeneratedInputV2):
def __init__(self, size, embedding_name, embedding_size):
super(GeneratedInputV2, self).__init__()
self.size = size
self.embedding_name = embedding_name
self.embedding_size = embedding_size
def after_real_step(self, input):
return max_id(input=input, name='__beam_search_predict__')
def before_real_step(self):
predict_id = memory(
name='__beam_search_predict__',
size=self.size,
boot_with_const_id=self.bos_id)
trg_emb = embedding(
input=predict_id,
size=self.embedding_size,
param_attr=attr.ParamAttr(name=self.embedding_name))
return trg_emb
class RecurrentLayerGroupSetGeneratorV2(Layer):
def __init__(self, eos_name, max_length, beam_size, num_results_per_sample):
self.eos_name = eos_name
self.max_length = max_length
self.beam_size = beam_size
self.num_results_per_sample = num_results_per_sample
super(RecurrentLayerGroupSetGeneratorV2, self).__init__(
name=eos_name, parent_layers={})
def to_proto_impl(self, **kwargs):
RecurrentLayerGroupSetGenerator(
Generator(
eos_layer_name=self.eos_name,
max_num_frames=self.max_length,
beam_size=self.beam_size,
num_results_per_sample=self.num_results_per_sample))
return self
def context_name(self):
return self.eos_name + ".fake"
def use_context_name(self):
return True
class MixedLayerV2(Layer):
"""
This class is use to support `with` grammar. If not, the following code
could convert mixed_layer simply.
mixed = __convert_to_v2__(
'mixed_layer', name_prefix='mixed', parent_names=['input'])
"""
class AddToSealedMixedLayerExceptionV2(Exception):
pass
def __init__(self,
size=0,
input=None,
name=None,
act=None,
bias_attr=None,
layer_attr=None):
self.__method_name__ = 'mixed_layer'
self.finalized = False
self.__inputs__ = []
if input is not None:
self.__inputs__ = input
other_kwargs = dict()
other_kwargs['name'] = name
other_kwargs['size'] = size
other_kwargs['act'] = act
other_kwargs['bias_attr'] = bias_attr
other_kwargs['layer_attr'] = layer_attr
parent_layers = {"input": self.__inputs__}
super(MixedLayerV2, self).__init__(name, parent_layers)
self.__other_kwargs__ = other_kwargs
def __iadd__(self, other):
if not self.finalized:
self.__inputs__.append(other)
return self
else:
raise MixedLayerV2.AddToSealedMixedLayerExceptionV2()
def __enter__(self):
assert len(self.__inputs__) == 0
return self
def __exit__(self, *args, **kwargs):
self.finalized = True
def to_proto_impl(self, **kwargs):
args = dict()
for each in kwargs:
args[each] = kwargs[each]
for each in self.__other_kwargs__:
args[each] = self.__other_kwargs__[each]
size = args.get('size', None)
if size is not None:
if callable(size):
real_size = size()
else:
real_size = size
args['size'] = real_size
return getattr(conf_helps, self.__method_name__)(**args)
@wrap_name_default("mixed")
@wrap_act_default(act=activation.Linear())
@wrap_bias_attr_default(has_bias=False)
@layer_support(conf_helps.layers.ERROR_CLIPPING, conf_helps.layers.DROPOUT)
def mixed(size=0,
name=None,
input=None,
act=None,
bias_attr=False,
layer_attr=None):
return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
mixed.__doc__ = conf_helps.mixed_layer.__doc__
class RecurrentLayerInput(Layer):
def __init__(self, recurrent_name, index, parent_layers):
parents_len = len(parent_layers)
assert parents_len <= 1
if parents_len == 0:
self.__parents__ = []
else:
self.__parents__ = parent_layers.values()[0]
self.__recurrent_name__ = recurrent_name
name = self.__parents__[
index].name if index >= 0 else self.context_name()
super(RecurrentLayerInput, self).__init__(
name=name, parent_layers=parent_layers)
def context_name(self):
return self.__recurrent_name__ + ".begin"
def to_proto_impl(self, **kwargs):
model_type('recurrent_nn')
RecurrentLayerGroupWithoutOutLinksBegin(
name=self.__recurrent_name__,
in_links=map(lambda x: x.name, self.__parents__))
return self
class RecurrentLayerOutput(Layer):
def __init__(self, recurrent_name, index, parent_layers):
assert len(parent_layers) == 1
self.__parents__ = parent_layers.values()[0]
super(RecurrentLayerOutput, self).__init__(
name=self.__parents__[index].name, parent_layers=parent_layers)
self.__recurrent_name__ = recurrent_name
def context_name(self):
return self.__recurrent_name__ + ".end"
def to_proto_impl(self, **kwargs):
for l in self.__parents__:
RecurrentLayerGroupSetOutLink(l.name)
RecurrentLayerGroupEnd(name=self.__recurrent_name__)
LayerV2 = Layer
data = DataLayerV2
data.__name__ = 'data'
AggregateLevel = conf_helps.AggregateLevel
ExpandLevel = conf_helps.ExpandLevel
memory = MemoryV2
memory.__name__ = 'memory'
memory.__doc__ = conf_helps.memory.__doc__
def __layer_name_mapping__(inname):
if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']:
# Do Not handle these layers
return
elif inname == 'maxid_layer':
def __convert_name__(inname):
if __need_to_keep__(inname):
return inname
if inname == 'maxid_layer':
return 'max_id'
elif inname.endswith('memory') or inname.endswith(
'_seq') or inname.endswith('_sim') or inname == 'hsigmoid':
......@@ -429,187 +70,250 @@ def __layer_name_mapping__(inname):
return inname
elif inname.endswith("_layer"):
return inname[:-len("_layer")]
def __layer_name_mapping_parent_names__(inname):
all_args = getattr(conf_helps, inname).argspec.args
return filter(
lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b',
'expand_as',
'weights', 'vectors', 'weight', 'score', 'left',
'right', 'output_mem'],
all_args)
def __convert_layer__(_new_name_, _old_name_, _parent_names_):
global __all__
__all__.append(_new_name_)
globals()[new_name] = __convert_to_v2__(_old_name_, _parent_names_)
globals()[new_name].__name__ = new_name
for each_layer_name in dir(conf_helps):
new_name = __layer_name_mapping__(each_layer_name)
if new_name is not None:
parent_names = __layer_name_mapping_parent_names__(each_layer_name)
assert len(parent_names) != 0, each_layer_name
__convert_layer__(new_name, each_layer_name, parent_names)
del parent_names
del new_name
del each_layer_name
@wrap_name_default()
def recurrent_group(step, input, name=None):
if not isinstance(input, collections.Sequence):
input = [input]
non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2),
input)
actual_input = [
RecurrentLayerInput(
recurrent_name=name,
index=i,
parent_layers={'recurrent_inputs': non_static_inputs})
for i in xrange(len(non_static_inputs))
]
extra_input = None
if len(non_static_inputs) == 0:
extra_input = RecurrentLayerInput(
recurrent_name=name, index=-1, parent_layers={})
def __real_step__(*args):
rnn_input = list(args)
static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
for static_input in static_inputs:
mem_name = "__%s_memory__" % static_input.input.name
mem = memory(
name=mem_name,
extra_input=extra_input,
is_seq=static_input.is_seq,
size=static_input.input.calculate_size,
boot_layer=static_input.input)
with mixed(
name=mem_name,
size=static_input.input.calculate_size,
act=activation.Identity()) as mix:
mix += identity_projection(input=mem)
rnn_input.insert(input.index(static_input), mix)
return step(*rnn_input)
actual_output = __real_step__(*actual_input)
if not isinstance(actual_output, collections.Sequence):
actual_output = [actual_output]
retv = [
RecurrentLayerOutput(
recurrent_name=name,
index=i,
parent_layers={'recurrent_outputs': actual_output})
for i in xrange(len(actual_output))
]
if len(retv) == 1:
return retv[0]
else:
return retv
recurrent_group.__doc__ = conf_helps.recurrent_group.__doc__
@wrap_name_default()
def beam_search(step,
input,
bos_id,
eos_id,
beam_size,
max_length=500,
name=None,
num_results_per_sample=None):
if num_results_per_sample is None:
num_results_per_sample = beam_size
assert num_results_per_sample <= beam_size
# logger.warning("num_results_per_sample should be less than beam_size")
if isinstance(input, StaticInputV2) or isinstance(input,
BaseGeneratedInputV2):
input = [input]
generated_input_index = -1
real_input = []
for i, each_input in enumerate(input):
assert isinstance(each_input, StaticInputV2) or isinstance(
each_input, BaseGeneratedInputV2)
if isinstance(each_input, BaseGeneratedInputV2):
assert generated_input_index == -1
generated_input_index = i
else:
real_input.append(each_input)
assert generated_input_index != -1
gipt = input[generated_input_index]
assert isinstance(gipt, BaseGeneratedInputV2)
return inname
gipt.bos_id = bos_id
gipt.eos_id = eos_id
def __real_step__(*args):
eos_name = "__%s_eos_layer__" % name
generator = RecurrentLayerGroupSetGeneratorV2(
eos_name, max_length, beam_size, num_results_per_sample)
for name in v1_layers.__all__:
obj = getattr(v1_layers, name)
new_name = __convert_name__(name)
if callable(obj) and __need_to_wrap__(name):
globals()[new_name] = __convert_to_v2__(obj, new_name, __name__)
else:
globals()[new_name] = obj
__all__.append(new_name)
args = list(args)
before_step_layer = gipt.before_real_step()
before_step_layer.append_child(
layer=generator, parent_names=[before_step_layer.name])
args.insert(generated_input_index, before_step_layer)
predict = gipt.after_real_step(step(*args))
def __data_layer__(name, type, **kwargs):
l = v1_layers.data_layer(name, type.dim, **kwargs)
l.data_type = type
return l
eos_layer = eos(input=predict, eos_id=eos_id, name=eos_name)
predict.append_child(layer=eos_layer, parent_names=[predict.name])
return predict
def __map_data_docstr__(doc):
doc = re.sub(r'(data = [^\)]+)\).*',
"data = paddle.layer.data(name=\"input\", "
"type=paddle.data_type.dense_vector(1000))", doc)
# tmp = paddle.layer.recurrent_group(
# step=__real_step__,
# input=real_input,
# reverse=False,
# name=name,
# is_generating=True)
tmp = recurrent_group(step=__real_step__, input=real_input, name=name)
doc = re.sub(r':param size:.*', ':param type: Data type of this data layer',
doc)
doc = re.sub(r':type size:.*', ":type size: paddle.v2.data_type.InputType",
doc)
return doc
return tmp
__data_layer__.__doc__ = __map_data_docstr__(v1_layers.data_layer.__doc__)
beam_search.__doc__ = conf_helps.beam_search.__doc__
data = __convert_to_v2__(__data_layer__, 'name', __name__)
__projection_names__ = filter(lambda x: x.endswith('_projection'),
dir(conf_helps))
__all__ += __projection_names__
def __get_used_layers__(output_layers):
layer_names = set()
parents = {}
__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps))
__all__ += __operator_names__
def add_parent(child, parent):
if child in parents:
parents[child].append(parent)
else:
parents[child] = [parent]
def add_additional_parents():
for sub_model in cp.g_config.model_config.sub_models:
if sub_model.name == 'root':
continue
for link in sub_model.in_links:
add_parent(link.link_name, link.layer_name)
add_parent(sub_model.name, link.layer_name)
for link in sub_model.out_links:
add_parent(link.link_name, link.layer_name)
add_parent(link.link_name, sub_model.name)
for mem in sub_model.memories:
if mem.boot_layer_name:
add_parent(mem.layer_name, mem.boot_layer_name)
add_parent(mem.link_name, mem.layer_name)
if sub_model.HasField('generator'):
# according to the implementation of text generation
# in recurrent layer group, the generated word must be
# the first out link
add_parent(sub_model.out_links[0].layer_name,
sub_model.generator.eos_layer_name)
def dfs_travel(layer_name):
if layer_name in layer_names:
return
layer_names.add(layer_name)
layer = cp.g_layer_map[layer_name]
for inp in layer.inputs:
dfs_travel(inp.input_layer_name)
if layer.name in parents:
for p in parents[layer.name]:
dfs_travel(p)
add_additional_parents()
for layer in output_layers:
dfs_travel(layer.full_name)
# print layer needs to be specially handled because no other
# layer depends on it. It is used to print the result of some
# layers when running the model for debug purpose. So we explicitly
# add a print layer to the topolty if its input is in the toplogy.
for layer in cp.g_config.model_config.layers:
if layer.type == 'print':
used = True
for inp in layer.inputs:
if inp.input_layer_name not in layer_names:
used = False
break
if used:
layer_names.add(layer.name)
return layer_names
def __get_used_parameters__(layer_names, sub_models):
parameter_names = set()
for name in layer_names:
l = cp.g_layer_map[name]
for inp in l.inputs:
if inp.input_parameter_name:
parameter_names.add(inp.input_parameter_name)
if l.bias_parameter_name:
parameter_names.add(l.bias_parameter_name)
for sub_model in sub_models:
for mem in sub_model.memories:
if mem.HasField("boot_bias_parameter_name"):
parameter_names.add(mem.boot_bias_parameter_name)
return parameter_names
def __get_used_submodels__(layer_names):
submodel_names = set()
for submodel in cp.g_config.model_config.sub_models:
if submodel.name in layer_names:
submodel_names.add(submodel.name)
return submodel_names
def __get_used_evaluators__(layer_names):
evaluator_names = set()
for e in cp.g_config.model_config.evaluators:
used = True
for name in e.input_layers:
if name not in layer_names:
used = False
break
if used:
evaluator_names.add(e.name)
return evaluator_names
def __trim_submodel__(old_submodel, layer_names, input_layer_names,
output_layer_names, evaluator_names):
submodel = SubModelConfig()
submodel.name = old_submodel.name
submodel.layer_names.extend(
filter(lambda x: x in layer_names, old_submodel.layer_names))
submodel.input_layer_names.extend(
filter(lambda x: x in input_layer_names, submodel.layer_names))
submodel.output_layer_names.extend(
filter(lambda x: x in output_layer_names, submodel.layer_names))
submodel.evaluator_names.extend(
filter(lambda x: x in evaluator_names, old_submodel.evaluator_names))
submodel.is_recurrent_layer_group = old_submodel.is_recurrent_layer_group
submodel.reversed = old_submodel.reversed
submodel.memories.extend(
filter(lambda x: x.link_name in layer_names, old_submodel.memories))
target_inlinkid = (old_submodel.target_inlinkid
if old_submodel.HasField('target_inlinkid') else -1)
in_links = []
for i, link in enumerate(old_submodel.in_links):
if link.link_name in layer_names or i == target_inlinkid:
in_links.append(link)
if i == target_inlinkid:
target_inlinkid = len(in_links) - 1
submodel.in_links.extend(in_links)
submodel.out_links.extend(
filter(lambda x: x.link_name in layer_names, old_submodel.out_links))
if old_submodel.HasField('generator'):
submodel.generator.CopyFrom(old_submodel.generator)
if old_submodel.HasField('target_inlinkid'):
submodel.target_inlinkid = target_inlinkid
return submodel
# convert projection
for prj in __projection_names__:
globals()[prj] = __convert_to_v2__(
prj, parent_names=['input'], is_default_name=False)
globals()[prj].__name__ = prj
# convert operator
operator_list = [
# [V1_method_name, parent_names],
['dotmul_operator', ['a', 'b']],
['conv_operator', ['img', 'filter']]
]
for op in operator_list:
globals()[op[0]] = __convert_to_v2__(
op[0], parent_names=op[1], is_default_name=False)
globals()[op[0]].__name__ = op[0]
def parse_network(output_layers, extra_layers=None):
if not isinstance(output_layers, collections.Sequence):
output_layers = [output_layers]
if extra_layers is not None:
if not isinstance(extra_layers, collections.Sequence):
extra_layers = [extra_layers]
else:
extra_layers = []
layer_names = __get_used_layers__(output_layers + extra_layers)
submodel_names = __get_used_submodels__(layer_names)
submodel_names.add('root')
evaluator_names = __get_used_evaluators__(layer_names)
input_layer_names = set()
output_layer_names = set()
model_config = ModelConfig()
model_config.type = cp.g_config.model_config.type
for layer in output_layers:
model_config.output_layer_names.append(layer.full_name)
output_layer_names.add(layer.full_name)
for l in cp.g_config.model_config.layers:
if l.name not in layer_names:
continue
model_config.layers.extend([l])
if l.type == 'data':
if l.name in model_config.output_layer_names:
"""
In text generation, the outlink to save the generated word
indices is a data_layer defined in recurrent_group. This
data_layer is sure to be the output of the network in text
generation task, so this statement excludes such a special
data_layer from being inputs of the network, otherwise an error
will occur during data feeding.
"""
continue
model_config.input_layer_names.append(l.name)
input_layer_names.add(l.name)
for e in cp.g_config.model_config.evaluators:
if e.name in evaluator_names:
model_config.evaluators.extend([e])
for s in cp.g_config.model_config.sub_models:
if s.name in submodel_names:
s = __trim_submodel__(s, layer_names, input_layer_names,
output_layer_names, evaluator_names)
model_config.sub_models.extend([s])
parameter_names = __get_used_parameters__(layer_names,
model_config.sub_models)
for p in cp.g_config.model_config.parameters:
if p.name in parameter_names:
model_config.parameters.extend([p])
return model_config
def get_layer(name):
return config_base.__layer_map__.get(name)
cp.begin_parse()
......@@ -24,20 +24,7 @@ def __initialize__():
if each_subnetwork in ['inputs', 'outputs']:
continue
func = getattr(conf_nw, each_subnetwork)
if hasattr(func, 'argspec'):
argspec = func.argspec
else:
argspec = inspect.getargspec(func)
if each_subnetwork == 'simple_attention':
parents = ['encoded_sequence', 'encoded_proj', 'decoder_state']
else:
parents = filter(lambda x: x.startswith('input'), argspec.args)
assert len(parents) != 0, each_subnetwork
v2_subnet = __convert_to_v2__(
each_subnetwork,
parent_names=parents,
is_default_name='name' in argspec.args)
globals()[each_subnetwork] = v2_subnet
globals()[each_subnetwork] = func
globals()[each_subnetwork].__name__ = each_subnetwork
global __all__
__all__.append(each_subnetwork)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import layer
import activation as act
from config_base import Layer
from paddle.trainer_config_helpers.attrs import is_compatible_with
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
__all__ = []
def __register_unary_math_op__(op_name, act):
def op(input, name=None):
return layer.mixed(
input=[layer.identity_projection(input=input)], name=name, act=act)
op = wrap_name_default(op_name)(op)
op.__doc__ = type(act).__doc__
globals()[op_name] = op
__all__.append(op_name)
__register_unary_math_op__('exp', act.Exp())
__register_unary_math_op__('log', act.Log())
__register_unary_math_op__('abs', act.Abs())
__register_unary_math_op__('sigmoid', act.Sigmoid())
__register_unary_math_op__('tanh', act.Tanh())
__register_unary_math_op__('square', act.Square())
__register_unary_math_op__('relu', act.Relu())
__register_unary_math_op__('sqrt', act.Sqrt())
__register_unary_math_op__('reciprocal', act.Reciprocal())
__register_unary_math_op__('softmax', act.Softmax())
def __add__(layeroutput, other):
if is_compatible_with(other, float):
return layer.slope_intercept(input=layeroutput, intercept=other)
if not isinstance(other, Layer):
raise TypeError("Layer can only be added with"
" another Layer or a number")
if layeroutput.size == other.size:
return layer.mixed(input=[
layer.identity_projection(input=layeroutput),
layer.identity_projection(input=other)
])
if other.size != 1 and layeroutput.size != 1:
raise TypeError("Two Layer can be added only if they have equal size"
" or one of their sizes is 1. sizes are %s and %s" %
(layeroutput.size, other.size))
elif layeroutput.size == 1:
tmp = layeroutput
layeroutput = other
other = tmp
other = layer.repeat(other, layeroutput.size)
return layer.mixed(input=[
layer.identity_projection(input=layeroutput),
layer.identity_projection(input=other)
])
Layer.__radd__ = __add__
Layer.__add__ = __add__
def __neg__(layeroutput):
return layer.slope_intercept(input=layeroutput, slope=-1.0)
Layer.__neg__ = __neg__
def __sub__(layeroutput, other):
if is_compatible_with(other, float):
return layer.slope_intercept(input=layeroutput, intercept=other)
if not isinstance(other, Layer):
raise TypeError("Layer can only be subtracted with"
" another Layeroutput or a number")
return __add__(layeroutput, -other)
Layer.__sub__ = __sub__
def __rsub__(layeroutput, other):
neg = layer.slope_intercept(input=layeroutput, slope=-1.0)
return __add__(neg, other)
Layer.__rsub__ = __rsub__
def __mul__(layeroutput, other):
if is_compatible_with(other, float):
return layer.slope_intercept(input=layeroutput, slope=other)
if not isinstance(other, Layer):
raise TypeError("Layer can only be multiplied with"
" another Layer or a number")
elif layeroutput.size == 1:
return layer.scaling(input=other, weight=layeroutput)
elif other.size == 1:
return layer.scaling(input=layeroutput, weight=other)
else:
raise TypeError("At least one of the operand of '*' must be a number"
" or a Layer with size=1")
Layer.__mul__ = __mul__
Layer.__rmul__ = __mul__
add_python_test(test_ploter test_ploter.py)
if (NOT APPLE)
# The Mac OS X backend will not be able to function correctly if Python is
# not installed as a framework.
add_python_test(test_ploter test_ploter.py)
endif()
......@@ -14,7 +14,7 @@
__all__ = [
'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
'ComposeNotAligned', 'firstn'
'ComposeNotAligned', 'firstn', 'xmap_readers'
]
import itertools
......@@ -224,3 +224,74 @@ def firstn(reader, n):
yield item
return firstn_reader
class XmapEndSignal():
pass
def xmap_readers(mapper, reader, process_num, buffer_size):
"""
Use multiprocess to map samples from reader by a mapper defined by user.
And this function contains a buffered decorator.
:param mapper: a function to map sample.
:type mapper: callable
:param reader: the data reader to read from
:type reader: callable
:param process_num: process number to handle original sample
:type process_num: int
:param buffer_size: max buffer size
:type buffer_size: int
:return: the decarated reader
:rtype: callable
"""
end = XmapEndSignal()
in_queue = Queue(buffer_size)
out_queue = Queue(buffer_size)
# define a worker to read samples from reader to in_queue
def read_worker(reader, in_queue):
for i in reader():
in_queue.put(i)
in_queue.put(end)
# start a read worker in a thread
t = Thread(target=read_worker, args=(reader, in_queue))
t.daemon = True
t.start()
# define a worker to handle samples from in_queue by mapper
# and put mapped samples into out_queue
def handle_worker(in_queue, out_queue, mapper):
sample = in_queue.get()
while not isinstance(sample, XmapEndSignal):
r = mapper(sample)
out_queue.put(r)
sample = in_queue.get()
in_queue.put(end)
out_queue.put(end)
# start several handle_workers
workers = []
for i in xrange(process_num):
worker = Thread(
target=handle_worker, args=(in_queue, out_queue, mapper))
worker.daemon = True
workers.append(worker)
for w in workers:
w.start()
def xreader():
sample = out_queue.get()
while not isinstance(sample, XmapEndSignal):
yield sample
sample = out_queue.get()
finish = 1
while finish < process_num:
sample = out_queue.get()
if isinstance(sample, XmapEndSignal):
finish += 1
else:
yield sample
return xreader
add_python_test(test_v2_api test_data_feeder.py test_parameters.py
add_python_test(test_v2_api test_data_feeder.py test_op.py test_parameters.py
test_layer.py test_rnn_layer.py test_topology.py test_image.py)
......@@ -233,6 +233,30 @@ class DataFeederTest(unittest.TestCase):
self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1])
self.assertEqual(out_index[i], data[i][0])
def test_dense_set_shape(self):
# test 2-D data
def gen_data(batch_size, shape):
data = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(np.random.random(shape))
data.append(each_sample)
return data
feeder = DataFeeder([('image', data_type.dense_array(2352))],
{'image': 0})
arg = feeder(gen_data(32, (3, 28, 28)))
h = arg.getSlotFrameHeight(0)
w = arg.getSlotFrameWidth(0)
self.assertEqual(h, 28)
self.assertEqual(w, 28)
arg = feeder(gen_data(32, (3, 30, 32)))
h = arg.getSlotFrameHeight(0)
w = arg.getSlotFrameWidth(0)
self.assertEqual(h, 30)
self.assertEqual(w, 32)
if __name__ == '__main__':
api.initPaddle("--use_gpu=0")
......
......@@ -73,7 +73,7 @@ class AggregateLayerTest(unittest.TestCase):
pool = layer.pooling(
input=pixel,
pooling_type=pooling.Avg(),
agg_level=layer.AggregateLevel.EACH_SEQUENCE)
agg_level=layer.AggregateLevel.TO_SEQUENCE)
last_seq = layer.last_seq(input=pixel)
first_seq = layer.first_seq(input=pixel)
concat = layer.concat(input=[last_seq, first_seq])
......@@ -109,7 +109,7 @@ class ReshapeLayerTest(unittest.TestCase):
expand = layer.expand(
input=weight,
expand_as=pixel,
expand_level=layer.ExpandLevel.FROM_TIMESTEP)
expand_level=layer.ExpandLevel.FROM_NO_SEQUENCE)
repeat = layer.repeat(input=pixel, num_repeats=4)
reshape = layer.seq_reshape(input=pixel, reshape_size=4)
rotate = layer.rotate(input=pixel, height=16, width=49)
......@@ -164,6 +164,7 @@ class OtherLayerTest(unittest.TestCase):
maxid = layer.max_id(input=inference)
sampling_id = layer.sampling_id(input=inference)
eos = layer.eos(input=maxid, eos_id=5)
layer.printer(maxid)
print layer.parse_network([maxid, sampling_id, eos])
def test_slicing_joining_layer(self):
......@@ -173,9 +174,9 @@ class OtherLayerTest(unittest.TestCase):
class ProjOpTest(unittest.TestCase):
def test_projection(self):
input = layer.data(name='data', type=data_type.dense_vector(784))
input = layer.data(name='data2', type=data_type.dense_vector(784))
word = layer.data(
name='word', type=data_type.integer_value_sequence(10000))
name='word2', type=data_type.integer_value_sequence(10000))
fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
mixed0 = layer.mixed(
......@@ -204,8 +205,8 @@ class ProjOpTest(unittest.TestCase):
dotmul1 += dotmul
context = layer.context_projection(input=fc0, context_len=5)
context0 = layer.mixed(size=100, input=context)
with layer.mixed(size=100) as context1:
context0 = layer.mixed(size=500, input=context)
with layer.mixed(size=500) as context1:
context1 += context
conv = layer.conv_projection(
......@@ -231,8 +232,8 @@ class ProjOpTest(unittest.TestCase):
print layer.parse_network(conv1)
def test_operator(self):
ipt0 = layer.data(name='data', type=data_type.dense_vector(784))
ipt1 = layer.data(name='word', type=data_type.dense_vector(128))
ipt0 = layer.data(name='data1', type=data_type.dense_vector(784))
ipt1 = layer.data(name='word1', type=data_type.dense_vector(128))
fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
......@@ -261,7 +262,7 @@ class ProjOpTest(unittest.TestCase):
class NetworkTests(unittest.TestCase):
def test_vgg(self):
img = layer.data(name='pixel', type=data_type.dense_vector(784))
img = layer.data(name='pixel1', type=data_type.dense_vector(784))
vgg_out = networks.small_vgg(
input_image=img, num_channels=1, num_classes=2)
print layer.parse_network(vgg_out)
......@@ -269,12 +270,12 @@ class NetworkTests(unittest.TestCase):
class EvaluatorTest(unittest.TestCase):
def test_evaluator(self):
img = layer.data(name='pixel', type=data_type.dense_vector(784))
img = layer.data(name='pixel2', type=data_type.dense_vector(784))
output = layer.fc(input=img,
size=10,
act=activation.Softmax(),
name='fc_here')
lbl = layer.data(name='label', type=data_type.integer_value(10))
lbl = layer.data(name='label2', type=data_type.integer_value(10))
cost = layer.cross_entropy_cost(input=output, label=lbl)
evaluator.classification_error(input=output, label=lbl)
......
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
# Copyright PaddlePaddle contributors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -14,26 +11,40 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Print model parameters in last model
Usage:
python evaluate_model.py
"""
import numpy as np
import os
def load(file_name):
with open(file_name, 'rb') as f:
f.read(16) # skip header for float type.
return np.fromfile(f, dtype=np.float32)
def main():
print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
load('output/pass-00029/b'))
import unittest
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.op as op
class OpTest(unittest.TestCase):
def test_op(self):
x = layer.data(name='data', type=data_type.dense_vector(128))
x = op.exp(x)
x = op.sqrt(x)
x = op.reciprocal(x)
x = op.log(x)
x = op.abs(x)
x = op.sigmoid(x)
x = op.tanh(x)
x = op.square(x)
x = op.relu(x)
y = 1 + x
y = y + 1
y = x + y
y = y - x
y = y - 2
y = 2 - y
y = 2 * y
y = y * 3
z = layer.data(name='data_2', type=data_type.dense_vector(1))
y = y * z
y = z * y
y = y + z
y = z + y
print layer.parse_network(y)
if __name__ == '__main__':
main()
unittest.main()
......@@ -20,6 +20,8 @@ import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as parse_network
from paddle.trainer_config_helpers.config_parser_utils import \
reset_parser
class RNNTest(unittest.TestCase):
......@@ -29,6 +31,8 @@ class RNNTest(unittest.TestCase):
hidden_dim = 8
def parse_old_rnn():
reset_parser()
def step(y):
mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
out = conf_helps.fc_layer(
......@@ -42,11 +46,14 @@ class RNNTest(unittest.TestCase):
def test():
data = conf_helps.data_layer(name="word", size=dict_dim)
embd = conf_helps.embedding_layer(input=data, size=word_dim)
conf_helps.recurrent_group(name="rnn", step=step, input=embd)
conf_helps.recurrent_group(
name="rnn", step=step, input=embd, reverse=True)
return str(parse_network(test))
def parse_new_rnn():
reset_parser()
def new_step(y):
mem = layer.memory(name="rnn_state", size=hidden_dim)
out = layer.fc(input=[y, mem],
......@@ -60,7 +67,7 @@ class RNNTest(unittest.TestCase):
name="word", type=data_type.integer_value(dict_dim))
embd = layer.embedding(input=data, size=word_dim)
rnn_layer = layer.recurrent_group(
name="rnn", step=new_step, input=embd)
name="rnn", step=new_step, input=embd, reverse=True)
return str(layer.parse_network(rnn_layer))
diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
......@@ -74,6 +81,8 @@ class RNNTest(unittest.TestCase):
label_dim = 3
def parse_old_rnn():
reset_parser()
def test():
data = conf_helps.data_layer(name="word", size=dict_dim)
label = conf_helps.data_layer(name="label", size=label_dim)
......@@ -113,6 +122,7 @@ class RNNTest(unittest.TestCase):
return str(parse_network(test))
def parse_new_rnn():
reset_parser()
data = layer.data(
name="word", type=data_type.dense_vector(dict_dim))
label = layer.data(
......
......@@ -46,8 +46,8 @@ class TestTopology(unittest.TestCase):
self.assertEqual(label_data_type[1].dim, 10)
def test_get_layer(self):
pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
label = layer.data(name='label', type=data_type.integer_value(10))
pixel = layer.data(name='pixel2', type=data_type.dense_vector(784))
label = layer.data(name='label2', type=data_type.integer_value(10))
hidden = layer.fc(input=pixel,
size=100,
act=conf_helps.SigmoidActivation())
......@@ -56,14 +56,14 @@ class TestTopology(unittest.TestCase):
act=conf_helps.SoftmaxActivation())
cost = layer.classification_cost(input=inference, label=label)
topo = topology.Topology(cost)
pixel_layer = topo.get_layer("pixel")
label_layer = topo.get_layer("label")
pixel_layer = topo.get_layer("pixel2")
label_layer = topo.get_layer("label2")
self.assertEqual(pixel_layer, pixel)
self.assertEqual(label_layer, label)
def test_parse(self):
pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
label = layer.data(name='label', type=data_type.integer_value(10))
pixel = layer.data(name='pixel3', type=data_type.dense_vector(784))
label = layer.data(name='label3', type=data_type.integer_value(10))
hidden = layer.fc(input=pixel,
size=100,
act=conf_helps.SigmoidActivation())
......
......@@ -15,36 +15,13 @@
import collections
from paddle.proto.ModelConfig_pb2 import ModelConfig
import paddle.trainer_config_helpers as conf_helps
import layer as v2_layer
import config_base
__all__ = ['Topology']
def __flatten__(lis):
"""
Given a list, possibly nested to any level, return it flattened.
"""
new_lis = []
for item in lis:
if isinstance(item, collections.Sequence):
new_lis.extend(__flatten__(item))
else:
new_lis.append(item)
return new_lis
def __bfs_travel__(callback, *layers):
layers = __flatten__(layers)
for each_layer in layers:
__break__ = callback(each_layer)
if __break__:
return
__layers__ = each_layer.__parent_layers__.values() + \
each_layer.extra_parent()
__bfs_travel__(callback, *__layers__)
class Topology(object):
"""
Topology is used to store the information about all layers
......@@ -54,7 +31,6 @@ class Topology(object):
def __init__(self, layers, extra_layers=None):
def __check__(layers):
if not isinstance(layers, collections.Sequence):
__check_layer_type__(layers)
layers = [layers]
for layer in layers:
__check_layer_type__(layer)
......@@ -94,31 +70,18 @@ class Topology(object):
:param name:
:return:
"""
result_layer = [None]
def __impl__(l):
if l.name == name:
result_layer[0] = l
return True # break
return False
__bfs_travel__(__impl__, *self.layers)
if result_layer[0] is None:
raise ValueError("No such layer %s" % name)
return result_layer[0]
return v2_layer.get_layer(name)
def data_layers(self):
"""
get all data layer
:return:
"""
data_layers = dict()
def __impl__(l):
if isinstance(l, v2_layer.DataLayerV2):
data_layers[l.name] = l
__bfs_travel__(__impl__, *self.layers)
data_layers = {}
for layer in self.proto().layers:
l = v2_layer.get_layer(layer.name)
if l and l.layer_type == conf_helps.LayerType.DATA:
data_layers[layer.name] = l
return data_layers
def data_type(self):
......@@ -127,7 +90,8 @@ class Topology(object):
[('image', dense_vector(768)), ('label', integer_value(10))]
"""
data_layers = self.data_layers()
return [(nm, data_layers[nm].type)
return [(nm, data_layers[nm].data_type)
for nm in self.proto().input_layer_names]
def get_layer_proto(self, name):
......@@ -138,5 +102,5 @@ class Topology(object):
def __check_layer_type__(layer):
if not isinstance(layer, v2_layer.LayerV2):
raise ValueError('layer should have type paddle.layer.Layer')
if not isinstance(layer, config_base.Layer):
raise ValueError('layer should have type paddle.v2.config_base.Layer')
from setuptools import setup
packages=['paddle',
'paddle.proto',
'paddle.trainer',
......@@ -10,15 +11,19 @@ packages=['paddle',
'paddle.v2.reader',
'paddle.v2.plot']
setup_requires=["requests",
"numpy",
"protobuf==3.1",
"matplotlib",
"rarfile"]
if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
setup_requires+=["opencv-python"]
setup(name='paddle',
version='${PADDLE_VERSION}',
description='Parallel Distributed Deep Learning',
install_requires=[
"requests",
"numpy",
"protobuf==${PROTOBUF_VERSION}",
"matplotlib",
],
install_requires=setup_requires,
packages=packages,
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}'
......
The examples in v1_api_demo are using v1_api now, and will be upgraded into v2_api later.
Thus, v1_api_demo is a temporary directory. We decide not to maintain it and will delete it in future.
Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and
[PaddlePaddle/models](https://github.com/PaddlePaddle/models) to learn PaddlePaddle.
#Variational Autoencoder (VAE)
This demo implements VAE training described in the original paper (https://arxiv.org/abs/1312.6114).
In order to run the model, first download the MNIST dataset by running the shell script in ./data.
Then you can run the command below. The flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu).
$python vae_train.py [--use_gpu 1]
The generated images will be stored in ./samples/
The corresponding models will be stored in ./params/
#!/usr/bin/env sh
# This script downloads the mnist data and unzips it.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
rm -rf "$DIR/mnist_data"
mkdir "$DIR/mnist_data"
cd "$DIR/mnist_data"
echo "Downloading..."
for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
if [ ! -e $fname ]; then
wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
gunzip ${fname}.gz
fi
done
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -13,39 +12,49 @@
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
import numpy as np
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd $DIR
#download the dataset
echo "Downloading aclImdb..."
#http://ai.stanford.edu/%7Eamaas/data/sentiment/
wget http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
class MNISTloader():
def __init__(self,
data_path="./data/mnist_data/",
batch_size=60,
process='train'):
self.batch_size = batch_size
self.data_path = data_path
self._pointer = 0
self.image_batches = np.array([])
self.process = process
echo "Downloading mosesdecoder..."
#https://github.com/moses-smt/mosesdecoder
wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
def _extract_images(self, filename, n):
f = open(filename, 'rb')
f.read(16)
data = np.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
#Mapping data into [-1, 1]
data = data / 255. * 2. - 1
data_batches = np.split(data, 60000 / self.batch_size, 0)
#extract package
echo "Unzipping..."
tar -zxvf aclImdb_v1.tar.gz
unzip master.zip
f.close()
#move train and test set to imdb_data directory
#in order to process when traing
mkdir -p imdb/train
mkdir -p imdb/test
return data_batches
cp -r aclImdb/train/pos/ imdb/train/pos
cp -r aclImdb/train/neg/ imdb/train/neg
@property
def pointer(self):
return self._pointer
cp -r aclImdb/test/pos/ imdb/test/pos
cp -r aclImdb/test/neg/ imdb/test/neg
def load_data(self):
TRAIN_IMAGES = '%s/train-images-idx3-ubyte' % self.data_path
TEST_IMAGES = '%s/t10k-images-idx3-ubyte' % self.data_path
#remove compressed package
rm aclImdb_v1.tar.gz
rm master.zip
if self.process == 'train':
self.image_batches = self._extract_images(TRAIN_IMAGES, 60000)
else:
self.image_batches = self._extract_images(TEST_IMAGES, 10000)
echo "Done."
def next_batch(self):
batch = self.image_batches[self._pointer]
self._pointer = (self._pointer + 1) % (60000 / self.batch_size)
return np.array(batch)
def reset_pointer(self):
self._pointer = 0
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
import numpy as np
is_generating = get_config_arg("is_generating", bool, False)
settings(batch_size=32, learning_rate=1e-3, learning_method=AdamOptimizer())
X_dim = 28 * 28
h_dim = 128
z_dim = 100
def reparameterization(mu, logvar):
eps = ParamAttr(initial_mean=0., initial_std=1)
with mixed_layer() as sigma:
sigma += dotmul_projection(layer_math.exp(logvar) * 0.5, param_attr=eps)
return mu + sigma
def q_func(X):
"""
xavier initialization
"""
param_attr = ParamAttr(
name='share.w', initial_mean=0., initial_std=1. / np.sqrt(X_dim / 2.))
mu_param = ParamAttr(
name='mu.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
logvar_param = ParamAttr(
name='logvar.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
bias_attr = ParamAttr(name='share.bias', initial_mean=0., initial_std=0.)
mu_bias = ParamAttr(name='mu.bias', initial_mean=0., initial_std=0.)
logvar_bias = ParamAttr(name='logvar.bias', initial_mean=0., initial_std=0.)
share_layer = fc_layer(
X,
size=h_dim,
param_attr=param_attr,
bias_attr=bias_attr,
act=ReluActivation())
return (fc_layer(
share_layer,
size=z_dim,
param_attr=mu_param,
bias_attr=mu_bias,
act=LinearActivation()), fc_layer(
share_layer,
size=z_dim,
param_attr=logvar_param,
bias_attr=logvar_bias,
act=LinearActivation()))
def generator(z):
hidden_param = ParamAttr(
name='hidden.w', initial_mean=0., initial_std=1. / np.sqrt(z_dim / 2.))
hidden_bias = ParamAttr(name='hidden.bias', initial_mean=0., initial_std=0.)
prob_param = ParamAttr(
name='prob.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
prob_bias = ParamAttr(name='prob.bias', initial_mean=0., initial_std=0.)
hidden_layer = fc_layer(
z,
size=h_dim,
act=ReluActivation(),
param_attr=hidden_param,
bias_attr=hidden_bias)
prob = fc_layer(
hidden_layer,
size=X_dim,
act=SigmoidActivation(),
param_attr=prob_param,
bias_attr=prob_bias)
return prob
def reconstruct_error(prob, X):
cost = multi_binary_label_cross_entropy(input=prob, label=X)
return cost
def KL_loss(mu, logvar):
with mixed_layer() as mu_square:
mu_square += dotmul_operator(mu, mu, scale=1.)
cost = 0.5 * sum_cost(layer_math.exp(logvar) + mu_square - 1. - logvar)
return cost
if not is_generating:
x_batch = data_layer(name='x_batch', size=X_dim)
mu, logvar = q_func(x_batch)
z_samples = reparameterization(mu, logvar)
prob = generator(z_samples)
outputs(reconstruct_error(prob, x_batch) + KL_loss(mu, logvar))
else:
z_samples = data_layer(name='noise', size=z_dim)
outputs(generator(z_samples))
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import random
import numpy as np
import cPickle
import sys, os
from PIL import Image
from paddle.trainer.config_parser import parse_config
from paddle.trainer.config_parser import logger
import py_paddle.swig_paddle as api
import dataloader
import matplotlib.pyplot as plt
def plot_samples(samples):
fig = plt.figure(figsize=(4, 4))
gs = gridspec.GridSpec(4, 4)
gs.update(wspace=0.05, hspace=0.05)
for i, sample in enumerate(samples):
plt.subplot(gs[i])
plt.axis('off')
plt.imshow(sample.reshape(28, 28), cmap='Greys_r')
return fig
def CHECK_EQ(a, b):
assert a == b, "a=%s, b=%s" % (a, b)
def get_fake_samples(generator_machine, batch_size, noise):
gen_inputs = api.Arguments.createArguments(1)
gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
gen_outputs = api.Arguments.createArguments(0)
generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
return fake_samples
def copy_shared_parameters(src, dst):
'''
copy the parameters from src to dst
:param src: the source of the parameters
:type src: GradientMachine
:param dst: the destination of the parameters
:type dst: GradientMachine
'''
src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
src_params = dict([(p.getName(), p) for p in src_params])
for i in xrange(dst.getParameterSize()):
dst_param = dst.getParameter(i)
src_param = src_params.get(dst_param.getName(), None)
if src_param is None:
continue
src_value = src_param.getBuf(api.PARAMETER_VALUE)
dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
CHECK_EQ(len(src_value), len(dst_value))
dst_value.copyFrom(src_value)
dst_param.setValueUpdated()
def find(iterable, cond):
for item in iterable:
if cond(item):
return item
return None
def get_layer_size(model_conf, layer_name):
layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
return layer_conf.size
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
"--use_gpu", default="1", help="1 means use gpu for training")
parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
args = parser.parse_args()
use_gpu = args.use_gpu
assert use_gpu in ["0", "1"]
if not os.path.exists("./samples/"):
os.makedirs("./samples/")
if not os.path.exists("./params/"):
os.makedirs("./params/")
api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
'--log_period=1000', '--gpu_id=' + args.gpu_id,
'--save_dir=' + "./params/")
conf = "vae_conf.py"
trainer_conf = parse_config(conf, "is_generating=False")
gener_conf = parse_config(conf, "is_generating=True")
batch_size = trainer_conf.opt_config.batch_size
noise_dim = get_layer_size(gener_conf.model_config, "noise")
mnist = dataloader.MNISTloader(batch_size=batch_size)
mnist.load_data()
training_machine = api.GradientMachine.createFromConfigProto(
trainer_conf.model_config)
generator_machine = api.GradientMachine.createFromConfigProto(
gener_conf.model_config)
trainer = api.Trainer.create(trainer_conf, training_machine)
trainer.startTrain()
for train_pass in xrange(100):
trainer.startTrainPass()
mnist.reset_pointer()
i = 0
it = 0
while mnist.pointer != 0 or i == 0:
X = mnist.next_batch().astype('float32')
inputs = api.Arguments.createArguments(1)
inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X))
trainer.trainOneDataBatch(batch_size, inputs)
if it % 1000 == 0:
outputs = api.Arguments.createArguments(0)
training_machine.forward(inputs, outputs, api.PASS_TEST)
loss = np.mean(outputs.getSlotValue(0).copyToNumpyMat())
print "\niter: {}".format(str(it).zfill(3))
print "VAE loss: {}".format(str(loss).zfill(3))
#Sync parameters between networks (GradientMachine) at the beginning
copy_shared_parameters(training_machine, generator_machine)
z_samples = np.random.randn(batch_size,
noise_dim).astype('float32')
samples = get_fake_samples(generator_machine, batch_size,
z_samples)
#Generating the first 16 images for a picture.
figure = plot_samples(samples[:16])
plt.savefig(
"./samples/{}_{}.png".format(
str(train_pass).zfill(3), str(i).zfill(3)),
bbox_inches='tight')
plt.close(figure)
i += 1
it += 1
trainer.finishTrainPass()
trainer.finishTrain()
if __name__ == '__main__':
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册