Commit b783e08e authored by dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into row_conv

...@@ -3,8 +3,8 @@
 hooks:
 - id: remove-crlf
   files: (?!.*third_party)^.*$ | (?!.*book)^.*$
-- repo: https://github.com/reyoung/mirrors-yapf.git
-  sha: v0.13.2
+- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
+  sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
 hooks:
 - id: yapf
   files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -26,7 +26,7 @@ ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})

ExternalProject_Add(
-    gflags
+    extern_gflags
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY  "https://github.com/gflags/gflags.git"
    PREFIX          ${GFLAGS_SOURCES_DIR}
...@@ -44,4 +44,8 @@ ExternalProject_Add(
    -DCMAKE_BUILD_TYPE:STRING=Release
)

+ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
+ADD_DEPENDENCIES(gflags extern_gflags)
+
LIST(APPEND external_project_dependencies gflags)
...@@ -27,7 +27,7 @@ ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})

ExternalProject_Add(
-    glog
+    extern_glog
    ${EXTERNAL_PROJECT_LOG_ARGS}
    DEPENDS gflags
    GIT_REPOSITORY  "https://github.com/google/glog.git"
...@@ -48,4 +48,8 @@ ExternalProject_Add(
    -DCMAKE_BUILD_TYPE:STRING=Release
)

+ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})
+ADD_DEPENDENCIES(glog extern_glog)
+
LIST(APPEND external_project_dependencies glog)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -35,7 +35,7 @@ IF(WITH_TESTING)
    ENDIF(WIN32)

    ExternalProject_Add(
-        gtest
+        extern_gtest
        ${EXTERNAL_PROJECT_LOG_ARGS}
        GIT_REPOSITORY  "https://github.com/google/googletest.git"
        GIT_TAG         "release-1.8.0"
...@@ -55,5 +55,14 @@ IF(WITH_TESTING)
        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
        -DCMAKE_BUILD_TYPE:STRING=Release
    )
-    LIST(APPEND external_project_dependencies gtest)
+
+    ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)
+    SET_PROPERTY(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES})
+    ADD_DEPENDENCIES(gtest extern_gtest)
+
+    ADD_LIBRARY(gtest_main STATIC IMPORTED GLOBAL)
+    SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES})
+    ADD_DEPENDENCIES(gtest_main extern_gtest)
+
+    LIST(APPEND external_project_dependencies gtest gtest_main)
ENDIF(WITH_TESTING)
...@@ -41,7 +41,7 @@ IF(NOT ${CBLAS_FOUND})
    ENDIF()

    ExternalProject_Add(
-        openblas
+        extern_openblas
        ${EXTERNAL_PROJECT_LOG_ARGS}
        GIT_REPOSITORY      https://github.com/xianyi/OpenBLAS.git
        GIT_TAG             ${OPENBLAS_COMMIT}
...@@ -53,8 +53,14 @@ IF(NOT ${CBLAS_FOUND})
        UPDATE_COMMAND      ""
        CONFIGURE_COMMAND   ""
    )
-    LIST(APPEND external_project_dependencies openblas)
ENDIF(NOT ${CBLAS_FOUND})

MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}")
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
+
+ADD_LIBRARY(cblas STATIC IMPORTED)
+SET_PROPERTY(TARGET cblas PROPERTY IMPORTED_LOCATION ${CBLAS_LIBRARIES})
+
+IF(NOT ${CBLAS_FOUND})
+    ADD_DEPENDENCIES(cblas extern_openblas)
+    LIST(APPEND external_project_dependencies cblas)
+ENDIF(NOT ${CBLAS_FOUND})
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -43,7 +43,7 @@ ELSE()
ENDIF()

ExternalProject_Add(
-    warpctc
+    extern_warpctc
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY  "https://github.com/gangliao/warp-ctc.git"
    PREFIX          ${WARPCTC_SOURCES_DIR}
...@@ -65,4 +65,8 @@ ExternalProject_Add(
    -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
)

+ADD_LIBRARY(warpctc STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES})
+ADD_DEPENDENCIES(warpctc extern_warpctc)
+
LIST(APPEND external_project_dependencies warpctc)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -16,7 +16,7 @@
# To simplify the build process of PaddlePaddle, we defined a couple of
# fundamental abstractions, e.g., how to build library, binary and
# test in C++, CUDA and Go.
#
# -------------------------------------------
#     C++        CUDA C++        Go
# -------------------------------------------
...@@ -29,6 +29,11 @@
# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
#

+if(NOT APPLE)
+    find_package(Threads REQUIRED)
+    link_libraries(${CMAKE_THREAD_LIBS_INIT})
+endif(NOT APPLE)
+
# cc_library parses tensor.cc and figures out that the target also depends on tensor.h.
# cc_library(tensor
#   SRCS
...@@ -45,7 +50,9 @@ function(cc_library TARGET_NAME)
  else()
    add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
  endif()
-  add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
+  if (cc_library_DEPS)
+    add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
+  endif()
endfunction(cc_library)

# cc_binary parses tensor.cc and figures out that the target also depends on tensor.h.
...@@ -58,8 +65,7 @@ function(cc_binary TARGET_NAME)
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  add_executable(${TARGET_NAME} ${cc_binary_SRCS})
-  link_paddle_exe(${TARGET_NAME})
  if(cc_binary_DEPS)
    target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
    add_dependencies(${TARGET_NAME} ${cc_binary_DEPS})
  endif()
...@@ -73,17 +79,16 @@ endfunction(cc_binary)
#   DEPS
#   tensor)
function(cc_test TARGET_NAME)
-  set(options "")
-  set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS)
-  cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-  add_executable(${TARGET_NAME} ${cc_test_SRCS})
-  link_paddle_test(${TARGET_NAME})
-  if(cc_test_DEPS)
-    target_link_libraries(${TARGET_NAME} ${cc_test_DEPS})
-    add_dependencies(${TARGET_NAME} ${cc_test_DEPS})
-  endif()
-  add_test(${TARGET_NAME} ${TARGET_NAME})
+  if(WITH_TESTING)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    add_executable(${TARGET_NAME} ${cc_test_SRCS})
+    target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
+    add_dependencies(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
+    add_test(${TARGET_NAME} ${TARGET_NAME})
+  endif()
endfunction(cc_test)

# Suppose that ops.cu includes global functions that take Tensor as
...@@ -95,28 +100,33 @@ endfunction(cc_test)
#   DEPS
#   ops)
function(nv_library TARGET_NAME)
-  set(options OPTIONAL)
-  set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS)
-  cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-  if (${nv_library_OPTIONAL} STREQUAL "SHARED")
-    cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
-  else()
-    cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
-  endif()
-  add_dependencies(${TARGET_NAME} ${nv_library_DEPS} ${external_project_dependencies})
+  if (WITH_GPU)
+    set(options OPTIONAL)
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    if (${nv_library_OPTIONAL} STREQUAL "SHARED")
+      cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
+    else()
+      cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
+    endif()
+    if (nv_library_DEPS)
+      add_dependencies(${TARGET_NAME} ${nv_library_DEPS})
+    endif()
+  endif()
endfunction(nv_library)

function(nv_binary TARGET_NAME)
-  set(options "")
-  set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS)
-  cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-  cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
-  link_paddle_exe(${TARGET_NAME})
-  if(nv_binary_DEPS)
-    target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
-    add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
-  endif()
+  if (WITH_GPU)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
+    if(nv_binary_DEPS)
+      target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
+      add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
+    endif()
+  endif()
endfunction(nv_binary)
...@@ -128,17 +138,16 @@ endfunction(nv_binary)
#   DEPS
#   ops)
function(nv_test TARGET_NAME)
-  set(options "")
-  set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS)
-  cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-  cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
-  link_paddle_test(${TARGET_NAME})
-  if(nv_test_DEPS)
-    target_link_libraries(${TARGET_NAME} ${nv_test_DEPS})
-    add_dependencies(${TARGET_NAME} ${nv_test_DEPS})
-  endif()
-  add_test(${TARGET_NAME} ${TARGET_NAME})
+  if (WITH_GPU AND WITH_TESTING)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
+    target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} gtest gtest_main)
+    add_dependencies(${TARGET_NAME} ${nv_test_DEPS} gtest gtest_main)
+    add_test(${TARGET_NAME} ${TARGET_NAME})
+  endif()
endfunction(nv_test)

set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go")
...@@ -164,7 +173,7 @@ function(go_library TARGET_NAME)
    set(LIB_NAME "lib${TARGET_NAME}.dylib")
  else()
    set(LIB_NAME "lib${TARGET_NAME}.so")
  endif()
else()
  set(BUILD_MODE "-buildmode=c-archive")
  set(LIB_NAME "lib${TARGET_NAME}.a")
...@@ -190,8 +199,8 @@ function(go_binary TARGET_NAME)
    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
    -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
    ${go_library_SRCS}
    WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
  add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_binary_DEPS})
  install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin)
endfunction(go_binary)
...@@ -204,8 +213,8 @@ function(go_test TARGET_NAME)
    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test
    -c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
    ${go_test_SRCS}
    WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
  add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS})
  add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME})
endfunction(go_test)
......
......
...@@ -10,7 +10,7 @@ if(WITH_RDMA)
  function(generate_rdma_links)
    # redirect to the current DIR to isolate the pollution from the system
    # runtime environment; this gives us unified control over different gcc
    # environments. e.g., by default gcc48 does not refer to /usr/lib64, which
    # could contain low-version runtime libraries that crash the process while
    # being loaded. This redirect trick fixes that.
...@@ -19,7 +19,9 @@ if(WITH_RDMA)
      COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
      COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
      COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
      COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
+     COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so.1
+     COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
      )
  endfunction(generate_rdma_links)
...@@ -44,7 +46,7 @@ if(WITH_RDMA)
      RDMA_INC_XIO AND
      RDMA_INC_EVENT AND
      RDMA_INC_NUMA AND
      RDMA_LIB_SXISOCK AND
      RDMA_LIB_XIO AND
      RDMA_LIB_EVENT AND
      RDMA_LIB_EVENT_CORE AND
...@@ -53,19 +55,19 @@ if(WITH_RDMA)
      RDMA_LIB_NUMA
      )
    set(RDMA_INC_DIR
      ${RDMA_INC_SXISOCK}
      ${RDMA_INC_XIO}
      ${RDMA_INC_EVENT}
      ${RDMA_INC_NUMA})
    set(RDMA_LIBS
      ${RDMA_LIB_SXISOCK}
      ${RDMA_LIB_XIO}
      ${RDMA_LIB_EVENT}
      ${RDMA_LIB_EVENT_CORE}
      ${RDMA_LIB_EVENT_EXTRA}
      ${RDMA_LIB_EVENT_PTHREADS}
      ${RDMA_LIB_NUMA}
      )
    set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
    include_directories("${RDMA_INC_DIR}")
......
...@@ -12,13 +12,13 @@ all the build tools PaddlePaddle needs; the built PaddlePaddle is also packaged into an
image, called the production image, which contains all the environments
PaddlePaddle needs at runtime. Every time a new PaddlePaddle version is
released, matching production and development images are published with it.
The runtime images include a CPU-only version, a GPU version, and their
corresponding no-AVX versions. We publish the latest Docker images on
-`dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_, where the
+`dockerhub.com <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_, where the
latest Paddle image versions can be found under the "tags" tab. To make
downloading easier for developers in China, we also provide a domestic image
registry. If you are in China, please replace paddlepaddle/paddle in the
commands in this document with docker.paddlepaddle.org/paddle.

-1. Development image: :code:`paddlepaddle/paddle:<version>-dev`
+1. Development image: :code:`paddlepaddle/paddle:0.10.0-dev`

   This image contains Paddle's development tools together with the build and
   runtime environments. Instead of configuring a local environment, users can
   rely on the development image to develop, build, release, and write
   documentation. Since different Paddle versions may need different
   dependencies and tools, take the version into account if you set up a
   development environment yourself.
...@@ -37,13 +37,13 @@ docker.paddlepaddle.org/paddle.

   .. code-block:: bash

-      docker run -it --rm paddlepaddle/paddle:<version>-dev /bin/bash
+      docker run -it --rm paddlepaddle/paddle:0.10.0-dev /bin/bash

   Alternatively, the container can run as a background process:

   .. code-block:: bash

-      docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:<version>-dev
+      docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0-dev

   Then SSH into the container with the password :code:`root`:
...@@ -73,7 +73,7 @@ docker.paddlepaddle.org/paddle.

   .. code-block:: bash

-      nvidia-docker run -it --rm paddledev/paddle:0.10.0rc1-gpu /bin/bash
+      nvidia-docker run -it --rm paddledev/paddle:0.10.0-gpu /bin/bash

   Note: if nvidia-docker does not work for you, you can try the older method
   below, although we do not recommend it:
...@@ -81,7 +81,7 @@ docker.paddlepaddle.org/paddle.

      export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
      export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
-      docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:<version>-gpu
+      docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0-gpu

3. Run and publish your AI program
...@@ -98,7 +98,7 @@ docker.paddlepaddle.org/paddle.

      nvidia-docker run -it -v $PWD:/work paddle /work/a.py

-   Here we assume every dependency of `a.py` is available inside Paddle's runtime container. If you need extra dependencies, or want to publish an image of your own application, you can write a `Dockerfile` based on `FROM paddledev/paddle:<version>`
+   Here we assume every dependency of `a.py` is available inside Paddle's runtime container. If you need extra dependencies, or want to publish an image of your own application, you can write a `Dockerfile` based on `FROM paddledev/paddle:0.10.0`
   to create and publish an image for your own AI program.

Run the PaddlePaddle Book
...@@ -177,7 +177,7 @@ Paddle's Docker development image ships with a website generated by `woboq code browser

   .. code-block:: bash

-      docker run -d --name paddle-cpu-doc paddle:<version>-dev
+      docker run -d --name paddle-cpu-doc paddle:0.10.0-dev
      docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx

Then we can open a browser at http://localhost:8088/paddle/ to browse the code.
...@@ -23,7 +23,7 @@ Docker is simple as long as we understand a few basic concepts:

   .. code-block:: bash

-      docker pull paddlepaddle/paddle:0.10.0rc2
+      docker pull paddlepaddle/paddle:0.10.0

   to download a Docker image, paddlepaddle/paddle in this example,
   from Dockerhub.com.
...@@ -35,7 +35,7 @@ Docker is simple as long as we understand a few basic concepts:

   .. code-block:: bash

-      docker run paddlepaddle/paddle:0.10.0rc2
+      docker run paddlepaddle/paddle:0.10.0

   to start a container to run a Docker image, paddlepaddle/paddle in this example.
...@@ -62,7 +62,7 @@ of PaddlePaddle, we release both of them. Production image includes
CPU-only version and a CUDA GPU version and their no-AVX versions.

We put the docker images on `dockerhub.com
-<https://hub.docker.com/r/paddledev/paddle/>`_. You can find the
+<https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_. You can find the
latest versions under "tags" tab at dockerhub.com. If you are in
China, you can use our Docker image registry mirror to speed up the
download process. To use it, please replace all paddlepaddle/paddle in
...@@ -89,7 +89,7 @@ the commands to docker.paddlepaddle.org/paddle.

   .. code-block:: bash

-      docker run -it --rm paddlepaddle/paddle:0.10.0rc2 /bin/bash
+      docker run -it --rm paddlepaddle/paddle:0.10.0 /bin/bash

The above method works with the GPU image too -- the recommended way is
using `nvidia-docker <https://github.com/NVIDIA/nvidia-docker>`_.
...@@ -101,7 +101,7 @@ the commands to docker.paddlepaddle.org/paddle.

   .. code-block:: bash

-      nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash
+      nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash

2. development image :code:`paddlepaddle/paddle:<version>-dev`
...@@ -149,13 +149,13 @@ Run the program using docker:

   .. code-block:: bash

-      docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 python /workspace/example.py
+      docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 python /workspace/example.py

Or if you are using GPU for training:

   .. code-block:: bash

-      nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu python /workspace/example.py
+      nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu python /workspace/example.py

The above commands will start a docker container by running :code:`python
/workspace/example.py`. It will stop once :code:`python
...@@ -166,7 +166,7 @@ run PaddlePaddle program interactively:

   .. code-block:: bash

-      docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 /bin/bash
+      docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 /bin/bash
      # now we are inside docker container
      cd /workspace
      python example.py
...@@ -175,7 +175,7 @@ Running with GPU is identical:

   .. code-block:: bash

-      nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash
+      nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu /bin/bash
      # now we are inside docker container
      cd /workspace
      python example.py
......
...@@ -28,17 +28,17 @@ An example of pooling is shown below; see the :ref:`api_v2.layer_pooling` API docs for details.

      seq_pool = pooling(input=layer,
                         pooling_type=pooling.Max(),
-                        agg_level=AggregateLevel.EACH_SEQUENCE)
+                        agg_level=AggregateLevel.TO_SEQUENCE)

- `pooling_type` currently supports two values: pooling.Max() and pooling.Avg().

-- With `agg_level=AggregateLevel.EACH_TIMESTEP` (the default):
+- With `agg_level=AggregateLevel.TO_NO_SEQUENCE` (the default):

  - Effect: a double-level sequence is reduced to a 0-level sequence, or a single-level sequence is reduced to a 0-level sequence
  - Input: a double-level sequence or a single-level sequence
  - Output: a 0-level sequence, i.e., the average (or maximum) over the whole input sequence (single- or double-level)

-- With `agg_level=AggregateLevel.EACH_SEQUENCE`:
+- With `agg_level=AggregateLevel.TO_SEQUENCE`:

  - Effect: a double-level sequence is reduced to a single-level sequence
  - Input: must be a double-level sequence
...@@ -52,15 +52,15 @@ An example of last_seq is shown below ( :ref:`api_v2.layer_first_seq` is similar); see the API docs for details.

   .. code-block:: bash

      last = last_seq(input=layer,
-                     agg_level=AggregateLevel.EACH_SEQUENCE)
+                     agg_level=AggregateLevel.TO_SEQUENCE)

-- With `agg_level=AggregateLevel.EACH_TIMESTEP` (the default):
+- With `agg_level=AggregateLevel.TO_NO_SEQUENCE` (the default):

  - Effect: a double-level sequence is reduced to a 0-level sequence, or a single-level sequence is reduced to a 0-level sequence
  - Input: a double-level sequence or a single-level sequence
  - Output: a 0-level sequence, i.e., the last (or first) element of the whole input sequence (double- or single-level).

-- With `agg_level=AggregateLevel.EACH_SEQUENCE`:
+- With `agg_level=AggregateLevel.TO_SEQUENCE`:

  - Effect: a double-level sequence is reduced to a single-level sequence
  - Input: must be a double-level sequence
  - Output: a single-level sequence whose elements are the last (or first) element of each subsequence of the double-level sequence.
...@@ -74,9 +74,9 @@ An example of expand is shown below; see the :ref:`api_v2.layer_expand` API docs for details.

      ex = expand(input=layer1,
                  expand_as=layer2,
-                 expand_level=ExpandLevel.FROM_TIMESTEP)
+                 expand_level=ExpandLevel.FROM_NO_SEQUENCE)

-- With `expand_level=ExpandLevel.FROM_TIMESTEP` (the default):
+- With `expand_level=ExpandLevel.FROM_NO_SEQUENCE` (the default):

  - Effect: a 0-level sequence is expanded into a single-level sequence, or into a double-level sequence
  - Input: layer1 must be a 0-level sequence holding the data to expand; layer2 may be a single-level or a double-level sequence that provides the expansion lengths
......
...@@ -81,7 +81,7 @@

* In this example, each group in the raw data is decomposed via :code:`recurrent_group`, and every resulting sentence is fed through an LSTM network. This is equivalent to the single-level RNN configuration.

-* As with the single-level RNN configuration, we only need the last vector the LSTM encodes, so we apply :code:`last_seq` to :code:`recurrent_group`. Unlike the single-level RNN, we take the last element of every subsequence, hence :code:`agg_level=AggregateLevel.EACH_SEQUENCE`.
+* As with the single-level RNN configuration, we only need the last vector the LSTM encodes, so we apply :code:`last_seq` to :code:`recurrent_group`. Unlike the single-level RNN, we take the last element of every subsequence, hence :code:`agg_level=AggregateLevel.TO_SEQUENCE`.

* At this point, :code:`lstm_last` produces the same result as :code:`lstm_last` in the single-level RNN configuration.
......
...@@ -14,7 +14,7 @@ import (
	"github.com/namsral/flag"

	"github.com/PaddlePaddle/Paddle/go/master"
-	"github.com/PaddlePaddle/Paddle/go/recordio"
+	"github.com/PaddlePaddle/recordio"
)

func main() {
......
...@@ -6,7 +6,7 @@ import (
	"sync"
	"time"

-	"github.com/PaddlePaddle/Paddle/go/recordio"
+	"github.com/PaddlePaddle/recordio"
)

const (
......
...@@ -4,5 +4,8 @@ include_directories(${CMAKE_BINARY_DIR})
add_executable(main main.c)
add_dependencies(main client)
set (CMAKE_EXE_LINKER_FLAGS "-pthread")
+if(APPLE)
+    set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
+endif()
target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a)
...@@ -17,7 +17,7 @@ retry:
  paddle_parameter param;
  char name_a[] = "param_a";
  char name_b[] = "param_b";
-  char content[] = {0x00, 0x11, 0x22};
+  unsigned char content[] = {0x00, 0x11, 0x22};
  param.element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
  param.name = name_a;
  param.content = content;
...@@ -39,7 +39,7 @@ retry:
    fail();
  }

-  char content[] = {0x00, 0x11, 0x22};
+  unsigned char content[] = {0x00, 0x11, 0x22};
  paddle_gradient grads[2] = {
      {"param_a", PADDLE_ELEMENT_TYPE_INT32, content, 3},
      {"param_b", PADDLE_ELEMENT_TYPE_FLOAT32, content, 3}};
......
# RecordIO
## Write
```go
f, e := os.Create("a_file.recordio")
w := recordio.NewWriter(f, -1, -1) // -1, -1 pick the default chunk size and compressor
w.Write([]byte("Hello"))
w.Write([]byte("World!"))
w.Close()
f.Close()
```
## Read
1. Load chunk index:
```go
f, e := os.Open("a_file.recordio")
idx, e := recordio.LoadIndex(f)
fmt.Println("Total records: ", idx.Len())
f.Close()
```
2. Create one or more scanners to read a range of records. The
   following example reads the range
   [1, 3), i.e., the second and the third records:
```go
f, e := os.Open("a_file.recordio")
s := recordio.NewRangeScanner(f, idx, 1, 2) // start=1, len=2, i.e., records [1, 3)
for s.Scan() {
fmt.Println(string(s.Record()))
}
if s.Err() != nil {
log.Fatalf("Something wrong with scanning: %v", s.Err())
}
f.Close()
```
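
The commit also adds a multi-file `Scanner` (see `scanner.go` further below) whose constructor accepts glob patterns and walks every matched file in turn. A minimal sketch, assuming a file written as above:

```go
package main

import (
	"fmt"
	"log"

	"github.com/PaddlePaddle/Paddle/go/recordio"
)

func main() {
	// NewScanner globs the given paths and scans the matched files in order.
	s, err := recordio.NewScanner("a_file.recordio")
	if err != nil {
		log.Fatal(err)
	}
	defer s.Close()

	for s.Scan() {
		fmt.Println(string(s.Record()))
	}
	if err := s.Err(); err != nil {
		log.Fatalf("scanning failed: %v", err)
	}
}
```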
cmake_minimum_required(VERSION 3.0)
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
project(cxx_go C Go)
include(golang)
include(flags)
go_library(recordio STATIC)
add_subdirectory(test)
package main
/*
#include <string.h>
typedef int reader;
typedef int writer;
*/
import "C"
import (
"log"
"os"
"strings"
"unsafe"
"github.com/PaddlePaddle/Paddle/go/recordio"
)
var nullPtr = unsafe.Pointer(uintptr(0))
type writer struct {
w *recordio.Writer
f *os.File
}
type reader struct {
scanner *recordio.Scanner
}
func cArrayToSlice(p unsafe.Pointer, len int) []byte {
if p == nullPtr {
return nil
}
// create a Go slice backed by a C array, reference:
// https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
//
// The Go garbage collector will not manage this data; it needs
// to be freed explicitly.
return (*[1 << 30]byte)(p)[:len:len]
}
//export create_recordio_writer
func create_recordio_writer(path *C.char) C.writer {
p := C.GoString(path)
f, err := os.Create(p)
if err != nil {
log.Println(err)
return -1
}
w := recordio.NewWriter(f, -1, -1)
writer := &writer{f: f, w: w}
return addWriter(writer)
}
//export recordio_write
func recordio_write(writer C.writer, buf *C.uchar, size C.int) C.int {
w := getWriter(writer)
b := cArrayToSlice(unsafe.Pointer(buf), int(size))
c, err := w.w.Write(b)
if err != nil {
log.Println(err)
return -1
}
return C.int(c)
}
//export release_recordio_writer
func release_recordio_writer(writer C.writer) {
w := removeWriter(writer)
w.w.Close()
w.f.Close()
}
//export create_recordio_reader
func create_recordio_reader(path *C.char) C.reader {
p := C.GoString(path)
s, err := recordio.NewScanner(strings.Split(p, ",")...)
if err != nil {
log.Println(err)
return -1
}
r := &reader{scanner: s}
return addReader(r)
}
//export recordio_read
func recordio_read(reader C.reader, record **C.uchar) C.int {
r := getReader(reader)
if r.scanner.Scan() {
buf := r.scanner.Record()
if len(buf) == 0 {
*record = (*C.uchar)(nullPtr)
return 0
}
size := C.int(len(buf))
*record = (*C.uchar)(C.malloc(C.size_t(len(buf))))
C.memcpy(unsafe.Pointer(*record), unsafe.Pointer(&buf[0]), C.size_t(len(buf)))
return size
}
return -1
}
//export release_recordio_reader
func release_recordio_reader(reader C.reader) {
r := removeReader(reader)
r.scanner.Close()
}
func main() {} // Required but ignored
package main
/*
typedef int reader;
typedef int writer;
*/
import "C"
import "sync"
var mu sync.Mutex
var handleMap = make(map[C.reader]*reader)
var curHandle C.reader
var writerMap = make(map[C.writer]*writer)
var curWriterHandle C.writer
func addReader(r *reader) C.reader {
mu.Lock()
defer mu.Unlock()
reader := curHandle
curHandle++
handleMap[reader] = r
return reader
}
func getReader(reader C.reader) *reader {
mu.Lock()
defer mu.Unlock()
return handleMap[reader]
}
func removeReader(reader C.reader) *reader {
mu.Lock()
defer mu.Unlock()
r := handleMap[reader]
delete(handleMap, reader)
return r
}
func addWriter(w *writer) C.writer {
mu.Lock()
defer mu.Unlock()
writer := curWriterHandle
curWriterHandle++
writerMap[writer] = w
return writer
}
func getWriter(writer C.writer) *writer {
mu.Lock()
defer mu.Unlock()
return writerMap[writer]
}
func removeWriter(writer C.writer) *writer {
mu.Lock()
defer mu.Unlock()
w := writerMap[writer]
delete(writerMap, writer)
return w
}
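
The file above implements the usual cgo handle pattern: Go pointers must not be handed across the C boundary, so small integer keys are returned instead and mapped back to the Go objects under a mutex. A standalone, pure-Go sketch of the same pattern (hypothetical `resource` type, no cgo involved):

```go
package main

import (
	"fmt"
	"sync"
)

// handle is what crosses the language boundary instead of a Go pointer.
type handle int

type resource struct{ name string }

var (
	mu      sync.Mutex
	table   = make(map[handle]*resource)
	nextKey handle
)

func add(r *resource) handle {
	mu.Lock()
	defer mu.Unlock()
	h := nextKey
	nextKey++
	table[h] = r
	return h
}

func get(h handle) *resource {
	mu.Lock()
	defer mu.Unlock()
	return table[h]
}

func remove(h handle) *resource {
	mu.Lock()
	defer mu.Unlock()
	r := table[h]
	delete(table, h)
	return r
}

func main() {
	h := add(&resource{name: "demo"})
	fmt.Println(get(h).name) // prints "demo"
	remove(h)
}
```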
cmake_minimum_required(VERSION 3.0)
include_directories(${CMAKE_BINARY_DIR})
add_executable(recordio_test test.c)
add_dependencies(recordio_test recordio)
set (CMAKE_EXE_LINKER_FLAGS "-pthread")
target_link_libraries(recordio_test ${CMAKE_BINARY_DIR}/librecordio.a)
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strcmp */

#include "librecordio.h"
void fail() {
// TODO(helin): fix: gtest using cmake is not working, using this
// hacky way for now.
printf("test failed.\n");
exit(-1);
}
int main() {
writer w = create_recordio_writer("/tmp/test_recordio_0");
recordio_write(w, "hello", 6);
recordio_write(w, "hi", 3);
release_recordio_writer(w);
w = create_recordio_writer("/tmp/test_recordio_1");
recordio_write(w, "dog", 4);
recordio_write(w, "cat", 4);
release_recordio_writer(w);
reader r = create_recordio_reader("/tmp/test_recordio_*");
unsigned char* item = NULL;
int size = recordio_read(r, &item);
if (strcmp(item, "hello") || size != 6) {
fail();
}
free(item);
size = recordio_read(r, &item);
if (strcmp(item, "hi") || size != 3) {
fail();
}
free(item);
size = recordio_read(r, &item);
if (strcmp(item, "dog") || size != 4) {
fail();
}
free(item);
size = recordio_read(r, &item);
if (strcmp(item, "cat") || size != 4) {
fail();
}
free(item);
size = recordio_read(r, &item);
if (size != -1) {
fail();
}
release_recordio_reader(r);
}
package recordio
import (
"bytes"
"compress/gzip"
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"github.com/golang/snappy"
)
// A Chunk contains the Header and optionally compressed records. To
// create a chunk, just use ch := &Chunk{}.
type Chunk struct {
records [][]byte
numBytes int // sum of record lengths.
}
func (ch *Chunk) add(record []byte) {
ch.records = append(ch.records, record)
ch.numBytes += len(record)
}
// dump writes the chunk into w, then clears the chunk, making it
// ready for the next add invocation.
func (ch *Chunk) dump(w io.Writer, compressorIndex int) error {
// NOTE: don't check ch.numBytes == 0 instead; empty records are
// allowed, so a chunk can hold records while numBytes stays 0.
if len(ch.records) == 0 {
return nil
}
// Write raw records and their lengths into data buffer.
var data bytes.Buffer
for _, r := range ch.records {
var rs [4]byte
binary.LittleEndian.PutUint32(rs[:], uint32(len(r)))
if _, e := data.Write(rs[:]); e != nil {
return fmt.Errorf("Failed to write record length: %v", e)
}
if _, e := data.Write(r); e != nil {
return fmt.Errorf("Failed to write record: %v", e)
}
}
compressed, e := compressData(&data, compressorIndex)
if e != nil {
return e
}
// Write chunk header and compressed data.
hdr := &Header{
checkSum: crc32.ChecksumIEEE(compressed.Bytes()),
compressor: uint32(compressorIndex),
compressedSize: uint32(compressed.Len()),
numRecords: uint32(len(ch.records)),
}
if _, e := hdr.write(w); e != nil {
return fmt.Errorf("Failed to write chunk header: %v", e)
}
if _, e := w.Write(compressed.Bytes()); e != nil {
return fmt.Errorf("Failed to write chunk data: %v", e)
}
// Clear the current chunk.
ch.records = nil
ch.numBytes = 0
return nil
}
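// On-disk layout written by dump (a descriptive sketch, not new API):
//
//	Header, 20 bytes: magic number | CRC32 of the compressed payload |
//	                  compressor id | compressed payload size | record count
//	Payload: for every record, a little-endian uint32 length followed by
//	         the raw record bytes; the whole payload is compressed with
//	         the selected compressor.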
type noopCompressor struct {
*bytes.Buffer
}
func (c *noopCompressor) Close() error {
return nil
}
func compressData(src io.Reader, compressorIndex int) (*bytes.Buffer, error) {
compressed := new(bytes.Buffer)
var compressor io.WriteCloser
switch compressorIndex {
case NoCompression:
compressor = &noopCompressor{compressed}
case Snappy:
compressor = snappy.NewBufferedWriter(compressed)
case Gzip:
compressor = gzip.NewWriter(compressed)
default:
return nil, fmt.Errorf("Unknown compression algorithm: %d", compressorIndex)
}
if _, e := io.Copy(compressor, src); e != nil {
return nil, fmt.Errorf("Failed to compress chunk data: %v", e)
}
compressor.Close()
return compressed, nil
}
// parse the specified chunk from r.
func parseChunk(r io.ReadSeeker, chunkOffset int64) (*Chunk, error) {
var e error
var hdr *Header
if _, e = r.Seek(chunkOffset, io.SeekStart); e != nil {
return nil, fmt.Errorf("Failed to seek chunk: %v", e)
}
hdr, e = parseHeader(r)
if e != nil {
return nil, fmt.Errorf("Failed to parse chunk header: %v", e)
}
var buf bytes.Buffer
if _, e = io.CopyN(&buf, r, int64(hdr.compressedSize)); e != nil {
return nil, fmt.Errorf("Failed to read chunk data: %v", e)
}
if hdr.checkSum != crc32.ChecksumIEEE(buf.Bytes()) {
return nil, fmt.Errorf("Checksum checking failed.")
}
deflated, e := deflateData(&buf, int(hdr.compressor))
if e != nil {
return nil, e
}
ch := &Chunk{}
for i := 0; i < int(hdr.numRecords); i++ {
var rs [4]byte
if _, e = deflated.Read(rs[:]); e != nil {
return nil, fmt.Errorf("Failed to read record length: %v", e)
}
r := make([]byte, binary.LittleEndian.Uint32(rs[:]))
if _, e = deflated.Read(r); e != nil {
return nil, fmt.Errorf("Failed to read a record: %v", e)
}
ch.records = append(ch.records, r)
ch.numBytes += len(r)
}
return ch, nil
}
func deflateData(src io.Reader, compressorIndex int) (*bytes.Buffer, error) {
var e error
var deflator io.Reader
switch compressorIndex {
case NoCompression:
deflator = src
case Snappy:
deflator = snappy.NewReader(src)
case Gzip:
deflator, e = gzip.NewReader(src)
if e != nil {
return nil, fmt.Errorf("Failed to create gzip reader: %v", e)
}
default:
return nil, fmt.Errorf("Unknown compression algorithm: %d", compressorIndex)
}
deflated := new(bytes.Buffer)
if _, e = io.Copy(deflated, deflator); e != nil {
return nil, fmt.Errorf("Failed to deflate chunk data: %v", e)
}
return deflated, nil
}
package recordio
import (
"encoding/binary"
"fmt"
"io"
)
const (
// NoCompression means writing raw chunk data into files.
// With other choices, chunks are compressed before written.
NoCompression = iota
// Snappy has been the default compression algorithm widely
// used at Google. It strikes a balance between speed and
// compression ratio.
Snappy

// Gzip is a well-known compression algorithm. It is
// recommended only if you mainly care about compression ratio.
Gzip
magicNumber uint32 = 0x01020304
defaultCompressor = Snappy
)
// Header is the metadata of Chunk.
type Header struct {
checkSum uint32
compressor uint32
compressedSize uint32
numRecords uint32
}
func (c *Header) write(w io.Writer) (int, error) {
var buf [20]byte
binary.LittleEndian.PutUint32(buf[0:4], magicNumber)
binary.LittleEndian.PutUint32(buf[4:8], c.checkSum)
binary.LittleEndian.PutUint32(buf[8:12], c.compressor)
binary.LittleEndian.PutUint32(buf[12:16], c.compressedSize)
binary.LittleEndian.PutUint32(buf[16:20], c.numRecords)
return w.Write(buf[:])
}
func parseHeader(r io.Reader) (*Header, error) {
var buf [20]byte
// io.ReadFull makes a short read surface as an error instead of
// silently yielding a garbled header.
if _, e := io.ReadFull(r, buf[:]); e != nil {
return nil, e
}
if v := binary.LittleEndian.Uint32(buf[0:4]); v != magicNumber {
return nil, fmt.Errorf("Failed to parse magic number")
}
return &Header{
checkSum: binary.LittleEndian.Uint32(buf[4:8]),
compressor: binary.LittleEndian.Uint32(buf[8:12]),
compressedSize: binary.LittleEndian.Uint32(buf[12:16]),
numRecords: binary.LittleEndian.Uint32(buf[16:20]),
}, nil
}
package recordio
import "io"
// Index consists of the offsets and sizes of the consecutive chunks in a RecordIO file.
type Index struct {
chunkOffsets []int64
chunkLens []uint32
numRecords int // the number of all records in a file.
chunkRecords []int // the number of records in chunks.
}
// LoadIndex scans the file and parses chunkOffsets, chunkLens, and numRecords.
func LoadIndex(r io.ReadSeeker) (*Index, error) {
f := &Index{}
offset := int64(0)
var e error
var hdr *Header
for {
hdr, e = parseHeader(r)
if e != nil {
break
}
f.chunkOffsets = append(f.chunkOffsets, offset)
f.chunkLens = append(f.chunkLens, hdr.numRecords)
f.chunkRecords = append(f.chunkRecords, int(hdr.numRecords))
f.numRecords += int(hdr.numRecords)
offset, e = r.Seek(int64(hdr.compressedSize), io.SeekCurrent)
if e != nil {
break
}
}
if e == io.EOF {
return f, nil
}
return nil, e
}
// NumRecords returns the total number of records in a RecordIO file.
func (r *Index) NumRecords() int {
return r.numRecords
}
// NumChunks returns the total number of chunks in a RecordIO file.
func (r *Index) NumChunks() int {
return len(r.chunkLens)
}
// ChunkIndex returns the Index of the i-th Chunk.
func (r *Index) ChunkIndex(i int) *Index {
idx := &Index{}
idx.chunkOffsets = []int64{r.chunkOffsets[i]}
idx.chunkLens = []uint32{r.chunkLens[i]}
idx.chunkRecords = []int{r.chunkRecords[i]}
idx.numRecords = idx.chunkRecords[0]
return idx
}
// Locate returns the index of chunk that contains the given record,
// and the record index within the chunk. It returns (-1, -1) if the
// record is out of range.
func (r *Index) Locate(recordIndex int) (int, int) {
sum := 0
for i, l := range r.chunkLens {
sum += int(l)
if recordIndex < sum {
return i, recordIndex - sum + int(l)
}
}
return -1, -1
}
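
// Example (a sketch for intuition, not part of the API): with
// chunkLens{2, 1}, records 0 and 1 live in chunk 0 and record 2 lives
// in chunk 1, so
//
//	idx := &Index{chunkLens: []uint32{2, 1}}
//	ci, ri := idx.Locate(2) // ci == 1, ri == 0
//	idx.Locate(3)           // out of range: returns (-1, -1)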
// RangeScanner scans records in a specified range within [0, numRecords).
type RangeScanner struct {
reader io.ReadSeeker
index *Index
start, end, cur int
chunkIndex int
chunk *Chunk
err error
}
// NewRangeScanner creates a scanner that sequentially reads records in the
// range [start, start+len). If start < 0, it scans from the
// beginning. If len < 0, it scans till the end of file.
func NewRangeScanner(r io.ReadSeeker, index *Index, start, len int) *RangeScanner {
if start < 0 {
start = 0
}
if len < 0 || start+len >= index.NumRecords() {
len = index.NumRecords() - start
}
return &RangeScanner{
reader: r,
index: index,
start: start,
end: start + len,
cur: start - 1, // The initial status required by Scan.
chunkIndex: -1,
chunk: &Chunk{},
}
}
// Scan moves the cursor forward by one record and loads the chunk
// containing the record if it is not loaded yet.
func (s *RangeScanner) Scan() bool {
s.cur++
if s.cur >= s.end {
s.err = io.EOF
} else {
if ci, _ := s.index.Locate(s.cur); s.chunkIndex != ci {
s.chunkIndex = ci
s.chunk, s.err = parseChunk(s.reader, s.index.chunkOffsets[ci])
}
}
return s.err == nil
}
// Record returns the record under the current cursor.
func (s *RangeScanner) Record() []byte {
_, ri := s.index.Locate(s.cur)
return s.chunk.records[ri]
}
// Err returns the first non-EOF error that was encountered by the
// Scanner.
func (s *RangeScanner) Err() error {
if s.err == io.EOF {
return nil
}
return s.err
}
package recordio
import (
"bytes"
"testing"
"unsafe"
"github.com/stretchr/testify/assert"
)
func TestChunkHead(t *testing.T) {
assert := assert.New(t)
c := &Header{
checkSum: 123,
compressor: 456,
compressedSize: 789,
}
var buf bytes.Buffer
_, e := c.write(&buf)
assert.Nil(e)
cc, e := parseHeader(&buf)
assert.Nil(e)
assert.Equal(c, cc)
}
func TestWriteAndRead(t *testing.T) {
assert := assert.New(t)
data := []string{
"12345",
"1234",
"12"}
var buf bytes.Buffer
w := NewWriter(&buf, 10, NoCompression) // use a small maxChunkSize.
n, e := w.Write([]byte(data[0])) // not exceed chunk size.
assert.Nil(e)
assert.Equal(5, n)
n, e = w.Write([]byte(data[1])) // not exceed chunk size.
assert.Nil(e)
assert.Equal(4, n)
n, e = w.Write([]byte(data[2])) // exceeds chunk size, dump and create a new chunk.
assert.Nil(e)
assert.Equal(n, 2)
assert.Nil(w.Close()) // flush the second chunk.
assert.Nil(w.Writer)
n, e = w.Write([]byte("anything")) // not effective after close.
assert.NotNil(e)
assert.Equal(n, 0)
idx, e := LoadIndex(bytes.NewReader(buf.Bytes()))
assert.Nil(e)
assert.Equal([]uint32{2, 1}, idx.chunkLens)
assert.Equal(
[]int64{0,
int64(4 + // magic number
unsafe.Sizeof(Header{}) +
5 + // first record
4 + // second record
2*4)}, // two record lengths
idx.chunkOffsets)
s := NewRangeScanner(bytes.NewReader(buf.Bytes()), idx, -1, -1)
i := 0
for s.Scan() {
assert.Equal(data[i], string(s.Record()))
i++
}
}
func TestWriteEmptyFile(t *testing.T) {
assert := assert.New(t)
var buf bytes.Buffer
w := NewWriter(&buf, 10, NoCompression) // use a small maxChunkSize.
assert.Nil(w.Close())
assert.Equal(0, buf.Len())
idx, e := LoadIndex(bytes.NewReader(buf.Bytes()))
assert.Nil(e)
assert.Equal(0, idx.NumRecords())
}
package recordio_test
import (
"bytes"
"reflect"
"testing"
"github.com/PaddlePaddle/Paddle/go/recordio"
)
func TestWriteRead(t *testing.T) {
const total = 1000
var buf bytes.Buffer
w := recordio.NewWriter(&buf, 0, -1)
for i := 0; i < total; i++ {
_, err := w.Write(make([]byte, i))
if err != nil {
t.Fatal(err)
}
}
w.Close()
idx, err := recordio.LoadIndex(bytes.NewReader(buf.Bytes()))
if err != nil {
t.Fatal(err)
}
if idx.NumRecords() != total {
t.Fatal("num record does not match:", idx.NumRecords(), total)
}
s := recordio.NewRangeScanner(bytes.NewReader(buf.Bytes()), idx, -1, -1)
i := 0
for s.Scan() {
if !reflect.DeepEqual(s.Record(), make([]byte, i)) {
t.Fatal("not equal:", len(s.Record()), len(make([]byte, i)))
}
i++
}
if i != total {
t.Fatal("total count not match:", i, total)
}
}
func TestChunkIndex(t *testing.T) {
const total = 1000
var buf bytes.Buffer
w := recordio.NewWriter(&buf, 0, -1)
for i := 0; i < total; i++ {
_, err := w.Write(make([]byte, i))
if err != nil {
t.Fatal(err)
}
}
w.Close()
idx, err := recordio.LoadIndex(bytes.NewReader(buf.Bytes()))
if err != nil {
t.Fatal(err)
}
if idx.NumChunks() != total {
t.Fatal("unexpected chunk num:", idx.NumChunks(), total)
}
for i := 0; i < total; i++ {
newIdx := idx.ChunkIndex(i)
s := recordio.NewRangeScanner(bytes.NewReader(buf.Bytes()), newIdx, -1, -1)
j := 0
for s.Scan() {
if !reflect.DeepEqual(s.Record(), make([]byte, i)) {
t.Fatal("not equal:", len(s.Record()), len(make([]byte, i)))
}
j++
}
if j != 1 {
t.Fatal("unexpected record per chunk:", j)
}
}
}
package recordio
import (
"fmt"
"os"
"path/filepath"
)
// Scanner is a scanner for multiple recordio files.
type Scanner struct {
paths []string
curFile *os.File
curScanner *RangeScanner
pathIdx int
end bool
err error
}
// NewScanner creates a new Scanner.
func NewScanner(paths ...string) (*Scanner, error) {
var ps []string
for _, s := range paths {
match, err := filepath.Glob(s)
if err != nil {
return nil, err
}
ps = append(ps, match...)
}
if len(ps) == 0 {
return nil, fmt.Errorf("no valid path provided: %v", paths)
}
return &Scanner{paths: ps}, nil
}
// Scan moves the cursor forward by one record and loads the chunk
// containing the record if it is not loaded yet.
func (s *Scanner) Scan() bool {
if s.err != nil {
return false
}
if s.end {
return false
}
if s.curScanner == nil {
more, err := s.nextFile()
if err != nil {
s.err = err
return false
}
if !more {
s.end = true
return false
}
}
curMore := s.curScanner.Scan()
s.err = s.curScanner.Err()
if s.err != nil {
return curMore
}
if !curMore {
err := s.curFile.Close()
if err != nil {
s.err = err
return false
}
s.curFile = nil
more, err := s.nextFile()
if err != nil {
s.err = err
return false
}
if !more {
s.end = true
return false
}
return s.Scan()
}
return true
}
// Err returns the first non-EOF error that was encountered by the
// Scanner.
func (s *Scanner) Err() error {
return s.err
}
// Record returns the record under the current cursor.
func (s *Scanner) Record() []byte {
if s.curScanner == nil {
return nil
}
return s.curScanner.Record()
}
// Close releases the resources.
func (s *Scanner) Close() error {
s.curScanner = nil
if s.curFile != nil {
err := s.curFile.Close()
s.curFile = nil
return err
}
return nil
}
func (s *Scanner) nextFile() (bool, error) {
if s.pathIdx >= len(s.paths) {
return false, nil
}
path := s.paths[s.pathIdx]
s.pathIdx++
f, err := os.Open(path)
if err != nil {
return false, err
}
idx, err := LoadIndex(f)
if err != nil {
f.Close()
return false, err
}
s.curFile = f
s.curScanner = NewRangeScanner(f, idx, 0, -1)
return true, nil
}
package recordio
import (
"fmt"
"io"
)
const (
defaultMaxChunkSize = 32 * 1024 * 1024
)
// Writer creates a RecordIO file.
type Writer struct {
io.Writer // Set to nil to mark a closed writer.
chunk *Chunk
maxChunkSize int // total records size, excluding metadata, before compression.
compressor int
}
// NewWriter creates a RecordIO file writer. Each chunk is compressed
// with the selected compressor (NoCompression, Snappy, or Gzip). A
// negative maxChunkSize or compressor selects the default value.
func NewWriter(w io.Writer, maxChunkSize, compressor int) *Writer {
if maxChunkSize < 0 {
maxChunkSize = defaultMaxChunkSize
}
if compressor < 0 {
compressor = defaultCompressor
}
return &Writer{
Writer: w,
chunk: &Chunk{},
maxChunkSize: maxChunkSize,
compressor: compressor}
}
// Write writes a record. It returns an error if Close has been called.
func (w *Writer) Write(record []byte) (int, error) {
if w.Writer == nil {
return 0, fmt.Errorf("Cannot write since writer had been closed")
}
if w.chunk.numBytes+len(record) > w.maxChunkSize {
if e := w.chunk.dump(w.Writer, w.compressor); e != nil {
return 0, e
}
}
w.chunk.add(record)
return len(record), nil
}
// Close flushes the current chunk and makes the writer invalid.
func (w *Writer) Close() error {
e := w.chunk.dump(w.Writer, w.compressor)
w.Writer = nil
return e
}
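
Putting the pieces of this package together — `NewWriter`, `LoadIndex`, and `NewRangeScanner` — a minimal end-to-end sketch (names are from the files above; an in-memory buffer stands in for a file):

```go
package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/PaddlePaddle/Paddle/go/recordio"
)

func main() {
	var buf bytes.Buffer

	// maxChunkSize = -1 picks the default; Gzip compresses every chunk.
	w := recordio.NewWriter(&buf, -1, recordio.Gzip)
	if _, err := w.Write([]byte("hello")); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil { // Close flushes the pending chunk.
		log.Fatal(err)
	}

	idx, err := recordio.LoadIndex(bytes.NewReader(buf.Bytes()))
	if err != nil {
		log.Fatal(err)
	}

	// start = -1, len = -1 scans every record.
	s := recordio.NewRangeScanner(bytes.NewReader(buf.Bytes()), idx, -1, -1)
	for s.Scan() {
		fmt.Println(string(s.Record()))
	}
	if s.Err() != nil {
		log.Fatal(s.Err())
	}
}
```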
...@@ -217,10 +217,10 @@ void SmoothL1CostLayer::forwardImp(Matrix& output,
    targetCpu->copyFrom(target);
    outputCpu->copyFrom(output);
    labelCpu->copyFrom(*label.value);
-   targetCpu->smoothL1(*outputCpu, *labelCpu);
+   targetCpu->smoothL1(*outputCpu, *labelCpu, 1.0);
    target.copyFrom(*targetCpu);
  } else {
-   target.smoothL1(output, *label.value);
+   target.smoothL1(output, *label.value, 1.0);
  }
}
...@@ -238,10 +238,10 @@ void SmoothL1CostLayer::backwardImp(Matrix& output,
    outputGCpu->copyFrom(outputG);
    outputCpu->copyFrom(output);
    labelCpu->copyFrom(*label.value);
-   outputGCpu->smoothL1Bp(*outputCpu, *labelCpu);
+   outputGCpu->smoothL1Bp(*outputCpu, *labelCpu, 1.0);
    outputG.copyFrom(*outputGCpu);
  } else {
-   outputG.smoothL1Bp(output, *label.value);
+   outputG.smoothL1Bp(output, *label.value, 1.0);
  }
}
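
For reference, the element-wise cost these calls accumulate (see the CpuMatrix::smoothL1 hunk further below) is the standard smooth L1 function of the residual $x = \mathrm{out}_j - \mathrm{lbl}_j$:

$$
\mathrm{smoothL1}(x) =
\begin{cases}
0.5\,x^2 & \text{if } |x| < 1 \\
|x| - 0.5 & \text{otherwise}
\end{cases}
$$

The new third argument, `destScale`, rescales the previously accumulated `cost[i]` before each update; both call sites above pass 1.0, which preserves the old behavior.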
......
...@@ -59,7 +59,7 @@ lstm_nest_group = recurrent_group(
    input=SubsequenceInput(emb_group), step=lstm_group, name="lstm_nest_group")
# hasSubseq ->(seqlastins) seq
lstm_last = last_seq(
-    input=lstm_nest_group, agg_level=AggregateLevel.EACH_SEQUENCE)
+    input=lstm_nest_group, agg_level=AggregateLevel.TO_SEQUENCE)

# seq ->(expand) hasSubseq
lstm_expand = expand_layer(
...@@ -71,7 +71,7 @@ lstm_expand = expand_layer(
lstm_average = pooling_layer(
    input=lstm_expand,
    pooling_type=AvgPooling(),
-    agg_level=AggregateLevel.EACH_SEQUENCE)
+    agg_level=AggregateLevel.TO_SEQUENCE)

with mixed_layer(
    size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
......
 cc_library(place SRCS place.cc)
+cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
 cc_library(ddim SRCS ddim.cc)
+cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
-if(WITH_TESTING)
-  add_subdirectory(test)
-endif()
+nv_test(cuda_test SRCS cuda_test.cu)
+nv_test(dim_test SRCS dim_test.cu DEPS ddim)
#include "paddle/majel/place.h" #include "paddle/majel/place.h"
#include <sstream> #include <sstream>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/utils/Logging.h"
TEST(Place, Equality) { TEST(Place, Equality) {
majel::CpuPlace cpu; majel::CpuPlace cpu;
...@@ -38,5 +37,4 @@ TEST(Place, Print) { ...@@ -38,5 +37,4 @@ TEST(Place, Print) {
ss << majel::CpuPlace(); ss << majel::CpuPlace();
EXPECT_EQ("CpuPlace", ss.str()); EXPECT_EQ("CpuPlace", ss.str());
} }
LOG(INFO) << "\n[----------] Done \n";
} }
cc_test(place_test
SRCS place_test.cc
DEPS place)
cc_test(ddim_test
SRCS ddim_test.cc
DEPS ddim)
if(WITH_GPU)
  nv_test(cuda_test SRCS cuda_test.cu)
  nv_test(dim_test SRCS dim_test.cu DEPS ddim)
endif()
@@ -3606,7 +3606,7 @@ void CpuMatrix::sumOfSquaresBp(Matrix& output, Matrix& label) {
   }
 }

-void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
+void CpuMatrix::smoothL1(Matrix& output, Matrix& label, real destScale) {
   CHECK(output.useGpu_ == false && label.useGpu_ == false)
       << "Matrix type are not equal";
@@ -3624,6 +3624,7 @@ void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
   for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) {
+    // scale the destination cost once per sample, before accumulating
+    cost[i] *= destScale;
     for (size_t j = 0; j < dim; ++j) {
       real absVal = std::fabs(out[j] - lbl[j]);
       if (absVal < 1.0)
         cost[i] += 0.5 * absVal * absVal;
       else
@@ -3632,7 +3633,7 @@ void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
   }
 }

-void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
+void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label, real destScale) {
   CHECK(output.useGpu_ == false && label.useGpu_ == false)
       << "Matrix type are not equal";
@@ -3650,6 +3651,7 @@ void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
   for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim, lbl += dim) {
     for (size_t j = 0; j < dim; ++j) {
       real val = out[j] - lbl[j];
+      grad[j] *= destScale;
       if (std::fabs(val) < 1) {
         grad[j] += val;
       } else {
......
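For reference, the terms accumulated above are the standard smooth L1 cost and its gradient; `destScale` rescales the destination buffer (per sample for the cost, per element for the gradient) before accumulation. Writing $x = \mathrm{out}_j - \mathrm{lbl}_j$, summed over dimensions $j$ into the per-sample cost:

\[
\mathrm{smoothL1}(x) =
  \begin{cases}
    \tfrac{1}{2}x^{2} & \text{if } |x| < 1 \\
    |x| - \tfrac{1}{2} & \text{otherwise,}
  \end{cases}
\qquad
\mathrm{smoothL1}'(x) =
  \begin{cases}
    x & \text{if } |x| < 1 \\
    \operatorname{sign}(x) & \text{otherwise.}
  \end{cases}
\]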
@@ -789,11 +789,11 @@ public:
     LOG(FATAL) << "Not implemented";
   }

-  virtual void smoothL1(Matrix& output, Matrix& label) {
+  virtual void smoothL1(Matrix& output, Matrix& label, real destScale) {
     LOG(FATAL) << "Not implemented";
   }

-  virtual void smoothL1Bp(Matrix& outputV, Matrix& label) {
+  virtual void smoothL1Bp(Matrix& outputV, Matrix& label, real destScale) {
     LOG(FATAL) << "Not implemented";
   }
@@ -1736,8 +1736,8 @@ public:
   /// gradient of sumOfSquares.
   void sumOfSquaresBp(Matrix& outputV, Matrix& label);

-  void smoothL1(Matrix& output, Matrix& label);
-  void smoothL1Bp(Matrix& output, Matrix& label);
+  void smoothL1(Matrix& output, Matrix& label, real destScale);
+  void smoothL1Bp(Matrix& output, Matrix& label, real destScale);

   void tanh(Matrix& output);
   void tanhDerivative(Matrix& output);
......
@@ -21,6 +21,8 @@ cd /paddle/build
 # build script will not fail if *.deb does not exist
 rm *.deb 2>/dev/null || true
+# delete previously built whl packages
+rm -rf /paddle/paddle/dist 2>/dev/null || true

 cat <<EOF
 ========================================
......
@@ -237,16 +237,19 @@ class AggregateLevel(object):
     Accordingly, AggregateLevel supports two modes:

-    - :code:`AggregateLevel.EACH_TIMESTEP` means the aggregation acts on each
+    - :code:`AggregateLevel.TO_NO_SEQUENCE` means the aggregation acts on each
       timestep of a sequence, both :code:`SUB_SEQUENCE` and :code:`SEQUENCE` will
       be aggregated to :code:`NO_SEQUENCE`.

-    - :code:`AggregateLevel.EACH_SEQUENCE` means the aggregation acts on each
+    - :code:`AggregateLevel.TO_SEQUENCE` means the aggregation acts on each
       sequence of a nested sequence, :code:`SUB_SEQUENCE` will be aggregated to
       :code:`SEQUENCE`.
     """
-    EACH_TIMESTEP = 'non-seq'
-    EACH_SEQUENCE = 'seq'
+    TO_NO_SEQUENCE = 'non-seq'
+    TO_SEQUENCE = 'seq'
+    # compatible with previous configuration
+    EACH_TIMESTEP = TO_NO_SEQUENCE
+    EACH_SEQUENCE = TO_SEQUENCE


 class LayerOutput(object):
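The renamed constants alias the old spellings, so existing configurations keep parsing. A minimal sketch of the equivalence (layer names are illustrative):

seq = data_layer(name='seq_in', size=128)
# Both calls select the same aggregation mode; the TO_* names are the
# documented spelling, EACH_TIMESTEP survives only as an alias.
new_style = pooling_layer(input=seq, pooling_type=AvgPooling(),
                          agg_level=AggregateLevel.TO_NO_SEQUENCE)
old_style = pooling_layer(input=seq, pooling_type=AvgPooling(),
                          agg_level=AggregateLevel.EACH_TIMESTEP)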
@@ -482,7 +485,7 @@ def table_projection(input, size=0, param_attr=None):
     return proj


-def identity_projection(input, offset=None):
+def identity_projection(input, offset=None, size=None):
     """
     1. IdentityProjection if offset=None. It performs:
@@ -523,8 +526,10 @@ def identity_projection(input, offset=None):
         proj = IdentityProjection(input_layer_name=input.name)
         proj.origin = input
     else:
+        if size is None:
+            size = input.size - offset
         proj = IdentityOffsetProjection(
-            input_layer_name=input.name, offset=offset)
+            input_layer_name=input.name, offset=offset, size=size)
         proj.origin = input
     return proj
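The new `size` argument lets a projection copy a slice that stops before the end of its input. A hypothetical sketch (the 100-wide `vec` layer is assumed):

vec = data_layer(name='vec', size=100)
# Copies dimensions [10, 30) of vec. Omitting size= keeps the old
# behaviour of input.size - offset, i.e. the remaining 90 dimensions.
slice_proj = identity_projection(input=vec, offset=10, size=20)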
@@ -1081,7 +1086,7 @@ def pooling_layer(input,
                   pooling_type=None,
                   name=None,
                   bias_attr=None,
-                  agg_level=AggregateLevel.EACH_TIMESTEP,
+                  agg_level=AggregateLevel.TO_NO_SEQUENCE,
                   layer_attr=None):
     """
     Pooling layer for sequence inputs, not used for Image.
@@ -1092,10 +1097,10 @@ def pooling_layer(input,
        seq_pool = pooling_layer(input=layer,
                                 pooling_type=AvgPooling(),
-                                agg_level=AggregateLevel.EACH_SEQUENCE)
+                                agg_level=AggregateLevel.TO_NO_SEQUENCE)

-    :param agg_level: AggregateLevel.EACH_TIMESTEP or
-                      AggregateLevel.EACH_SEQUENCE
+    :param agg_level: AggregateLevel.TO_NO_SEQUENCE or
+                      AggregateLevel.TO_SEQUENCE
     :type agg_level: AggregateLevel
     :param name: layer name.
     :type name: basestring
@@ -1365,7 +1370,7 @@ def grumemory(input,
 @layer_support()
 def last_seq(input,
              name=None,
-             agg_level=AggregateLevel.EACH_TIMESTEP,
+             agg_level=AggregateLevel.TO_NO_SEQUENCE,
              stride=-1,
              layer_attr=None):
     """
@@ -1400,7 +1405,7 @@ def last_seq(input,
                 " series information at all. Maybe you want to use"
                 " first_seq instead.")

-    if agg_level == AggregateLevel.EACH_SEQUENCE:
+    if agg_level == AggregateLevel.TO_SEQUENCE:
         assert stride == -1

     Layer(
@layer_support() @layer_support()
def first_seq(input, def first_seq(input,
name=None, name=None,
agg_level=AggregateLevel.EACH_TIMESTEP, agg_level=AggregateLevel.TO_NO_SEQUENCE,
stride=-1, stride=-1,
layer_attr=None): layer_attr=None):
""" """
...@@ -1457,7 +1462,7 @@ def first_seq(input, ...@@ -1457,7 +1462,7 @@ def first_seq(input,
' time series information at all. Maybe you want to use' ' time series information at all. Maybe you want to use'
' last_seq instead.') ' last_seq instead.')
if agg_level == AggregateLevel.EACH_SEQUENCE: if agg_level == AggregateLevel.TO_SEQUENCE:
assert stride == -1 assert stride == -1
Layer( Layer(
@@ -1480,16 +1485,18 @@ class ExpandLevel(object):
     ExpandLevel supports two modes:

-    - :code:`ExpandLevel.FROM_TIMESTEP` means the expandation acts on each
-      timestep of a sequence, :code:`NO_SEQUENCE` will be expanded to
+    - :code:`ExpandLevel.FROM_NO_SEQUENCE` means the expansion acts on
+      :code:`NO_SEQUENCE`, which will be expanded to
       :code:`SEQUENCE` or :code:`SUB_SEQUENCE`.

-    - :code:`ExpandLevel.FROM_SEQUENCE` means the expandation acts on each
-      sequence of a nested sequence, :code:`SEQUENCE` will be expanded to
+    - :code:`ExpandLevel.FROM_SEQUENCE` means the expansion acts on
+      :code:`SEQUENCE`, which will be expanded to
       :code:`SUB_SEQUENCE`.
     """
-    FROM_TIMESTEP = AggregateLevel.EACH_TIMESTEP
-    FROM_SEQUENCE = AggregateLevel.EACH_SEQUENCE
+    FROM_NO_SEQUENCE = AggregateLevel.TO_NO_SEQUENCE
+    FROM_SEQUENCE = AggregateLevel.TO_SEQUENCE
+    # compatible with previous configuration
+    FROM_TIMESTEP = FROM_NO_SEQUENCE


 @wrap_name_default()
@@ -1498,7 +1505,7 @@ def expand_layer(input,
                  expand_as,
                  name=None,
                  bias_attr=False,
-                 expand_level=ExpandLevel.FROM_TIMESTEP,
+                 expand_level=ExpandLevel.FROM_NO_SEQUENCE,
                  layer_attr=None):
     """
     A layer for "Expand Dense data or (sequence data where the length of each
@@ -1510,7 +1517,7 @@ def expand_layer(input,
        expand = expand_layer(input=layer1,
                              expand_as=layer2,
-                             expand_level=ExpandLevel.FROM_TIMESTEP)
+                             expand_level=ExpandLevel.FROM_NO_SEQUENCE)

     :param input: Input layer
     :type input: LayerOutput
@@ -2797,7 +2804,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
     if layer_type == LayerType.CONCAT_LAYER:
         assert not bias_attr

-    Layer(
+    layer = Layer(
         name=name,
         type=layer_type,
         inputs=[x.name for x in input] if is_concat_layer else input,
@@ -2805,13 +2812,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
         bias=ParamAttr.to_bias(bias_attr),
         **ExtraLayerAttribute.to_kwargs(layer_attr))

-    sz = 0
-    for each_input in input:
-        if each_input.size is not None:
-            sz += each_input.size
-        else:
-            sz = None
-            break
+    sz = layer.config.size

     return LayerOutput(
         name,
@@ -2979,7 +2980,7 @@ def memory(name,
 @layer_support()
 def lstm_step_layer(input,
                     state,
-                    size,
+                    size=None,
                     act=None,
                     name=None,
                     gate_act=None,
@@ -3045,6 +3046,9 @@ def lstm_step_layer(input,
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
+    assert size is None or state.size == size
+    size = state.size
+
     Layer(
         name=name,
         type=LayerType.LSTM_STEP_LAYER,
@@ -3052,7 +3056,7 @@ def lstm_step_layer(input,
         active_gate_type=gate_act.name,
         active_state_type=state_act.name,
         bias=ParamAttr.to_bias(bias_attr),
-        size=size,
+        size=state.size,
         inputs=[input.name, state.name],
         **ExtraLayerAttribute.to_kwargs(layer_attr))
......
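With `size` now optional, a step function only has to size its memory. A hypothetical sketch (`mixed_in` stands for the usual mixed projection feeding the step; the names are illustrative):

def lstm_step(mixed_in):
    cell = memory(name='lstm_state', size=128)
    # size= can be omitted: it is inferred from cell.size (128 here);
    # passing a conflicting value now trips the assertion instead.
    return lstm_step_layer(input=mixed_in, state=cell)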
@@ -6,7 +6,7 @@ din = data_layer(name='data', size=30)

 seq_op = [first_seq, last_seq]

-agg_level = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]
+agg_level = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE]

 opts = []
@@ -15,6 +15,7 @@ for op in seq_op:
         opts.append(op(input=din, agg_level=al))

 for op in seq_op:
-    opts.append(op(input=din, agg_level=AggregateLevel.EACH_TIMESTEP, stride=5))
+    opts.append(
+        op(input=din, agg_level=AggregateLevel.TO_NO_SEQUENCE, stride=5))

 outputs(opts)
@@ -9,4 +9,6 @@ outputs(
     expand_layer(
         input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_SEQUENCE),
     expand_layer(
-        input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_TIMESTEP))
+        input=din,
+        expand_as=data_seq,
+        expand_level=ExpandLevel.FROM_NO_SEQUENCE))
@@ -6,7 +6,7 @@ din = data_layer(name='dat_in', size=100)

 POOL_TYPE = [MaxPooling, AvgPooling, SumPooling]

-AGG_LEVEL = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]
+AGG_LEVEL = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE]

 opts = []
......
@@ -39,6 +39,10 @@ def make_layer_label(layer_config):

 def make_diagram(config_file, dot_file, config_arg_str):
     config = parse_config(config_file, config_arg_str)
+    make_diagram_from_proto(config.model_config, dot_file)
+
+
+def make_diagram_from_proto(model_config, dot_file):
     # print >> sys.stderr, config
     name2id = {}
     f = open(dot_file, 'w')
@@ -59,12 +63,12 @@ def make_diagram(config_file, dot_file, config_arg_str):
     print >> f, 'digraph graphname {'
     print >> f, 'node [width=0.375,height=0.25];'
-    for i in xrange(len(config.model_config.layers)):
-        l = config.model_config.layers[i]
+    for i in xrange(len(model_config.layers)):
+        l = model_config.layers[i]
         name2id[l.name] = i

     i = 0
-    for sub_model in config.model_config.sub_models:
+    for sub_model in model_config.sub_models:
         if sub_model.name == 'root':
             continue
         print >> f, 'subgraph cluster_%s {' % i
@@ -78,18 +82,18 @@ def make_diagram(config_file, dot_file, config_arg_str):
         for layer_name in sub_model.layer_names:
             submodel_layers.add(layer_name)
             lid = name2id[layer_name]
-            layer_config = config.model_config.layers[lid]
+            layer_config = model_config.layers[lid]
             label = make_layer_label(layer_config)
             print >> f, 'l%s [label="%s", shape=box];' % (lid, label)
         print >> f, '}'

-    for i in xrange(len(config.model_config.layers)):
-        l = config.model_config.layers[i]
+    for i in xrange(len(model_config.layers)):
+        l = model_config.layers[i]
         if l.name not in submodel_layers:
             label = make_layer_label(l)
             print >> f, 'l%s [label="%s", shape=box];' % (i, label)

-    for sub_model in config.model_config.sub_models:
+    for sub_model in model_config.sub_models:
         if sub_model.name == 'root':
             continue
         for link in sub_model.in_links:
@@ -99,8 +103,8 @@ def make_diagram(config_file, dot_file, config_arg_str):
         for mem in sub_model.memories:
             print >> f, make_mem(mem)

-    for i in xrange(len(config.model_config.layers)):
-        for l in config.model_config.layers[i].inputs:
+    for i in xrange(len(model_config.layers)):
+        for l in model_config.layers[i].inputs:
             print >> f, 'l%s -> l%s [label="%s"];' % (
                 name2id[l.input_layer_name], i, l.input_parameter_name)
......
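Factoring the body into `make_diagram_from_proto` lets callers that already hold a ModelConfig proto skip the config-file parsing. A minimal sketch (file names are illustrative):

# Equivalent to make_diagram('trainer_config.py', 'model.dot', ''),
# but the parsed proto can now be reused or produced elsewhere.
config = parse_config('trainer_config.py', '')
make_diagram_from_proto(config.model_config, 'model.dot')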
@@ -19,8 +19,10 @@ import shutil
 import sys
 import importlib
 import paddle.v2.dataset
+import cPickle
+import glob

-__all__ = ['DATA_HOME', 'download', 'md5file']
+__all__ = ['DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader']

 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
@@ -74,3 +76,76 @@ def fetch_all():
         getattr(
             importlib.import_module("paddle.v2.dataset.%s" % module_name),
             "fetch")()

def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump):
    """
    You can call the function as:

    split(paddle.v2.dataset.cifar.train10(), line_count=1000,
        suffix="imikolov-train-%05d.pickle")

    The output files will be:

    |-imikolov-train-00000.pickle
    |-imikolov-train-00001.pickle
    |- ...
    |-imikolov-train-00480.pickle

    :param reader: is a reader creator
    :param line_count: line count for each file
    :param suffix: the suffix for the output files, should contain "%d",
        which is replaced by the index of each file. Default is
        "%05d.pickle".
    :param dumper: is a callable function that dumps an object to a file.
        It will be called as dumper(obj, f), where obj is the object to be
        dumped and f is a file object. Default is cPickle.dump.
    """
    if not callable(dumper):
        raise TypeError("dumper should be callable.")
    lines = []
    indx_f = 0
    for i, d in enumerate(reader()):
        lines.append(d)
        # dump a full shard every line_count records
        if (i + 1) % line_count == 0:
            with open(suffix % indx_f, "w") as f:
                dumper(lines, f)
            lines = []
            indx_f += 1
    if lines:
        with open(suffix % indx_f, "w") as f:
            dumper(lines, f)


def cluster_files_reader(files_pattern,
                         trainer_count,
                         trainer_id,
                         loader=cPickle.load):
    """
    Create a reader that yields elements from the given files, selecting
    a file subset according to the trainer count and trainer_id.

    :param files_pattern: the files which were generated by split(...)
    :param trainer_count: total trainer count
    :param trainer_id: the trainer rank id
    :param loader: is a callable function that loads an object from a file.
        It will be called as loader(f), where f is a file object.
        Default is cPickle.load.
    """

    def reader():
        if not callable(loader):
            raise TypeError("loader should be callable.")
        file_list = glob.glob(files_pattern)
        file_list.sort()
        my_file_list = []
        for idx, fn in enumerate(file_list):
            # round-robin assignment of shard files to trainers
            if idx % trainer_count == trainer_id:
                print "append file: %s" % fn
                my_file_list.append(fn)
        for fn in my_file_list:
            with open(fn, "r") as f:
                lines = loader(f)
                for line in lines:
                    yield line

    return reader
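Together the two helpers form a simple sharding workflow: split once, then let every trainer read only its own files. A minimal sketch, assuming two trainers and a writable working directory (dataset and file names are illustrative):

import paddle.v2.dataset.common as common
import paddle.v2.dataset.uci_housing as uci_housing

# One-off preprocessing: write pickled shards of 100 samples each.
common.split(uci_housing.train(), line_count=100,
             suffix="housing-%05d.pickle")

# On trainer 0 of 2: iterate over shards 0, 2, 4, ...
train_reader = common.cluster_files_reader(
    "housing-*.pickle", trainer_count=2, trainer_id=0)
for sample in train_reader():
    pass  # feed the sample into the local trainer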
@@ -15,6 +15,7 @@
 import paddle.v2.dataset.common
 import unittest
 import tempfile
+import glob


 class TestCommon(unittest.TestCase):
@@ -32,6 +33,30 @@ class TestCommon(unittest.TestCase):
             paddle.v2.dataset.common.download(
                 yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d'))

+    def test_split(self):
+        def test_reader():
+            def reader():
+                for x in xrange(10):
+                    yield x
+
+            return reader
+
+        # mkdtemp (not mkstemp): the shards are written inside a directory.
+        temp_path = tempfile.mkdtemp()
+        paddle.v2.dataset.common.split(
+            test_reader(), 4, suffix=temp_path + '/test-%05d.pickle')
+        files = glob.glob(temp_path + '/test-*.pickle')
+        self.assertEqual(len(files), 3)
+
+    def test_cluster_file_reader(self):
+        temp_path = tempfile.mkdtemp()
+        for x in xrange(5):
+            with open(temp_path + '/%05d.test' % x, 'w') as f:
+                f.write('%d\n' % x)
+        reader = paddle.v2.dataset.common.cluster_files_reader(
+            temp_path + '/*.test', 5, 0)
+        for idx, e in enumerate(reader()):
+            self.assertEqual(e, str("0"))

 if __name__ == '__main__':
     unittest.main()
@@ -152,7 +152,7 @@ def __get_used_layers__(output_layers, extra_layers=None):
     return layer_names


-def __get_used_parameters__(layer_names):
+def __get_used_parameters__(layer_names, sub_models):
     parameter_names = set()
     for name in layer_names:
         l = cp.g_layer_map[name]
@@ -161,6 +161,12 @@ def __get_used_parameters__(layer_names):
             parameter_names.add(inp.input_parameter_name)
         if l.bias_parameter_name:
             parameter_names.add(l.bias_parameter_name)
+
+    for sub_model in sub_models:
+        for mem in sub_model.memories:
+            if mem.HasField("boot_bias_parameter_name"):
+                parameter_names.add(mem.boot_bias_parameter_name)
+
     return parameter_names
@@ -236,7 +242,6 @@ def parse_network(output_layers, extra_layers=None):
     layer_names = __get_used_layers__(output_layers + extra_layers)
     submodel_names = __get_used_submodels__(layer_names)
     submodel_names.add('root')
-    parameter_names = __get_used_parameters__(layer_names)
     evaluator_names = __get_used_evaluators__(layer_names)
     input_layer_names = set()
     output_layer_names = set()
@@ -251,10 +256,6 @@ def parse_network(output_layers, extra_layers=None):
         model_config.input_layer_names.append(l.name)
         input_layer_names.add(l.name)

-    for p in cp.g_config.model_config.parameters:
-        if p.name in parameter_names:
-            model_config.parameters.extend([p])
-
     for layer in output_layers:
         model_config.output_layer_names.append(layer.full_name)
         output_layer_names.add(layer.full_name)
@@ -269,6 +270,13 @@ def parse_network(output_layers, extra_layers=None):
             output_layer_names, evaluator_names)
         model_config.sub_models.extend([s])

+    parameter_names = __get_used_parameters__(layer_names,
+                                              model_config.sub_models)
+
+    for p in cp.g_config.model_config.parameters:
+        if p.name in parameter_names:
+            model_config.parameters.extend([p])
+
     return model_config
......
@@ -73,7 +73,7 @@ class AggregateLayerTest(unittest.TestCase):
         pool = layer.pooling(
             input=pixel,
             pooling_type=pooling.Avg(),
-            agg_level=layer.AggregateLevel.EACH_SEQUENCE)
+            agg_level=layer.AggregateLevel.TO_SEQUENCE)
         last_seq = layer.last_seq(input=pixel)
         first_seq = layer.first_seq(input=pixel)
         concat = layer.concat(input=[last_seq, first_seq])
@@ -109,7 +109,7 @@ class ReshapeLayerTest(unittest.TestCase):
         expand = layer.expand(
             input=weight,
             expand_as=pixel,
-            expand_level=layer.ExpandLevel.FROM_TIMESTEP)
+            expand_level=layer.ExpandLevel.FROM_NO_SEQUENCE)
         repeat = layer.repeat(input=pixel, num_repeats=4)
         reshape = layer.seq_reshape(input=pixel, reshape_size=4)
         rotate = layer.rotate(input=pixel, height=16, width=49)
......