Commit 9c1c19b6, authored by: Yu Yang

Merge branch 'develop' of github.com:baidu/Paddle into feature/c_api

.gitignore
\ No newline at end of file
*.DS_Store
build/
*.user
.vscode
.idea
.project
.cproject
.pydevproject
Makefile
.test_env/
third_party/
*~
bazel-*
!build/*.deb
...@@ -7,6 +7,7 @@ build/ ...@@ -7,6 +7,7 @@ build/
.project .project
.cproject .cproject
.pydevproject .pydevproject
.settings/
Makefile Makefile
.test_env/ .test_env/
third_party/ third_party/
......
...@@ -40,7 +40,7 @@ option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF) ...@@ -40,7 +40,7 @@ option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF) option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF) option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(ON_COVERALLS "Compile PaddlePaddle with code coverage" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
...@@ -90,14 +90,21 @@ include_directories("${PROJ_ROOT}/paddle/cuda/include") ...@@ -90,14 +90,21 @@ include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
set(EXTERNAL_LIBS set(EXTERNAL_LIBS
# have not include gtest here.
${GFLAGS_LIBRARIES} ${GFLAGS_LIBRARIES}
${GLOG_LIBRARIES} ${GLOG_LIBRARIES}
${CBLAS_LIBRARIES} ${CBLAS_LIBRARIES}
${PROTOBUF_LIBRARY} ${PROTOBUF_LIBRARY}
${ZLIB_LIBRARIES} ${ZLIB_LIBRARIES}
${PYTHON_LIBRARIES}
) )
if(WITH_GPU)
list(APPEND EXTERNAL_LIB ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIB ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
endif(NOT WITH_DSO)
endif(WITH_GPU)
add_subdirectory(proto) add_subdirectory(proto)
add_subdirectory(paddle) add_subdirectory(paddle)
add_subdirectory(python) add_subdirectory(python)
......
...@@ -3,20 +3,17 @@ ...@@ -3,20 +3,17 @@
FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com> MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ARG DEBIAN_FRONTEND=noninteractive
ARG UBUNTU_MIRROR ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
# ENV variables # ENV variables
ARG BUILD_WOBOQ ARG BUILD_WOBOQ
ARG BUILD_AND_INSTALL
ARG WITH_GPU ARG WITH_GPU
ARG WITH_AVX ARG WITH_AVX
ARG WITH_DOC ARG WITH_DOC
ARG WITH_STYLE_CHECK ARG WITH_STYLE_CHECK
ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF} ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
ENV WITH_GPU=${WITH_AVX:-OFF} ENV WITH_GPU=${WITH_AVX:-OFF}
ENV WITH_AVX=${WITH_AVX:-ON} ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF} ENV WITH_DOC=${WITH_DOC:-OFF}
...@@ -31,7 +28,7 @@ RUN apt-get update && \ ...@@ -31,7 +28,7 @@ RUN apt-get update && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \ apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \ apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \ apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake locales clang-format-3.8 && \ apt-get install -y automake locales clang-format-3.8 swig && \
apt-get clean -y apt-get clean -y
# git credential to skip password typing # git credential to skip password typing
...@@ -51,8 +48,6 @@ RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ ...@@ -51,8 +48,6 @@ RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \ cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
cd .. && rm -rf cmake-3.4.1 cd .. && rm -rf cmake-3.4.1
RUN apt-get install -y swig
VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"] VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"]
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/) [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/develop/doc/)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html) [![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/doc_cn/)
[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop) [![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
...@@ -59,36 +59,36 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl ...@@ -59,36 +59,36 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
the capability of PaddlePaddle to make a huge impact for your product. the capability of PaddlePaddle to make a huge impact for your product.
## Installation ## Installation
Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
pre-built packages (**docker image**, **deb package**) or It is recommended to check out the
directly build on **Linux** and **Mac OS X** from the source code. [Docker installation guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html)
before looking into the
[build from source guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html)
## Documentation ## Documentation
Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br> We provide [English](http://www.paddlepaddle.org/develop/doc/) and
You can follow the quick start tutorial to learn how use PaddlePaddle [Chinese](http://www.paddlepaddle.org/doc_cn/) documentation.
step-by-step.
- [Deep Learning 101](http://book.paddlepaddle.org/index.en.html)
You might want to start from the this online interactive book that can run in Jupyter Notebook.
- [Distributed Training](http://www.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html)
You can run distributed training jobs on MPI clusters.
- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html)
- [Example and Demo](http://paddlepaddle.org/doc/demo/) <br> You can also run distributed training jobs on Kubernetes clusters.
We provide five demos, including: image classification, sentiment analysis,
sequence to sequence model, recommendation, semantic role labeling.
- [Distributed Training](http://paddlepaddle.org/doc/cluster) <br> - [Python API](http://www.paddlepaddle.org/develop/doc/api/index_en.html)
This system supports training deep learning models on multiple machines
with data parallelism.
- [Python API](http://paddlepaddle.org/doc/ui/) <br> Our new API enables much shorter programs.
PaddlePaddle supports using either Python interface or C++ to build your
system. We also use SWIG to wrap C++ source code to create a user friendly
interface for Python. You can also use SWIG to create interface for your
favorite programming language.
- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html) <br> - [How to Contribute](http://www.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html)
We sincerely appreciate your interest and contributions. If you would like to
contribute, please read the contribution guide.
- [Source Code Documents](http://paddlepaddle.org/doc/source/) <br> We appreciate your contributions!
## Ask Questions ## Ask Questions
......
...@@ -61,7 +61,7 @@ function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH) ...@@ -61,7 +61,7 @@ function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH)
endif() endif()
endfunction() endfunction()
if(ON_COVERALLS) if(WITH_COVERAGE)
set(CMAKE_BUILD_TYPE "Debug") set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
......
...@@ -134,7 +134,7 @@ foreach(GCDA ${GCDA_FILES}) ...@@ -134,7 +134,7 @@ foreach(GCDA ${GCDA_FILES})
# If -p is not specified then the file is named only "the_file.c.gcov" # If -p is not specified then the file is named only "the_file.c.gcov"
# #
execute_process( execute_process(
COMMAND "${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null" COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null
WORKING_DIRECTORY ${GCDA_DIR} WORKING_DIRECTORY ${GCDA_DIR}
) )
endforeach() endforeach()
......
...@@ -45,7 +45,7 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -45,7 +45,7 @@ IF(NOT ${CBLAS_FOUND})
PREFIX ${CBLAS_SOURCES_DIR} PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR} INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR> INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -14,7 +14,8 @@ ...@@ -14,7 +14,8 @@
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
FIND_PACKAGE(Protobuf 3.1) set(PROTOBUF_VERSION 3.1)
FIND_PACKAGE(Protobuf ${PROTOBUF_VERSION})
IF(PROTOBUF_FOUND) IF(PROTOBUF_FOUND)
EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION) EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION)
......
...@@ -90,26 +90,6 @@ function(link_paddle_exe TARGET_NAME) ...@@ -90,26 +90,6 @@ function(link_paddle_exe TARGET_NAME)
${RDMA_LD_FLAGS} ${RDMA_LD_FLAGS}
${RDMA_LIBS}) ${RDMA_LIBS})
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
${PYTHON_LIBRARIES} util)
endif()
if(WITH_GPU)
target_link_libraries(${TARGET_NAME} ${CUDA_CUDART_LIBRARY})
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME}
${CUDNN_LIBRARY}
${CUDA_curand_LIBRARY})
CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
endif()
check_library_exists(rt clock_gettime "time.h" HAVE_CLOCK_GETTIME )
if(HAVE_CLOCK_GETTIME)
target_link_libraries(${TARGET_NAME} rt)
endif()
endif()
add_dependencies(${TARGET_NAME} ${external_project_dependencies}) add_dependencies(${TARGET_NAME} ${external_project_dependencies})
endfunction() endfunction()
......
...@@ -109,6 +109,12 @@ sum_to_one_norm ...@@ -109,6 +109,12 @@ sum_to_one_norm
:members: sum_to_one_norm :members: sum_to_one_norm
:noindex: :noindex:
cross_channel_norm
------------------
.. automodule:: paddle.v2.layer
:members: cross_channel_norm
:noindex:
Recurrent Layers Recurrent Layers
================ ================
......
...@@ -51,7 +51,7 @@ PaddlePaddle supports some build options. ...@@ -51,7 +51,7 @@ PaddlePaddle supports some build options.
<tr><td class="left">WITH_TIMER</td><td class="left">Compile PaddlePaddle with stats timer</td></tr> <tr><td class="left">WITH_TIMER</td><td class="left">Compile PaddlePaddle with stats timer</td></tr>
<tr><td class="left">WITH_PROFILER</td><td class="left">Compile PaddlePaddle with GPU profiler</td></tr> <tr><td class="left">WITH_PROFILER</td><td class="left">Compile PaddlePaddle with GPU profiler</td></tr>
<tr><td class="left">WITH_DOC</td><td class="left">Compile PaddlePaddle with documentation</td></tr> <tr><td class="left">WITH_DOC</td><td class="left">Compile PaddlePaddle with documentation</td></tr>
<tr><td class="left">ON_COVERALLS</td><td class="left">Compile PaddlePaddle with code coverage</td></tr> <tr><td class="left">WITH_COVERAGE</td><td class="left">Compile PaddlePaddle with code coverage</td></tr>
<tr><td class="left">COVERALLS_UPLOAD</td><td class="left">Package code coverage data to coveralls</td></tr> <tr><td class="left">COVERALLS_UPLOAD</td><td class="left">Package code coverage data to coveralls</td></tr>
<tr><td class="left">ON_TRAVIS</td><td class="left">Exclude special unit test on Travis CI</td></tr> <tr><td class="left">ON_TRAVIS</td><td class="left">Exclude special unit test on Travis CI</td></tr>
</tbody> </tbody>
......
# Run SWIG over api/Paddle.swig to produce the Python wrapper
# (py_paddle/swig_paddle.py) and the C++ glue sources (Paddle_wrap.cxx/.h),
# then expose them through an always-built custom target.
#
# target_name - name of the custom target that depends on the generated files.
function(generate_python_api target_name)
  add_custom_command(
    OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
           ${PROJ_ROOT}/paddle/Paddle_wrap.cxx
           ${PROJ_ROOT}/paddle/Paddle_wrap.h
    # SWIG writes swig_paddle.py into the working directory, so move it
    # into the py_paddle package afterwards.
    COMMAND ${SWIG_EXECUTABLE} -python -c++ -outcurrentdir -I../ api/Paddle.swig
            && mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
    DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
            ${PROJ_ROOT}/paddle/api/PaddleAPI.h
            ${external_project_dependencies}
    WORKING_DIRECTORY ${PROJ_ROOT}/paddle
    COMMENT "Generate Python API from swig")
  add_custom_target(${target_name} ALL
    DEPENDS ${PROJ_ROOT}/paddle/Paddle_wrap.cxx
            ${PROJ_ROOT}/paddle/Paddle_wrap.h
            ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
            ${external_project_dependencies})
endfunction()
set(API_SOURCES set(API_SOURCES
Arguments.cpp Arguments.cpp
ConfigParser.cpp ConfigParser.cpp
...@@ -33,65 +15,86 @@ set(API_HEADER ...@@ -33,65 +15,86 @@ set(API_HEADER
PaddleAPI.h PaddleAPI.h
Internal.h) Internal.h)
add_library(paddle_api STATIC add_library(paddle_api STATIC ${API_SOURCES})
${API_SOURCES})
add_dependencies(paddle_api gen_proto_cpp) add_dependencies(paddle_api gen_proto_cpp)
list(LENGTH "${GFLAGS_LIBRARIES}" GFLAGS_LIBRARIES_LENGTH) INCLUDE(${SWIG_USE_FILE})
INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle)
if(${GFLAGS_LIBRARIES_LENGTH} EQUAL 0 AND TARGET "${GFLAGS_LIBRARIES}") FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py)
# Because gflags compiled by cmake, so it is imported by cmake target,
# not a real library path. Get the real library path here. SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)
message(STATUS "GFLAGS Libraries is ${GFLAGS_LIBRARIES}")
get_target_property(GFLAGS_LOCATION ${GFLAGS_LIBRARIES} LOCATION) SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR})
message(STATUS "GFLAGS Target location is ${GFLAGS_LOCATION}") SET(CMAKE_CXX_FLAGS "-std=c++11 -fPIC -Wall")
else() IF(WITH_COVERAGE)
set(GFLAGS_LOCATION ${GFLAGS_LIBRARIES}) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
endif() ENDIF(WITH_COVERAGE)
configure_file( SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
paddle_api_config.py.in paddle_parameter
${PROJ_ROOT}/paddle/api/paddle_api_config.py paddle_function
paddle_math
paddle_utils
paddle_gserver
paddle_pserver
paddle_api
paddle_cuda
paddle_trainer_lib
paddle_network
paddle_proto
${external_project_dependencies}
) )
generate_python_api(python_swig_sources) IF(APPLE)
SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load")
ELSE(APPLE)
SET(START_GROUP "-Xlinker -start-group")
SET(END_GROUP "-Xlinker -end-group")
SET(ARCHIVE_START "-Wl,--whole-archive")
SET(ARCHIVE_END "-Wl,--no-whole-archive")
ENDIF(APPLE)
file(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py) SWIG_ADD_MODULE(swig_paddle python Paddle.i)
SWIG_LINK_LIBRARIES(swig_paddle
${MACOS_LD_FLAGS}
${START_GROUP}
${ARCHIVE_START}
paddle_gserver
paddle_function
${METRIC_LIBS}
${ARCHIVE_END}
paddle_pserver
paddle_trainer_lib
paddle_network
paddle_parameter
paddle_math
paddle_utils
paddle_proto
paddle_cuda
paddle_api
${CMAKE_DL_LIBS}
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${RDMA_LD_FLAGS}
${RDMA_LIBS}
${START_END}
)
# TODO(yuyang18) : make wheel name calculated by cmake add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/dist/.timestamp COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch dist/.timestamp COMMAND ${CMAKE_COMMAND} -E touch dist/.timestamp
COMMAND rm -rf py_paddle.egg-info build COMMAND rm -rf py_paddle.egg-info build
WORKING_DIRECTORY ${PROJ_ROOT}/paddle WORKING_DIRECTORY ${PROJ_ROOT}/paddle
DEPENDS python_swig_sources DEPENDS _swig_paddle
paddle_parameter
paddle_function
paddle_math
paddle_utils
paddle_gserver
paddle_pserver
paddle_trainer
paddle_api
paddle_cuda
${PY_PADDLE_PYTHON_FILES}
) )
install(DIRECTORY ${PROJ_ROOT}/paddle/dist/ # TODO(yuyang18) : make wheel name calculated by cmake
DESTINATION opt/paddle/share/wheels add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so)
)
add_custom_target(python_api_wheel ALL DEPENDS install(DIRECTORY ${PROJ_ROOT}/paddle/dist/ DESTINATION opt/paddle/share/wheels)
${PROJ_ROOT}/paddle/dist/.timestamp)
add_dependencies(python_api_wheel python_swig_sources
paddle_parameter
paddle_math
paddle_utils
paddle_gserver
paddle_pserver
paddle_trainer
paddle_api
paddle_cuda)
if(WITH_TESTING) if(WITH_TESTING)
IF(NOT PY_PIP_FOUND) IF(NOT PY_PIP_FOUND)
......
# Build-time configuration template: CMake's configure_file() substitutes
# the @VAR@ placeholders, and the result is star-imported by the linker-flag
# helper (PaddleLDFlag).

# Root of the CMake build tree (this file is generated one level below it).
PADDLE_BUILD_DIR="@CMAKE_CURRENT_BINARY_DIR@/../"
WITH_GPU="@WITH_GPU@"
PROTOBUF_LIBRARY="@PROTOBUF_LIBRARY@"
ZLIB_LIBRARIES="@ZLIB_LIBRARIES@"
CMAKE_THREAD_LIB="@CMAKE_THREAD_LIBS_INIT@"
CMAKE_DL_LIBS="@CMAKE_DL_LIBS@"
WITH_PYTHON="@WITH_PYTHON@"
PYTHON_LIBRARIES="@PYTHON_LIBRARIES@"
GLOG_LIBRARIES="@GLOG_LIBRARIES@"
GFLAGS_LIBRARIES="@GFLAGS_LIBRARIES@"
# Real library path resolved when gflags is an imported CMake target rather
# than a plain file path.
GFLAGS_LOCATION="@GFLAGS_LOCATION@"
CBLAS_LIBRARIES="@CBLAS_LIBRARIES@"
CUDA_LIBRARIES="@CUDA_CUDART_LIBRARY@"
# NOTE(review): elsewhere in this change the ON_COVERALLS option is renamed
# to WITH_COVERAGE, but this placeholder still reads ON_COVERALLS -- confirm
# the variable is still defined at configure time.
WITH_COVERALLS="@ON_COVERALLS@"
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
    # Succeeds only in a configured build tree, where CMake has generated
    # paddle_api_config.py next to this module.
    from paddle_api_config import *
    import os.path
    import platform

    system = platform.system().lower()
    is_osx = (system == 'darwin')
    is_win = (system == 'windows')
    is_lin = (system == 'linux')

    # Linker flags bracketing archives whose every object must be kept.
    # OS X's linker loads whole archives differently, so both are empty.
    # NOTE(review): neither name is bound on Windows, so libs_str() would
    # raise NameError there -- presumably Windows is unsupported; confirm.
    if is_lin:
        whole_start = "-Wl,--whole-archive"
        whole_end = "-Wl,--no-whole-archive"
    elif is_osx:
        whole_start = ""
        whole_end = ""

    # Build-tree sub-directories that contain paddle libraries, plus the
    # directories that live one level above the build dir.
    LIB_DIRS = [
        "math", 'function', 'utils', 'parameter', "gserver", "api", "cuda",
        "pserver", "trainer"
    ]
    PARENT_LIB_DIRS = ['proto']

    class PaddleLDFlag(object):
        """Derives linker and compiler flags for linking against a
        configured Paddle build tree, from the CMake-generated
        paddle_api_config values."""

        def __init__(self):
            self.paddle_build_dir = os.path.abspath(PADDLE_BUILD_DIR)
            self.with_gpu = PaddleLDFlag.cmake_bool(WITH_GPU)
            self.protolib = PROTOBUF_LIBRARY
            self.zlib = ZLIB_LIBRARIES
            self.thread = CMAKE_THREAD_LIB
            self.dl_libs = CMAKE_DL_LIBS
            self.with_python = PaddleLDFlag.cmake_bool(WITH_PYTHON)
            self.python_libs = PYTHON_LIBRARIES
            self.glog_libs = GLOG_LIBRARIES
            self.with_coverage = PaddleLDFlag.cmake_bool(WITH_COVERALLS)
            self.gflags_libs = GFLAGS_LIBRARIES
            self.gflags_location = GFLAGS_LOCATION
            self.cblas_libs = CBLAS_LIBRARIES
            self.curt = CUDA_LIBRARIES

        def ldflag_str(self):
            """Full linker flag string: -L directories then libraries."""
            pieces = [self.libs_dir_str(), self.parent_dir_str(),
                      self.libs_str()]
            return " ".join(pieces)

        def libs_dir_str(self):
            """-L flags for the library directories inside the build dir."""
            return " ".join("-L" + os.path.join(self.paddle_build_dir, d)
                            for d in LIB_DIRS)

        def parent_dir_str(self):
            """-L flags for directories one level above the build dir."""
            return " ".join(
                "-L" + os.path.join(self.paddle_build_dir, '..', d)
                for d in PARENT_LIB_DIRS)

        def libs_str(self):
            """-l flags for paddle libraries and their dependencies; the
            gserver/function archives are wrapped in whole-archive flags."""
            libs = [
                whole_start,
                "-lpaddle_gserver",
                "-lpaddle_function",
                whole_end,
                "-lpaddle_pserver",
                "-lpaddle_trainer_lib",
                "-lpaddle_network",
                '-lpaddle_parameter',
                "-lpaddle_math",
                '-lpaddle_utils',
                "-lpaddle_proto",
                "-lpaddle_cuda",
                "-lpaddle_api",
                self.normalize_flag(self.protolib),
                self.normalize_flag(self.glog_libs),
                self.normalize_flag(self.gflags_libs),
                self.normalize_flag(self.zlib),
                self.normalize_flag(self.thread),
                self.normalize_flag(self.dl_libs),
                self.normalize_flag(self.cblas_libs),
            ]
            if self.with_python:
                libs.append(self.normalize_flag(self.python_libs))
            if self.with_gpu:
                libs.append(self.normalize_flag(self.curt))
            if self.with_coverage:
                libs.append("-fprofile-arcs")
            # Drop entries that normalized to the empty string.
            return " ".join(flag for flag in libs if len(flag) != 0)

        def normalize_flag(self, cmake_flag):
            """
            CMake flag string to ld flag
            :type cmake_flag: str
            """
            if ";" in cmake_flag:
                # A CMake list: normalize every element recursively.
                return " ".join(
                    map(self.normalize_flag, cmake_flag.split(";")))
            if cmake_flag.startswith("/"):  # is a path
                return cmake_flag
            if cmake_flag.startswith("-l"):  # normal link command
                return cmake_flag
            if cmake_flag in [
                    "gflags-shared", "gflags-static",
                    "gflags_nothreads-shared", "gflags_nothreads-static"
            ]:  # special for gflags
                # gflags built by cmake is an imported target name, not a
                # path; use the resolved location instead.
                assert PaddleLDFlag.cmake_bool(self.gflags_location)
                return self.gflags_location
            if len(cmake_flag) != 0:
                return "-l" + cmake_flag
            return ""

        @staticmethod
        def cmake_bool(cmake_str):
            """
            CMake bool string to bool
            :param cmake_str: cmake boolean string
            :type cmake_str: str
            :rtype: bool
            """
            if cmake_str in ["FALSE", "OFF", "NO"]:
                return False
            if cmake_str.endswith("-NOTFOUND"):
                return False
            return True

        def c_flag(self):
            """Compiler flags; coverage builds add gcov instrumentation."""
            if self.with_coverage:
                return [
                    "-fprofile-arcs", "-ftest-coverage", "-O0", "-g",
                    "-std=c++11"
                ]
            return ["-std=c++11"]
except ImportError:
    # Stub used when the CMake-generated config is absent (e.g. a bare
    # source checkout); callers get no flags at all.
    class PaddleLDFlag(object):
        def ldflag_str(self):
            pass

        def c_flag(self):
            pass
...@@ -25,12 +25,16 @@ filter_test(GSERVER_HEADER) ...@@ -25,12 +25,16 @@ filter_test(GSERVER_HEADER)
filter_test(GSERVER_SOURCES) filter_test(GSERVER_SOURCES)
if(NOT WITH_GPU) if(NOT WITH_GPU)
list(REMOVE_ITEM GSERVER_HEADER list(REMOVE_ITEM GSERVER_HEADER
layers/CudnnConvBaseLayer.h
layers/CudnnConvLayer.h layers/CudnnConvLayer.h
layers/CudnnConvTransLayer.h
layers/CudnnPoolLayer.h layers/CudnnPoolLayer.h
layers/CudnnBatchNormLayer.h) layers/CudnnBatchNormLayer.h)
list(REMOVE_ITEM GSERVER_SOURCES list(REMOVE_ITEM GSERVER_SOURCES
layers/CudnnConvBaseLayer.cpp
layers/CudnnConvLayer.cpp layers/CudnnConvLayer.cpp
layers/CudnnConvTransLayer.cpp
layers/CudnnPoolLayer.cpp layers/CudnnPoolLayer.cpp
layers/CudnnBatchNormLayer.cpp) layers/CudnnBatchNormLayer.cpp)
compile_cu_as_cpp(layers/LstmCompute.cu) compile_cu_as_cpp(layers/LstmCompute.cu)
......
...@@ -164,15 +164,6 @@ public: ...@@ -164,15 +164,6 @@ public:
argu.value = value; argu.value = value;
data_.push_back(argu); data_.push_back(argu);
} }
/**
* @brief Append user defined data
* @param[in] ptr user defined data
*/
void appendUserDefinedPtr(UserDefinedVectorPtr ptr) {
Argument argu;
argu.udp = ptr;
data_.push_back(argu);
}
/* /*
* @brief Append argument * @brief Append argument
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvBaseOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief ConvBaseOperator takes two inputs to perform the convolution.
 * The first input is the image, and the second input is the convolution
 * kernel. The height of data for two inputs are the same. Each data of the
 * first input is convolved with each data of the second input independently.
 *
 * The config file api is conv_operator.
 */
ConvBaseOperator::ConvBaseOperator(const OperatorConfig &config, bool useGpu)
    : Operator(config, useGpu) {
  // GPU-only operator; requires exactly two inputs (image, filter).
  CHECK(useGpu);
  CHECK_EQ(config_.input_indices_size(), 2L);

  caffeMode_ = true;
  getConvParams();     // read convolution hyper-parameters from config_
  computeConvSizes();  // create the hl_/cudnn descriptors

  // initialize all to default algorithms
  fwdAlgo_ = 0;
  bwdFilterAlgo_ = 0;
  bwdDataAlgo_ = 0;
  fwdLimitBytes_ = 0;
  bwdDataLimitBytes_ = 0;
  bwdFilterLimitBytes_ = 0;
  // Device workspace is allocated lazily in allocConvWorkSpace().
  workSpaceInBytes_ = 0;
  workSpace_ = nullptr;

  isSelectAlgo_ = false;
}
// Query the hl_ (cudnn wrapper) layer for the algorithms to use in the
// forward, backward-data and backward-filter passes, and (re)allocate one
// shared device workspace large enough for all three.
void ConvBaseOperator::allocConvWorkSpace() {
  hl_conv_workspace(imageDesc_,
                    outputDesc_,
                    filterDesc_,
                    convDesc_,
                    &fwdAlgo_,
                    &fwdLimitBytes_,
                    &bwdDataAlgo_,
                    &bwdDataLimitBytes_,
                    &bwdFilterAlgo_,
                    &bwdFilterLimitBytes_);

  // One buffer serves all three passes, so it must fit the largest need.
  size_t maxWorkSpace = 0;
  maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
  maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);

  // Grow (never shrink) the workspace, freeing any previous buffer first.
  if (maxWorkSpace > workSpaceInBytes_) {
    if (workSpaceInBytes_ != 0) {
      hl_free_mem_device(workSpace_);
    }
    // total amount of storage needed
    workSpace_ = hl_malloc_device(maxWorkSpace);
    workSpaceInBytes_ = maxWorkSpace;
  }
}
// Create the filter, tensor and convolution descriptors used by the hl_
// (cudnn wrapper) API. The image/output tensor descriptors are created
// empty here; their geometry is set later in reshapeImageDescriptors().
void ConvBaseOperator::computeConvSizes() {
  hl_create_filter_descriptor(
      &filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_);
  hl_create_tensor_descriptor(&imageDesc_);
  hl_create_tensor_descriptor(&outputDesc_);
  hl_create_convolution_descriptor(&convDesc_,
                                   imageDesc_,
                                   filterDesc_,
                                   paddingY_,
                                   padding_,
                                   strideY_,
                                   stride_);
}
// Update the image/output tensor descriptors to the current geometry and
// re-apply the padding/stride settings on the convolution descriptor.
void ConvBaseOperator::reshapeImageDescriptors() {
  // Describe a single sample (leading dimension 1); the trailing four
  // arguments are the explicit strides of the n/c/h/w dimensions.
  hl_tensor_reshape(imageDesc_,
                    1,
                    channels_,
                    imageH_,
                    imageW_,
                    channels_ * imageH_ * imageW_,
                    imageH_ * imageW_,
                    imageW_,
                    1);
  hl_tensor_reshape(outputDesc_,
                    1,
                    numFilters_,
                    outputH_,
                    outputW_,
                    numFilters_ * outputH_ * outputW_,
                    outputH_ * outputW_,
                    outputW_,
                    1);
  // Reset so the convolution descriptor matches the reshaped tensors.
  hl_reset_convolution_descriptor(convDesc_,
                                  imageDesc_,
                                  filterDesc_,
                                  paddingY_,
                                  padding_,
                                  strideY_,
                                  stride_);
}
/// Pull the convolution hyper-parameters from the operator config into
/// member variables and resolve the (de)convolution direction.
void ConvBaseOperator::getConvParams() {
  const ConvConfig &conf = config_.conv_conf();
  // Grouped convolution is not supported by this operator.
  CHECK_EQ(conf.groups(), 1U);

  // Filter geometry.
  filterSize_ = conf.filter_size();
  filterSizeY_ = conf.filter_size_y();
  filterPixels_ = filterSize_ * filterSizeY_;
  filterChannels_ = conf.filter_channels();

  // Padding and stride, x then y.
  padding_ = conf.padding();
  paddingY_ = conf.padding_y();
  stride_ = conf.stride();
  strideY_ = conf.stride_y();

  // Input geometry; the y size falls back to the square case when absent.
  imgSize_ = conf.img_size();
  imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
  imgPixels_ = imgSize_ * imgSizeY_;

  // Output geometry.
  outputX_ = conf.output_x();
  outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
  // NOTE(review): outputX_ is used twice here (not outputX_ * outputY_);
  // presumably a square output is assumed -- confirm before relying on
  // outputs_ for non-square shapes.
  outputs_ = outputX_ * outputX_;

  // For deconvolution the roles of channels and filters are swapped.
  configNumFilters_ = config_.num_filters();
  configChannels_ = conf.channels();
  isDeconv_ = config_.type() != "conv";
  channels_ = isDeconv_ ? configNumFilters_ : configChannels_;
  numFilters_ = isDeconv_ ? configChannels_ : configNumFilters_;
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Operator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief ConvBaseOperator is the common base of the GPU convolution
 * operators. It takes two inputs to perform the convolution: the first
 * input is the image, and the second input is the convolution kernel.
 * The height of data for two inputs are the same. Each data of the first
 * input is convolved with each data of the second input independently.
 *
 * The config file api is conv_operator.
 */
class ConvBaseOperator : public Operator {
public:
  /// GPU-only: the constructor CHECKs that useGpu is true.
  ConvBaseOperator(const OperatorConfig &config, bool useGpu);

  /**
   * Free workspace in device and destroy cudnn tensor descriptor.
   */
  virtual ~ConvBaseOperator() {
    if (workSpaceInBytes_ != 0) {
      hl_free_mem_device(workSpace_);
      workSpaceInBytes_ = 0;
    }

    hl_destroy_tensor_descriptor(imageDesc_);
    hl_destroy_tensor_descriptor(outputDesc_);
    hl_destroy_filter_descriptor(filterDesc_);
    hl_destroy_convolution_descriptor(convDesc_);
  }

protected:
  /**
   * Get convolution parameters from layer config and
   * initialize member variables.
   */
  void getConvParams();

  /**
   * Allocate Gpu Memory for cudnn convolution algorithms.
   */
  void allocConvWorkSpace();

  /**
   * Create cudnn tensor descriptor for convolution operation.
   */
  void computeConvSizes();

  /**
   * Reshape cudnn tensor descriptor.
   */
  void reshapeImageDescriptors();

  /**
   * Reshape cudnn tensor descriptor.
   * Implemented by the concrete (de)convolution subclass.
   */
  virtual void reshape(int batchSize) = 0;

  /**
   * Check filter size is equal to the size calculated by parameters from
   * layer config.
   */
  void checkFilterSize(const MatrixPtr &filter) {
    CHECK_EQ(static_cast<int>(filter->getWidth()),
             filterSize_ * filterSizeY_ * channels_ * numFilters_);
  }

  /// Most of member variables are same with CudnnConvLayer.
  /// isDeconv_ is set when the operator type is not "conv"; it swaps the
  /// channel/filter roles (see getConvParams()).
  bool isDeconv_;
  int imageH_, imageW_, outputH_, outputW_;
  /// hl_ (cudnn wrapper) descriptors created in computeConvSizes().
  hl_tensor_descriptor imageDesc_;
  hl_tensor_descriptor outputDesc_;
  hl_filter_descriptor filterDesc_;
  hl_convolution_descriptor convDesc_;
  bool caffeMode_;
  int inputOffset_, outputOffset_, weightOffset_;
  int numFilters_, channels_;

  /// from parsing config
  int configNumFilters_, configChannels_;
  int padding_, stride_, filterSize_, imgSize_, imgSizeY_;
  int paddingY_, strideY_, filterSizeY_;
  int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_;

  /// Algorithm choices and workspace bookkeeping filled in by
  /// allocConvWorkSpace(); same as in CudnnConvLayer.
  int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_;
  size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_;
  size_t workSpaceInBytes_;
  void *workSpace_;
  bool isSelectAlgo_;
};
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvBaseProjection.h"
#include "paddle/utils/Stat.h"
namespace paddle {
/// Per-thread, per-device cache of cudnn workspace memory handles,
/// lazily sized in getSpaceBytes().
ThreadLocalD<std::vector<MemoryHandle *>> ConvBaseProjection::convMem_;
/**
 * Constructor: parses convolution parameters from the projection config,
 * creates the cudnn descriptors, and builds the filter weight of shape
 * (filterH * filterW * channels / groups) x numFilters.
 */
ConvBaseProjection::ConvBaseProjection(const ProjectionConfig &config,
                                       ParameterPtr parameter,
                                       bool useGpu)
    : Projection(config, parameter, useGpu) {
  CHECK(useGpu);  // only support GPU
  getConvParams();
  initCudnn();

  const size_t weightH = filterH_ * filterW_ * channels_ / groups_;
  const size_t weightW = numFilters_;
  weight_.reset(new Weight(weightH, weightW, parameter));
  // One group's share of the weight matrix.
  weightOffset_ = weightH * weightW / groups_;
}
/**
 * Read convolution hyper-parameters from the projection config into member
 * variables. For a deconv projection (any type other than "conv") the
 * channel/filter counts from the config are swapped so the rest of the code
 * can work in plain convolution semantics.
 */
void ConvBaseProjection::getConvParams() {
  const ConvConfig &conf = config_.conv_conf();

  paddingH_ = conf.padding_y();
  paddingW_ = conf.padding();
  strideH_ = conf.stride_y();
  strideW_ = conf.stride();
  filterH_ = conf.filter_size_y();
  filterW_ = conf.filter_size();

  // Fall back to the square-image fields when the *_y variants are absent.
  configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
  configImgW_ = conf.img_size();
  configOutH_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
  configOutW_ = conf.output_x();

  configChannels_ = conf.channels();
  configNumFilters_ = config_.num_filters();

  isDeconv_ = config_.type() != "conv";
  if (isDeconv_) {
    channels_ = configNumFilters_;
    numFilters_ = configChannels_;
  } else {
    channels_ = configChannels_;
    numFilters_ = configNumFilters_;
  }

  groups_ = conf.groups();
  CHECK_EQ(channels_ % groups_, 0);
  CHECK_EQ(numFilters_ % groups_, 0);
}
/**
 * Create the cudnn descriptors used by this projection and reset the
 * algorithm/workspace bookkeeping. Geometry is filled in later by
 * reshapeTensorDesc().
 */
void ConvBaseProjection::initCudnn() {
  // The filter descriptor is created per group:
  // (channels / groups) input maps x (numFilters / groups) output maps.
  hl_create_filter_descriptor(&filterDesc_,
                              channels_ / groups_,
                              numFilters_ / groups_,
                              filterH_,
                              filterW_);
  hl_create_tensor_descriptor(&imageDesc_);
  hl_create_tensor_descriptor(&outputDesc_);
  hl_create_convolution_descriptor(&convDesc_,
                                   imageDesc_,
                                   filterDesc_,
                                   paddingH_,
                                   paddingW_,
                                   strideH_,
                                   strideW_);

  // initialize all to default algorithms
  fwdAlgo_ = 0;
  bwdFilterAlgo_ = 0;
  bwdDataAlgo_ = 0;
  fwdLimitBytes_ = 0;
  bwdDataLimitBytes_ = 0;
  bwdFilterLimitBytes_ = 0;
  workSpaceInBytes_ = 0;

  // Force algorithm selection on the first reshape() call.
  batchNum_ = 0;
  isSelectAlgo_ = false;
}
/**
 * Reshape the image/output tensor descriptors and reset the convolution
 * descriptor for the given batch size.
 */
void ConvBaseProjection::reshapeTensorDesc(int batchSize) {
  // The stride between two consecutive samples in the output of ConvProjection
  // may not be numFilters_ * outputH_ * outputW_ (conv) or
  // channels_ * imageH_ * imageW_ (deconv)
  // for example, in the case of layer ConcatenateLayer2 with two
  // ConvProjection, the stride is the output_size of layer ConcatenateLayer2.
  // So the calculation of nStride is different from CudnnConvLayer.
  //
  // For deconv the projection's output (out_) lives in "image" space, so the
  // image stride comes from out_; for conv it is the other way around.
  size_t nStrideImage, nStrideOutput;
  if (isDeconv_) {
    nStrideImage = out_->value->getStride();
    nStrideOutput = numFilters_ * outputH_ * outputW_;
  } else {
    nStrideImage = channels_ * imageH_ * imageW_;
    nStrideOutput = out_->value->getStride();
  }

  hl_tensor_reshape(imageDesc_,
                    batchSize,
                    channels_ / groups_,
                    imageH_,
                    imageW_,
                    nStrideImage,
                    imageH_ * imageW_,
                    imageW_,
                    1);
  hl_tensor_reshape(outputDesc_,
                    batchSize,
                    numFilters_ / groups_,
                    outputH_,
                    outputW_,
                    nStrideOutput,
                    outputH_ * outputW_,
                    outputW_,
                    1);
  hl_reset_convolution_descriptor(convDesc_,
                                  imageDesc_,
                                  filterDesc_,
                                  paddingH_,
                                  paddingW_,
                                  strideH_,
                                  strideW_);
}
/**
 * Validate input/output widths against the computed geometry and, when the
 * batch size changed since the last call, reshape the cudnn descriptors and
 * re-query the best algorithms plus their workspace requirement.
 */
void ConvBaseProjection::reshape(int batchSize) {
  size_t width = calOutputSize();
  CHECK_EQ(width, out_->value->getWidth());
  CHECK_EQ(calInputSize(), in_->value->getWidth());

  // Reuse the previously selected algorithms while the batch size is
  // unchanged; a changed batch size triggers reselection.
  isSelectAlgo_ = (batchSize == batchNum_);
  batchNum_ = batchSize;

  if (!isSelectAlgo_) {
    reshapeTensorDesc(batchSize);
    hl_conv_workspace(imageDesc_,
                      outputDesc_,
                      filterDesc_,
                      convDesc_,
                      &fwdAlgo_,
                      &fwdLimitBytes_,
                      &bwdDataAlgo_,
                      &bwdDataLimitBytes_,
                      &bwdFilterAlgo_,
                      &bwdFilterLimitBytes_);

    // A single workspace is shared by forward/backwardData/backwardFilter,
    // so it must satisfy the largest of the three requirements.
    size_t maxWorkSpace = 0;
    maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
    maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
    workSpaceInBytes_ = maxWorkSpace;

    VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_
            << " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_;
  }

  isSelectAlgo_ = true;
}
/**
 * Return a device buffer of at least `size` bytes for cudnn workspace use.
 * Buffers are cached per thread and per GPU device in convMem_ and grown on
 * demand; the returned pointer remains owned by the cache.
 *
 * Fix: the original replaced an undersized handle without releasing it,
 * leaking the previous GpuMemoryHandle on every growth.
 */
void *ConvBaseProjection::getSpaceBytes(size_t size) {
  std::vector<MemoryHandle *> &convMem = *convMem_;
  if (convMem.empty()) {
    int numDevices = hl_get_device_count();
    convMem.resize(numDevices);  // entries value-initialize to NULL
  }

  int devId = hl_get_device();
  MemoryHandle **localMem = &(convMem[devId]);
  if (NULL == *localMem || size > (*localMem)->getAllocSize()) {
    // Release the old (too small) handle before replacing it; delete on a
    // NULL pointer is a no-op. NOTE(review): assumes MemoryHandle has a
    // virtual destructor -- confirm before merging.
    delete *localMem;
    *localMem = new GpuMemoryHandle(size);
  }
  return (*localMem)->getBuf();
}
/// Destroy the cudnn descriptors created in initCudnn(). The cached
/// workspace buffers in convMem_ are shared and are not released here.
ConvBaseProjection::~ConvBaseProjection() {
  hl_destroy_tensor_descriptor(imageDesc_);
  hl_destroy_tensor_descriptor(outputDesc_);
  hl_destroy_filter_descriptor(filterDesc_);
  hl_destroy_convolution_descriptor(convDesc_);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Projection.h"
#include "paddle/math/MathUtils.h"
namespace paddle {
/**
* @brief Base class for ConvProjection and ConvTransProjection.
*/
class ConvBaseProjection : public Projection {
public:
  /**
   * Constructor.
   */
  ConvBaseProjection(const ProjectionConfig& config,
                     ParameterPtr parameter,
                     bool useGpu);

  ~ConvBaseProjection();

protected:
  /// Read convolution hyper-parameters from config into member variables.
  void getConvParams();
  /// Create cudnn filter/tensor/convolution descriptors.
  void initCudnn();
  /// Reshape the cudnn descriptors for the given batch size.
  void reshapeTensorDesc(int batchSize);
  /// Validate widths and (re)select cudnn algorithms when batch size changes.
  void reshape(int batchSize);

  /// Compute the projection's output width; implemented by subclasses.
  virtual size_t calOutputSize() = 0;
  /// Compute the projection's expected input width; implemented by subclasses.
  virtual size_t calInputSize() = 0;

  /// Return a cached per-thread, per-device workspace buffer of >= size bytes.
  static void* getSpaceBytes(size_t size);

  /// True if it's deconv projection layer, false if it's ConvProjection layer
  bool isDeconv_;
  /// imageH_ and imageW_ / outputH_ and outputW_
  /// is calculated from the input layer.
  int imageH_, imageW_;
  int outputH_, outputW_;
  /// configImgH_ and configImgW_ / configOutH_ and configOutW_
  /// is obtained from config.
  int configImgH_, configImgW_;
  int configOutH_, configOutW_;
  /// channels_ and numFilters_ are defined in terms of convolution semantics
  int channels_, numFilters_;
  /// configChannels_ and configNumFilters_ are obtained from config.
  /// For Conv they are the same as channels_ and numFilters_.
  /// For ConvTrans they are opposite to channels_ and numFilters_.
  int configChannels_, configNumFilters_;
  int paddingH_, paddingW_;
  int strideH_, strideW_;
  int filterH_, filterW_;
  /// One group offset of input data.
  int inputOffset_;
  /// One group offset of output data.
  int outputOffset_;
  /// One group offset of weight.
  int weightOffset_;
  int groups_;
  /// Cudnn tensor descriptor for input.
  hl_tensor_descriptor imageDesc_;
  /// Cudnn tensor descriptor for output.
  hl_tensor_descriptor outputDesc_;
  /// Cudnn tensor descriptor for filter.
  hl_filter_descriptor filterDesc_;
  /// Cudnn tensor descriptor for a convolution operation.
  hl_convolution_descriptor convDesc_;
  /// Record the algorithm for forward convolution, which is obtained by cudnn
  /// api to search the best suited algorithm.
  int fwdAlgo_;
  /// Record the algorithm for computing convolution gradient with respect to
  /// filter coefficients.
  int bwdFilterAlgo_;
  /// Record the algorithm for computing convolution gradient with respect to
  /// the output.
  int bwdDataAlgo_;
  /// Amount of GPU memory needed as workspace to be able to execute a
  /// forward convolution with the specified algo.
  size_t fwdLimitBytes_;
  /// Amount of GPU memory needed as workspace to be able to execute a
  /// backwardData with the specified algo.
  /// NOTE(review): original comment said "backwardFilter" -- it was swapped
  /// with bwdFilterLimitBytes_'s comment; fixed to match how reshape() fills
  /// these from hl_conv_workspace.
  size_t bwdDataLimitBytes_;
  /// Amount of GPU memory needed as workspace to be able to execute a
  /// backwardFilter with the specified algo.
  size_t bwdFilterLimitBytes_;
  /// Size of total work space.
  size_t workSpaceInBytes_;
  /// Whether to call cuDNN api to choose conv algorithm.
  bool isSelectAlgo_;
  /// batchNum is used to record batch size. If the batch size is changed,
  /// the selection algorithm will be called.
  int batchNum_;
  bool bias_;
  std::unique_ptr<Weight> weight_;
  /// Per-thread, per-device cache of cudnn workspace memory handles.
  static ThreadLocalD<std::vector<MemoryHandle*>> convMem_;
};
} // namespace paddle
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "Operator.h" #include "ConvOperator.h"
#include "paddle/math/MathUtils.h" #include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
...@@ -27,120 +27,8 @@ namespace paddle { ...@@ -27,120 +27,8 @@ namespace paddle {
* The config file api is conv_operator. * The config file api is conv_operator.
*/ */
class ConvOperator : public Operator {
public:
ConvOperator(const OperatorConfig &config, bool useGpu);
/**
* Free workspace in device and destroy cudnn tensor descriptor.
*/
virtual ~ConvOperator() {
if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_);
workSpaceInBytes_ = 0;
}
hl_destroy_tensor_descriptor(inputDesc_);
hl_destroy_tensor_descriptor(outputDesc_);
hl_destroy_filter_descriptor(filterDesc_);
hl_destroy_convolution_descriptor(convDesc_);
}
virtual void forward();
virtual void backward();
private:
/**
* Get convolution parameters from layer config and
* initialize member variables.
*/
void getConvParams();
/**
* Allocate Gpu Memory for cudnn convolution algorithms.
*/
void allocConvWorkSpace(size_t maxWorkSpace);
/**
* Create cudnn tensor descriptor for convolution operation.
*/
void computeConvSizes();
/**
* Reshape cudnn tensor descriptor.
*/
void reshapeImageDescriptors();
/**
* Reshape cudnn tensor descriptor.
*/
void reshape(int batchSize);
/**
* Check filter size is equal to the size calculated by parameters from
* layer config.
*/
void checkFilterSize(const MatrixPtr &filter) {
CHECK_EQ(static_cast<int>(filter->getWidth()),
filterSize_ * filterSizeY_ * channels_ * numFilters_);
}
/// Most of member variables are same with CudnnConvLayer.
/// There is no explanation here.
int imageH_, imageW_, outputH_, outputW_;
hl_tensor_descriptor inputDesc_;
hl_tensor_descriptor outputDesc_;
hl_filter_descriptor filterDesc_;
hl_convolution_descriptor convDesc_;
bool caffeMode_;
int inputOffset_, outputOffset_, weightOffset_;
int numFilters_;
int padding_, stride_, filterSize_, channels_, imgSize_, imgSizeY_;
int paddingY_, strideY_, filterSizeY_;
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_;
/// Following member variables are same with CudnnConvLayer.
/// There is no explanation here.
int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_;
size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_;
size_t workSpaceInBytes_;
void *workSpace_;
bool isSelectAlgo_;
};
REGISTER_OPERATOR(conv, ConvOperator); REGISTER_OPERATOR(conv, ConvOperator);
ConvOperator::ConvOperator(const OperatorConfig &config, bool useGpu)
: Operator(config, useGpu) {
CHECK(useGpu);
CHECK_EQ(config_.input_indices_size(), 2L);
caffeMode_ = true;
getConvParams();
computeConvSizes();
// initialize all to default algorithms
fwdAlgo_ = 0;
bwdFilterAlgo_ = 0;
bwdDataAlgo_ = 0;
fwdLimitBytes_ = 0;
bwdDataLimitBytes_ = 0;
bwdFilterLimitBytes_ = 0;
workSpaceInBytes_ = 0;
workSpace_ = nullptr;
isSelectAlgo_ = false;
}
void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
if (maxWorkSpace > workSpaceInBytes_) {
if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_);
}
// total amount of storage needed
workSpace_ = hl_malloc_device(maxWorkSpace);
workSpaceInBytes_ = maxWorkSpace;
}
}
void ConvOperator::reshape(int batchSize) { void ConvOperator::reshape(int batchSize) {
imageH_ = ins_[0]->getFrameHeight(); imageH_ = ins_[0]->getFrameHeight();
imageW_ = ins_[0]->getFrameWidth(); imageW_ = ins_[0]->getFrameWidth();
...@@ -148,106 +36,25 @@ void ConvOperator::reshape(int batchSize) { ...@@ -148,106 +36,25 @@ void ConvOperator::reshape(int batchSize) {
if (imageW_ == 0) imageW_ = imgSize_; if (imageW_ == 0) imageW_ = imgSize_;
outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_); outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_); outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_);
/// Check that the outputSizes are consistent with config
CHECK_EQ(outputH_, outputY_);
CHECK_EQ(outputW_, outputX_);
out_->setFrameHeight(outputH_); out_->setFrameHeight(outputH_);
out_->setFrameWidth(outputW_); out_->setFrameWidth(outputW_);
reshapeImageDescriptors(); reshapeImageDescriptors();
if (!isSelectAlgo_) { inputOffset_ = channels_ * imageH_ * imageW_;
hl_conv_workspace(inputDesc_, outputOffset_ = numFilters_ * outputH_ * outputW_;
outputDesc_, weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSizeY_;
filterDesc_,
convDesc_,
&fwdAlgo_,
&fwdLimitBytes_,
&bwdDataAlgo_,
&bwdDataLimitBytes_,
&bwdFilterAlgo_,
&bwdFilterLimitBytes_);
size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
allocConvWorkSpace(maxWorkSpace); if (!isSelectAlgo_) {
allocConvWorkSpace();
} }
isSelectAlgo_ = true; isSelectAlgo_ = true;
} }
void ConvOperator::computeConvSizes() {
hl_create_filter_descriptor(
&filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_);
hl_create_tensor_descriptor(&inputDesc_);
int outputX =
outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_);
int outputY =
outputSize(imgSizeY_, filterSizeY_, paddingY_, strideY_, caffeMode_);
CHECK_EQ(outputX, outputX_);
CHECK_EQ(outputY, outputY_);
hl_create_tensor_descriptor(&outputDesc_);
hl_create_convolution_descriptor(&convDesc_,
inputDesc_,
filterDesc_,
paddingY_,
padding_,
strideY_,
stride_);
}
void ConvOperator::reshapeImageDescriptors() {
hl_tensor_reshape(inputDesc_,
1,
channels_,
imageH_,
imageW_,
channels_ * imageH_ * imageW_,
imageH_ * imageW_,
imageW_,
1);
hl_tensor_reshape(outputDesc_,
1,
numFilters_,
outputH_,
outputW_,
numFilters_ * outputH_ * outputW_,
outputH_ * outputW_,
outputW_,
1);
hl_reset_convolution_descriptor(convDesc_,
inputDesc_,
filterDesc_,
paddingY_,
padding_,
strideY_,
stride_);
inputOffset_ = channels_ * imageH_ * imageW_;
outputOffset_ = numFilters_ * outputH_ * outputW_;
weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSize_;
}
void ConvOperator::getConvParams() {
numFilters_ = config_.num_filters();
const ConvConfig &conf = config_.conv_conf();
padding_ = conf.padding();
stride_ = conf.stride();
filterSize_ = conf.filter_size();
paddingY_ = conf.padding_y();
strideY_ = conf.stride_y();
filterSizeY_ = conf.filter_size_y();
filterPixels_ = filterSize_ * filterSizeY_;
channels_ = conf.channels();
imgSize_ = conf.img_size();
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
imgPixels_ = imgSize_ * imgSizeY_;
CHECK_EQ(conf.groups(), 1U);
filterChannels_ = conf.filter_channels();
outputX_ = conf.output_x();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
outputs_ = outputX_ * outputX_;
}
void ConvOperator::forward() { void ConvOperator::forward() {
size_t batchSize = ins_[0]->value->getHeight(); size_t batchSize = ins_[0]->value->getHeight();
reshape(batchSize); reshape(batchSize);
...@@ -264,7 +71,7 @@ void ConvOperator::forward() { ...@@ -264,7 +71,7 @@ void ConvOperator::forward() {
real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId; real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId;
real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
real *outData = out_->value->getData() + outputOffset_ * batchId; real *outData = out_->value->getData() + outputOffset_ * batchId;
hl_convolution_forward(inputDesc_, hl_convolution_forward(imageDesc_,
inputData, inputData,
outputDesc_, outputDesc_,
outData, outData,
...@@ -287,7 +94,7 @@ void ConvOperator::backward() { ...@@ -287,7 +94,7 @@ void ConvOperator::backward() {
if (ins_[1]->grad) { if (ins_[1]->grad) {
real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId; real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId;
real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId; real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId;
hl_convolution_backward_filter(inputDesc_, hl_convolution_backward_filter(imageDesc_,
inputData, inputData,
outputDesc_, outputDesc_,
outGrad, outGrad,
...@@ -303,7 +110,7 @@ void ConvOperator::backward() { ...@@ -303,7 +110,7 @@ void ConvOperator::backward() {
if (NULL != preGrad) { if (NULL != preGrad) {
real *inputGrad = preGrad->getData() + inputOffset_ * batchId; real *inputGrad = preGrad->getData() + inputOffset_ * batchId;
real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId; real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
hl_convolution_backward_data(inputDesc_, hl_convolution_backward_data(imageDesc_,
inputGrad, inputGrad,
outputDesc_, outputDesc_,
outGrad, outGrad,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "ConvBaseOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief ConvOperator takes two inputs to perform the convolution.
* The first input is the image, and the second input is the convolution kernel.
* The height of data for two inputs are the same. Each data of the first input
* is convolved with each data of the second input indepedently.
*
* The config file api is conv_operator.
*/
class ConvOperator : public ConvBaseOperator {
public:
ConvOperator(const OperatorConfig &config, bool useGpu)
: ConvBaseOperator(config, useGpu) {}
/**
* Free workspace in device and destroy cudnn tensor descriptor.
*/
virtual ~ConvOperator() {}
void forward() override;
void backward() override;
void reshape(int batchSize) override;
};
} // namespace paddle
...@@ -19,149 +19,32 @@ namespace paddle { ...@@ -19,149 +19,32 @@ namespace paddle {
REGISTER_PROJECTION(conv, ConvProjection); REGISTER_PROJECTION(conv, ConvProjection);
ThreadLocalD<std::vector<MemoryHandle *>> ConvProjection::convMem_; size_t ConvProjection::calOutputSize() {
imageH_ = in_->getFrameHeight();
ConvProjection::ConvProjection(const ProjectionConfig &config, imageW_ = in_->getFrameWidth();
ParameterPtr parameter, if (imageH_ == 0) imageH_ = configImgH_;
bool useGpu) if (imageW_ == 0) imageW_ = configImgW_;
: Projection(config, parameter, useGpu) { outputH_ = outputSize(imageH_,
CHECK(useGpu); // only support GPU filterH_,
getConvParams(); paddingH_,
initCudnn(); strideH_,
/* caffeMode */ true);
size_t height = filterH_ * filterW_ * channels_ / groups_; outputW_ = outputSize(imageW_,
size_t width = numFilters_; filterW_,
weight_.reset(new Weight(height, width, parameter)); paddingW_,
weightOffset_ = height * width / groups_; strideW_,
} /* caffeMode */ true);
void ConvProjection::getConvParams() { const_cast<Argument *>(out_)->setFrameHeight(outputH_);
const ConvConfig &conf = config_.conv_conf(); const_cast<Argument *>(out_)->setFrameWidth(outputW_);
paddingH_ = conf.padding_y();
paddingW_ = conf.padding(); inputOffset_ = (configChannels_ / groups_) * imageH_ * imageW_;
outputOffset_ = (configNumFilters_ / groups_) * outputH_ * outputW_;
strideH_ = conf.stride_y(); return outputH_ * outputW_ * configNumFilters_;
strideW_ = conf.stride();
filterH_ = conf.filter_size_y();
filterW_ = conf.filter_size();
configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
configImgW_ = conf.img_size();
channels_ = conf.channels();
numFilters_ = config_.num_filters();
groups_ = conf.groups();
CHECK_EQ(channels_ % groups_, 0);
CHECK_EQ(numFilters_ % groups_, 0);
}
void ConvProjection::initCudnn() {
hl_create_filter_descriptor(&filterDesc_,
channels_ / groups_,
numFilters_ / groups_,
filterH_,
filterW_);
hl_create_tensor_descriptor(&inputDesc_);
hl_create_tensor_descriptor(&outputDesc_);
hl_create_convolution_descriptor(&convDesc_,
inputDesc_,
filterDesc_,
paddingH_,
paddingW_,
strideH_,
strideW_);
// initialize all to default algorithms
fwdAlgo_ = 0;
bwdFilterAlgo_ = 0;
bwdDataAlgo_ = 0;
fwdLimitBytes_ = 0;
bwdDataLimitBytes_ = 0;
bwdFilterLimitBytes_ = 0;
workSpaceInBytes_ = 0;
batchNum_ = 0;
isSelectAlgo_ = false;
}
void ConvProjection::reshapeTensorDesc(int batchSize) {
hl_tensor_reshape(inputDesc_,
batchSize,
channels_ / groups_,
imageH_,
imageW_,
channels_ * imageH_ * imageW_,
imageH_ * imageW_,
imageW_,
1);
hl_reset_convolution_descriptor(convDesc_,
inputDesc_,
filterDesc_,
paddingH_,
paddingW_,
strideH_,
strideW_);
// The stride between two consecutive images in ConvProjection may not be 1,
// for example, in the case of layer ConcatenateLayer2 with two
// ConvProjection, the stride is the output_size of layer ConcatenateLayer2.
// So the calculation of nStride is different from CudnnConvLayer.
// In fact, only "nStride = out_->value->getStride()" is ok.
size_t nStride = numFilters_ * outputH_ * outputW_;
if (out_->value->isContiguous()) {
CHECK_EQ(nStride, out_->value->getWidth());
} else {
nStride = out_->value->getStride();
}
hl_tensor_reshape(outputDesc_,
batchSize,
numFilters_ / groups_,
outputH_,
outputW_,
nStride,
outputH_ * outputW_,
outputW_,
1);
} }
void ConvProjection::reshape(int batchSize) { size_t ConvProjection::calInputSize() {
size_t width = calOutputSize(); return static_cast<size_t>(configChannels_ * imageH_ * imageW_);
CHECK_EQ(width, out_->value->getWidth());
CHECK_EQ(static_cast<size_t>(channels_ * imageH_ * imageW_),
in_->value->getWidth())
<< "Wrong input size for convolution"
<< " channels=" << channels_ << " imageH=" << imageH_
<< " imageW=" << imageW_ << " inputSize=" << in_->value->getWidth();
isSelectAlgo_ = (batchSize == batchNum_);
batchNum_ = batchSize;
if (!isSelectAlgo_) {
reshapeTensorDesc(batchSize);
hl_conv_workspace(inputDesc_,
outputDesc_,
filterDesc_,
convDesc_,
&fwdAlgo_,
&fwdLimitBytes_,
&bwdDataAlgo_,
&bwdDataLimitBytes_,
&bwdFilterAlgo_,
&bwdFilterLimitBytes_);
size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
workSpaceInBytes_ = maxWorkSpace;
VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_
<< " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_;
}
isSelectAlgo_ = true;
} }
void ConvProjection::forward() { void ConvProjection::forward() {
...@@ -179,7 +62,7 @@ void ConvProjection::forward() { ...@@ -179,7 +62,7 @@ void ConvProjection::forward() {
real *inputData = in_->value->getData() + g * inputOffset_; real *inputData = in_->value->getData() + g * inputOffset_;
real *wgtData = weight_->getW()->getData() + g * weightOffset_; real *wgtData = weight_->getW()->getData() + g * weightOffset_;
real *outData = out_->value->getData() + g * outputOffset_; real *outData = out_->value->getData() + g * outputOffset_;
hl_convolution_forward(inputDesc_, hl_convolution_forward(imageDesc_,
inputData, inputData,
outputDesc_, outputDesc_,
outData, outData,
...@@ -205,7 +88,7 @@ void ConvProjection::backward(const UpdateCallback &callback) { ...@@ -205,7 +88,7 @@ void ConvProjection::backward(const UpdateCallback &callback) {
if (weight_->getWGrad()) { if (weight_->getWGrad()) {
real *inputData = in_->value->getData() + g * inputOffset_; real *inputData = in_->value->getData() + g * inputOffset_;
real *weightGrad = weight_->getWGrad()->getData() + g * weightOffset_; real *weightGrad = weight_->getWGrad()->getData() + g * weightOffset_;
hl_convolution_backward_filter(inputDesc_, hl_convolution_backward_filter(imageDesc_,
inputData, inputData,
outputDesc_, outputDesc_,
outGrad, outGrad,
...@@ -221,7 +104,7 @@ void ConvProjection::backward(const UpdateCallback &callback) { ...@@ -221,7 +104,7 @@ void ConvProjection::backward(const UpdateCallback &callback) {
if (NULL != preGrad) { if (NULL != preGrad) {
real *inputGrad = preGrad->getData() + g * inputOffset_; real *inputGrad = preGrad->getData() + g * inputOffset_;
real *wgtData = weight_->getW()->getData() + g * weightOffset_; real *wgtData = weight_->getW()->getData() + g * weightOffset_;
hl_convolution_backward_data(inputDesc_, hl_convolution_backward_data(imageDesc_,
inputGrad, inputGrad,
outputDesc_, outputDesc_,
outGrad, outGrad,
...@@ -237,26 +120,4 @@ void ConvProjection::backward(const UpdateCallback &callback) { ...@@ -237,26 +120,4 @@ void ConvProjection::backward(const UpdateCallback &callback) {
weight_->getParameterPtr()->incUpdate(callback); weight_->getParameterPtr()->incUpdate(callback);
} }
void *ConvProjection::getSpaceBytes(size_t size) {
std::vector<MemoryHandle *> &convMem = *convMem_;
if (convMem.empty()) {
int numDevices = hl_get_device_count();
convMem.resize(numDevices);
}
int devId = hl_get_device();
MemoryHandle **localMem = &(convMem[devId]);
if (NULL == *localMem || size > (*localMem)->getAllocSize()) {
*localMem = new GpuMemoryHandle(size);
}
return (*localMem)->getBuf();
}
ConvProjection::~ConvProjection() {
hl_destroy_tensor_descriptor(inputDesc_);
hl_destroy_tensor_descriptor(outputDesc_);
hl_destroy_filter_descriptor(filterDesc_);
hl_destroy_convolution_descriptor(convDesc_);
}
} // namespace paddle } // namespace paddle
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include "Projection.h" #include "ConvBaseProjection.h"
#include "paddle/math/MathUtils.h" #include "paddle/math/MathUtils.h"
namespace paddle { namespace paddle {
...@@ -22,109 +22,22 @@ namespace paddle { ...@@ -22,109 +22,22 @@ namespace paddle {
/** /**
* @brief Convolution projection do the same calculation with CudnnConvLayer. * @brief Convolution projection do the same calculation with CudnnConvLayer.
*/ */
class ConvProjection : public Projection { class ConvProjection : public ConvBaseProjection {
public: public:
/** /**
* Constructor. * Constructor.
*/ */
ConvProjection(const ProjectionConfig& config, ConvProjection(const ProjectionConfig& config,
ParameterPtr parameter, ParameterPtr parameter,
bool useGpu); bool useGpu)
: ConvBaseProjection(config, parameter, useGpu) {}
~ConvProjection(); ~ConvProjection() {}
virtual void forward(); virtual void forward();
virtual void backward(const UpdateCallback& callback); virtual void backward(const UpdateCallback& callback);
virtual size_t calOutputSize();
protected: virtual size_t calInputSize();
void getConvParams();
void initCudnn();
void reshapeTensorDesc(int batchSize);
void reshape(int batchSize);
size_t calOutputSize() {
imageH_ = in_->getFrameHeight();
imageW_ = in_->getFrameWidth();
if (imageH_ == 0) imageH_ = configImgH_;
if (imageW_ == 0) imageW_ = configImgW_;
outputH_ = outputSize(imageH_,
filterH_,
paddingH_,
strideH_,
/* caffeMode */ true);
outputW_ = outputSize(imageW_,
filterW_,
paddingW_,
strideW_,
/* caffeMode */ true);
const_cast<Argument*>(out_)->setFrameHeight(outputH_);
const_cast<Argument*>(out_)->setFrameWidth(outputW_);
inputOffset_ = (channels_ / groups_) * imageH_ * imageW_;
outputOffset_ = (numFilters_ / groups_) * outputH_ * outputW_;
return outputH_ * outputW_ * numFilters_;
}
static void* getSpaceBytes(size_t size);
/// imageH_ and imageW_ is calculated from the input layer.
int imageH_, imageW_;
/// configImgH_ and configImgW_ is obtained from config.
int configImgH_, configImgW_;
int outputH_, outputW_;
int channels_, numFilters_;
int paddingH_, paddingW_;
int strideH_, strideW_;
int filterH_, filterW_;
/// One group offset of input data.
int inputOffset_;
/// One group offset of output data.
int outputOffset_;
/// One group offset of weight.
int weightOffset_;
int groups_;
/// Cudnn tensor descriptor for input.
hl_tensor_descriptor inputDesc_;
/// Cudnn tensor descriptor for output.
hl_tensor_descriptor outputDesc_;
/// Cudnn tensor descriptor for filter.
hl_filter_descriptor filterDesc_;
/// Cudnn tensor descriptor for a convolution operation.
hl_convolution_descriptor convDesc_;
/// Record the algorithm for forward convolution, which is obtained by cudnn
/// api to search the best suited algorithm.
int fwdAlgo_;
/// Record the algorithm for computing convolution gradient with respect to
/// filter coefficients.
int bwdFilterAlgo_;
/// Record the algorithm for computing convolution gradient with respect to
/// the output.
int bwdDataAlgo_;
/// Amount of GPU memory needed as workspace to be able to execute a
/// forward convolution with the specified algo.
size_t fwdLimitBytes_;
/// Amount of GPU memory needed as workspace to be able to execute a
/// backwardFilter with the specified algo.
size_t bwdDataLimitBytes_;
/// Amount of GPU memory needed as workspace to be able to execute a
/// backwardData with the specified algo.
size_t bwdFilterLimitBytes_;
/// Size of total work space.
size_t workSpaceInBytes_;
/// Whether to call cuDNN api to choose conv algorithm.
bool isSelectAlgo_;
/// batchNum is used to record batch size. If the batch size is changed,
/// the selection algorithm will be called.
int batchNum_;
bool bias_;
std::unique_ptr<Weight> weight_;
static ThreadLocalD<std::vector<MemoryHandle*>> convMem_;
}; };
} // namespace paddle } // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvTransOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief ConvTransOperator takes two inputs to perform the transposed
 * convolution. The first input is the image, and the second input is the
 * convolution kernel. The heights of the two inputs are the same. Each
 * sample of the first input is convolved with the corresponding sample of
 * the second input independently.
 *
 * The config file api is conv_operator.
 */
// Register under the "convt" type name used by the config parser.
REGISTER_OPERATOR(convt, ConvTransOperator);
/**
 * Recompute the geometry for the current mini-batch: derive image dimensions
 * from the first input's frame size (falling back to the configured output
 * size when unset), refresh the cudnn descriptors and per-sample offsets,
 * and allocate the workspace on the first pass.
 */
void ConvTransOperator::reshape(int batchSize) {
  int frameH = ins_[0]->getFrameHeight();
  int frameW = ins_[0]->getFrameWidth();
  // Zero frame size means "not set"; use the values from config instead.
  outputH_ = (frameH == 0) ? outputY_ : frameH;
  outputW_ = (frameW == 0) ? outputX_ : frameW;

  imageH_ = imageSize(outputH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
  imageW_ = imageSize(outputW_, filterSize_, padding_, stride_, caffeMode_);

  // The derived image sizes must agree with the configured ones.
  CHECK_EQ(imageH_, imgSizeY_);
  CHECK_EQ(imageW_, imgSize_);

  out_->setFrameHeight(imageH_);
  out_->setFrameWidth(imageW_);

  reshapeImageDescriptors();

  // Per-sample strides: for the transposed case the first input lives in
  // "output" space and the result lives in "image" space.
  inputOffset_ = numFilters_ * outputH_ * outputW_;
  outputOffset_ = channels_ * imageH_ * imageW_;
  weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSizeY_;

  if (!isSelectAlgo_) {
    allocConvWorkSpace();
  }
  isSelectAlgo_ = true;
}
void ConvTransOperator::forward() {
size_t batchSize = ins_[0]->value->getHeight();
reshape(batchSize);
CHECK_EQ(ins_[1]->value->getHeight(), batchSize);
checkFilterSize(ins_[1]->value);
Matrix::resizeOrCreate(
out_->value, batchSize, imageH_ * imageW_ * channels_, false, useGpu_);
{
AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) {
real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId;
real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
real *outData = out_->value->getData() + outputOffset_ * batchId;
hl_convolution_backward_data(imageDesc_,
outData,
outputDesc_,
inputData,
filterDesc_,
wgtData,
convDesc_,
workSpace_,
workSpaceInBytes_,
bwdDataAlgo_);
}
}
}
void ConvTransOperator::backward() {
size_t batchSize = ins_[0]->value->getHeight();
{
AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) {
real *outGrad = out_->grad->getData() + outputOffset_ * batchId;
if (ins_[1]->grad) {
real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId;
real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId;
hl_convolution_backward_filter(imageDesc_,
outGrad,
outputDesc_,
inputData,
filterDesc_,
weightGrad,
convDesc_,
workSpace_,
workSpaceInBytes_,
bwdFilterAlgo_);
}
MatrixPtr preGrad = ins_[0]->grad;
if (NULL != preGrad) {
real *inputGrad = preGrad->getData() + inputOffset_ * batchId;
real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
hl_convolution_forward(imageDesc_,
outGrad,
outputDesc_,
inputGrad,
filterDesc_,
wgtData,
convDesc_,
workSpace_,
workSpaceInBytes_,
fwdAlgo_);
}
}
}
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "ConvBaseOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief ConvTransOperator takes two inputs to perform the convolution.
* The first input is the image, and the second input is the convolution kernel.
 * The heights of the data for the two inputs are the same. Each data item of
 * the first input is convolved with each data item of the second input independently.
*
* The config file api is conv_operator.
*/
class ConvTransOperator : public ConvBaseOperator {
public:
  /// Forwards the configuration to ConvBaseOperator.
  ConvTransOperator(const OperatorConfig &config, bool useGpu)
      : ConvBaseOperator(config, useGpu) {}
  /**
   * Destructor is empty here; device workspace and cuDNN descriptors are
   * presumably released by the ConvBaseOperator destructor — confirm.
   */
  virtual ~ConvTransOperator() {}
  void forward() override;
  void backward() override;
  /// Recomputes sizes, offsets and cuDNN descriptors for a new batch.
  void reshape(int batchSize) override;
};
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvTransProjection.h"
#include "paddle/utils/Stat.h"
namespace paddle {
REGISTER_PROJECTION(convt, ConvTransProjection);
/// Computes the output (image) size of the transposed projection, caches
/// per-group offsets, and returns the per-sample output dimension.
size_t ConvTransProjection::calOutputSize() {
  // The projection input plays the role of a convolution output.
  outputH_ = in_->getFrameHeight();
  outputW_ = in_->getFrameWidth();
  // Fall back to configured sizes when the input carries no frame size.
  if (outputH_ == 0) outputH_ = configOutH_;
  if (outputW_ == 0) outputW_ = configOutW_;
  // Recover the deconv output (image) size from the conv-output size.
  imageH_ = imageSize(outputH_,
                      filterH_,
                      paddingH_,
                      strideH_,
                      /* caffeMode */ true);
  imageW_ = imageSize(outputW_,
                      filterW_,
                      paddingW_,
                      strideW_,
                      /* caffeMode */ true);
  // out_ is held through a const pointer but is logically mutable here.
  const_cast<Argument *>(out_)->setFrameHeight(imageH_);
  const_cast<Argument *>(out_)->setFrameWidth(imageW_);
  // Per-group offsets into the input/output data of one sample.
  inputOffset_ = (configChannels_ / groups_) * outputH_ * outputW_;
  outputOffset_ = (configNumFilters_ / groups_) * imageH_ * imageW_;
  return imageH_ * imageW_ * configNumFilters_;
}
/// Per-sample input dimension of the transposed projection:
/// channels x convOutH x convOutW.
size_t ConvTransProjection::calInputSize() {
  size_t inputSize = static_cast<size_t>(configChannels_ * outputH_ * outputW_);
  return inputSize;
}
/// Forward: out = deconv(in, W), computed per filter group with cuDNN's
/// conv backward-data kernel.
void ConvTransProjection::forward() {
  int batchSize = in_->value->getHeight();
  reshape(batchSize);

  // Scratch memory for cuDNN; only fetched when a workspace is required.
  void *workSpace =
      (workSpaceInBytes_ > 0) ? getSpaceBytes(workSpaceInBytes_) : NULL;

  for (int g = 0; g < groups_; ++g) {
    REGISTER_TIMER_INFO("CudnnConvTransFwTimer", getName().c_str());
    hl_convolution_backward_data(imageDesc_,
                                 out_->value->getData() + g * outputOffset_,
                                 outputDesc_,
                                 in_->value->getData() + g * inputOffset_,
                                 filterDesc_,
                                 weight_->getW()->getData() + g * weightOffset_,
                                 convDesc_,
                                 workSpace,
                                 bwdDataLimitBytes_,
                                 bwdDataAlgo_);
  }
}
void ConvTransProjection::backward(const UpdateCallback &callback) {
REGISTER_TIMER_INFO("CudnnConvTransBpTimer", getName().c_str());
void *workSpace = NULL;
if (workSpaceInBytes_ > 0) {
workSpace = getSpaceBytes(workSpaceInBytes_);
}
for (int g = 0; g < groups_; ++g) {
real *outGrad = out_->grad->getData() + g * outputOffset_;
if (weight_->getWGrad()) {
real *inData = in_->value->getData() + g * inputOffset_;
real *weightGrad = weight_->getWGrad()->getData() + g * weightOffset_;
hl_convolution_backward_filter(imageDesc_,
outGrad,
outputDesc_,
inData,
filterDesc_,
weightGrad,
convDesc_,
workSpace,
bwdFilterLimitBytes_,
bwdFilterAlgo_);
}
MatrixPtr preGrad = in_->grad;
if (NULL != preGrad) {
real *inGrad = preGrad->getData() + g * inputOffset_;
real *wgtData = weight_->getW()->getData() + g * weightOffset_;
hl_convolution_forward(imageDesc_,
outGrad,
outputDesc_,
inGrad,
filterDesc_,
wgtData,
convDesc_,
workSpace,
fwdLimitBytes_,
fwdAlgo_);
}
}
weight_->getParameterPtr()->incUpdate(callback);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "ConvBaseProjection.h"
#include "paddle/math/MathUtils.h"
namespace paddle {
/**
 * @brief Transposed-convolution (deconv) projection; the counterpart of
 * ConvProjection, performing the reverse mapping of a convolution.
 */
class ConvTransProjection : public ConvBaseProjection {
public:
/**
* Constructor.
*/
ConvTransProjection(const ProjectionConfig& config,
ParameterPtr parameter,
bool useGpu)
: ConvBaseProjection(config, parameter, useGpu) {}
~ConvTransProjection() {}
virtual void forward();
virtual void backward(const UpdateCallback& callback);
virtual size_t calOutputSize();
virtual size_t calInputSize();
};
} // namespace paddle
...@@ -192,6 +192,59 @@ void SumOfSquaresCostLayer::backwardImp(Matrix& output, ...@@ -192,6 +192,59 @@ void SumOfSquaresCostLayer::backwardImp(Matrix& output,
outputG.sumOfSquaresBp(output, *label.value); outputG.sumOfSquaresBp(output, *label.value);
} }
//
// class SmoothL1CostLayer
//
REGISTER_LAYER(smooth_l1, SmoothL1CostLayer);
/// Initializes the layer; all configuration handling is delegated to
/// CostLayer — this cost has no parameters of its own.
bool SmoothL1CostLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
  return CostLayer::init(layerMap, parameterMap);
}
/// Computes the per-sample smooth-L1 cost of `output` against `label`
/// into `target`.  On GPU the computation is staged through CPU matrices
/// (smoothL1 presumably has no GPU kernel — mirrors backwardImp).
void SmoothL1CostLayer::forwardImp(Matrix& output,
                                   Argument& label,
                                   Matrix& target) {
  if (!useGpu_) {
    target.smoothL1(output, *label.value);
    return;
  }
  // GPU path: copy all operands to host, compute there, copy back.
  MatrixPtr cpuTarget =
      Matrix::create(target.getHeight(), target.getWidth(), false, false);
  MatrixPtr cpuOutput =
      Matrix::create(output.getHeight(), output.getWidth(), false, false);
  MatrixPtr cpuLabel = Matrix::create(
      label.value->getHeight(), label.value->getWidth(), false, false);
  cpuTarget->copyFrom(target);
  cpuOutput->copyFrom(output);
  cpuLabel->copyFrom(*label.value);

  cpuTarget->smoothL1(*cpuOutput, *cpuLabel);
  target.copyFrom(*cpuTarget);
}
/// Back-propagates the smooth-L1 gradient of `output` w.r.t. `label`
/// into `outputG`, staging through CPU matrices on GPU.
void SmoothL1CostLayer::backwardImp(Matrix& output,
                                    Argument& label,
                                    Matrix& outputG) {
  if (!useGpu_) {
    outputG.smoothL1Bp(output, *label.value);
    return;
  }
  // GPU path: copy operands to host, compute there, copy back.
  MatrixPtr cpuOutG =
      Matrix::create(outputG.getHeight(), outputG.getWidth(), false, false);
  MatrixPtr cpuOutput =
      Matrix::create(output.getHeight(), output.getWidth(), false, false);
  MatrixPtr cpuLabel = Matrix::create(
      label.value->getHeight(), label.value->getWidth(), false, false);
  cpuOutG->copyFrom(outputG);
  cpuOutput->copyFrom(output);
  cpuLabel->copyFrom(*label.value);

  cpuOutG->smoothL1Bp(*cpuOutput, *cpuLabel);
  outputG.copyFrom(*cpuOutG);
}
// //
// class RankingCost // class RankingCost
// //
......
...@@ -159,6 +159,29 @@ public: ...@@ -159,6 +159,29 @@ public:
Matrix& outputGrad) override; Matrix& outputGrad) override;
}; };
/**
 * This cost layer computes the smooth L1 loss for real-valued regression
 * tasks.
 * \f[
 * L = \begin{cases}
 *       0.5 \, (output - label)^2, & |output - label| < 1 \\
 *       |output - label| - 0.5,    & \text{otherwise}
 *     \end{cases}
 * \f]
 */
class SmoothL1CostLayer : public CostLayer {
public:
  explicit SmoothL1CostLayer(const LayerConfig& config) : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  /// Writes the per-sample smooth-L1 cost into `cost`.
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  /// Writes the smooth-L1 gradient into `outputGrad`.
  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};
/** /**
* A cost layer for learning to rank (LTR) task. This layer contains at leat * A cost layer for learning to rank (LTR) task. This layer contains at leat
* three inputs. * three inputs.
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "NormLayer.h"
#include "paddle/math/BaseMatrix.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/// Returns a (channels_ x spatialDim) view onto sample `iter` of `data`
/// (no copy; the view aliases data's buffer).
MatrixPtr CrossChannelNormLayer::createSampleMatrix(MatrixPtr data,
                                                    size_t iter,
                                                    size_t spatialDim) {
  size_t sampleOffset = iter * channels_ * spatialDim;
  return Matrix::create(
      data->getData() + sampleOffset, channels_, spatialDim, false, useGpu_);
}
/// Returns a (1 x spatialDim) row view onto row `iter` of `data`
/// (no copy; the view aliases data's buffer).
MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
                                                     size_t iter,
                                                     size_t spatialDim) {
  real* rowStart = data->getData() + iter * spatialDim;
  return Matrix::create(rowStart, 1, spatialDim, false, useGpu_);
}
/// Forward: L2-normalizes each sample across channels at every spatial
/// location, then scales each channel by a trainable factor.
void CrossChannelNormLayer::forward(PassType passType) {
  Layer::forward(passType);
  MatrixPtr inV = getInputValue(0);

  size_t batchSize = inV->getHeight();
  size_t dataDim = inV->getWidth();
  CHECK_EQ(getSize(), dataDim);

  reserveOutput(batchSize, dataDim);
  MatrixPtr outV = getOutputValue();
  size_t spatialDim = dataDim / channels_;

  Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_);
  Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_);
  Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_);
  normBuffer_->zeroMem();
  inV->square2(*dataBuffer_);
  for (size_t i = 0; i < batchSize; i++) {
    const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
    const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim);
    MatrixPtr outVTmp = createSampleMatrix(outV, i, spatialDim);
    MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim);

    // compute norm.
    spatialBuffer_->sumCols(*dataTmp, 1, 0);
    // BUG FIX: the eps must be added to spatialBuffer_ (the divisor).  The
    // old code added it to normBuffer_ *before* the loop, but normTmp is
    // overwritten by copyFrom below, so the eps was discarded and a zero
    // norm caused a division by zero.
    spatialBuffer_->addScalar(*spatialBuffer_, 1e-6);
    spatialBuffer_->sqrt2(*spatialBuffer_);
    normTmp->copyFrom(*spatialBuffer_);
    outVTmp->copyFrom(*inVTmp);
    outVTmp->divRowVector(*spatialBuffer_);
    // scale the layer.
    outVTmp->mulColVector(*scale_->getW());
  }
}
/// Backward: propagates the gradient of the cross-channel normalization
/// and computes the gradient of the per-channel scale factors.
void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inG = getInputGrad(0);
  MatrixPtr inV = getInputValue(0);
  MatrixPtr outG = getOutputGrad();
  MatrixPtr outV = getOutputValue();

  size_t batchSize = inG->getHeight();
  size_t dataDim = inG->getWidth();
  size_t spatialDim = dataDim / channels_;

  // dataBuffer_ = outG .* outV, reused per sample below.
  dataBuffer_->dotMul(*outG, *outV);
  Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
  Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
  Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_);
  scaleDiff_->zeroMem();
  for (size_t i = 0; i < batchSize; i++) {
    MatrixPtr outGTmp = createSampleMatrix(outG, i, spatialDim);
    const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim);
    const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
    const MatrixPtr inGTmp = createSampleMatrix(inG, i, spatialDim);
    const MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim);

    channelBuffer_->sumRows(*dataTmp, 1, 0);
    channelBuffer_->dotDiv(*channelBuffer_, *(scale_->getW()));
    // store a / scale[i] in scaleDiff_ temporary (summed over the batch)
    scaleDiff_->add(*channelBuffer_, 1.);

    sampleBuffer_->dotMul(*inVTmp, *outGTmp);
    // BUG FIX: the destination scale must be 0, not 1 — with 1 the column
    // sums of previous samples stayed in spatialBuffer_ and were accumulated
    // again, corrupting the input gradient of every sample after the first.
    spatialBuffer_->sumCols(*sampleBuffer_, 1., 0.);
    // scale the grad
    inGTmp->copyFrom(*inVTmp);
    inGTmp->mulRowVector(*spatialBuffer_);
    // divide by square of norm
    spatialBuffer_->dotMul(*normTmp, *normTmp);
    inGTmp->divRowVector(*spatialBuffer_);
    // subtract
    inGTmp->add(*outGTmp, -1, 1);
    // divide by norm
    inGTmp->divRowVector(*normTmp);
    // scale the diff
    inGTmp->mulColVector(*scale_->getW());
  }
  // update scale
  if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
  scale_->getParameterPtr()->incUpdate(callback);
}
} // namespace paddle
...@@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "CudnnConvLayer.h" #include "CudnnConvBaseLayer.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
namespace paddle { namespace paddle {
REGISTER_LAYER(cudnn_conv, CudnnConvBaseLayer);
REGISTER_LAYER(cudnn_convt, CudnnConvBaseLayer);
REGISTER_LAYER(cudnn_conv, CudnnConvLayer); bool CudnnConvBaseLayer::init(const LayerMap &layerMap,
const ParameterMap &parameterMap) {
bool CudnnConvLayer::init(const LayerMap &layerMap,
const ParameterMap &parameterMap) {
if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; if (!ConvBaseLayer::init(layerMap, parameterMap)) return false;
CHECK(useGpu_) << "CudnnConvLayer only support gpu"; CHECK(useGpu_) << "CudnnConvLayer only support gpu";
...@@ -33,7 +33,11 @@ bool CudnnConvLayer::init(const LayerMap &layerMap, ...@@ -33,7 +33,11 @@ bool CudnnConvLayer::init(const LayerMap &layerMap,
CHECK(config_.shared_biases()); CHECK(config_.shared_biases());
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
ProjectionConfig *conf = new ProjectionConfig(); ProjectionConfig *conf = new ProjectionConfig();
conf->set_type("conv"); if (isDeconv_) {
conf->set_type("convt");
} else {
conf->set_type("conv");
}
conf->set_num_filters(numFilters_); conf->set_num_filters(numFilters_);
ConvConfig *convConf = conf->mutable_conv_conf(); ConvConfig *convConf = conf->mutable_conv_conf();
*convConf = *(config_.mutable_inputs(i)->mutable_conv_conf()); *convConf = *(config_.mutable_inputs(i)->mutable_conv_conf());
...@@ -47,14 +51,13 @@ bool CudnnConvLayer::init(const LayerMap &layerMap, ...@@ -47,14 +51,13 @@ bool CudnnConvLayer::init(const LayerMap &layerMap,
if (biases_.get() && sharedBiases_) { if (biases_.get() && sharedBiases_) {
hl_create_tensor_descriptor(&biasDesc_); hl_create_tensor_descriptor(&biasDesc_);
hl_create_tensor_descriptor(&outputDesc_); hl_create_tensor_descriptor(&outputDesc_);
hl_tensor_reshape(biasDesc_, 1, numFilters_ / groups_[0], 1, 1); hl_tensor_reshape(biasDesc_, 1, numFilters_, 1, 1);
biasOffset_ = numFilters_ / groups_[0];
} }
return true; return true;
} }
void CudnnConvLayer::forward(PassType passType) { void CudnnConvBaseLayer::forward(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
int batchSize = getInput(0).getBatchSize(); int batchSize = getInput(0).getBatchSize();
...@@ -67,37 +70,41 @@ void CudnnConvLayer::forward(PassType passType) { ...@@ -67,37 +70,41 @@ void CudnnConvLayer::forward(PassType passType) {
if (biases_) { if (biases_) {
REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str()); REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str());
int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
int outH, outW;
if (isDeconv_) {
outH = imgSizeH_[0];
outW = imgSizeW_[0];
} else {
outH = outputH_[0];
outW = outputW_[0];
}
hl_tensor_reshape(outputDesc_, hl_tensor_reshape(outputDesc_,
batchSize, batchSize,
numFilters_ / groups_[0], numFilters_,
outputH_[0], outH,
outputW_[0], outW,
numFilters_ * outputH_[0] * outputW_[0], numFilters_ * outH * outW,
outputH_[0] * outputW_[0], outH * outW,
outputW_[0], outW,
1); 1);
outputOffset_ = getOutputValue()->getWidth() / groups_[0]; real *outData = getOutputValue()->getData();
for (int g = 0; g < groups_[0]; ++g) { real *biasData = biases_->getW()->getData();
real *biasData = biases_->getW()->getData() + biasOffset_ * g; hl_convolution_forward_add_bias(biasDesc_, biasData, outputDesc_, outData);
real *outData = getOutputValue()->getData() + outputOffset_ * g;
hl_convolution_forward_add_bias(
biasDesc_, biasData, outputDesc_, outData);
}
} }
forwardActivation(); forwardActivation();
} }
void CudnnConvLayer::backward(const UpdateCallback &callback) { void CudnnConvBaseLayer::backward(const UpdateCallback &callback) {
backwardActivation(); backwardActivation();
if (biases_ && biases_->getWGrad()) { if (biases_ && biases_->getWGrad()) {
REGISTER_TIMER_INFO("CudnnConvBpBiasTimer", getName().c_str()); REGISTER_TIMER_INFO("CudnnConvBpBiasTimer", getName().c_str());
for (int g = 0; g < groups_[0]; ++g) { real *biasGrad = biases_->getWGrad()->getData();
real *biasGrad = biases_->getWGrad()->getData() + biasOffset_ * g; real *outGrad = getOutputGrad()->getData();
real *outGrad = getOutputGrad()->getData() + outputOffset_ * g; hl_convolution_backward_bias(biasDesc_, biasGrad, outputDesc_, outGrad);
hl_convolution_backward_bias(biasDesc_, biasGrad, outputDesc_, outGrad);
}
biases_->getParameterPtr()->incUpdate(callback); biases_->getParameterPtr()->incUpdate(callback);
} }
...@@ -106,7 +113,7 @@ void CudnnConvLayer::backward(const UpdateCallback &callback) { ...@@ -106,7 +113,7 @@ void CudnnConvLayer::backward(const UpdateCallback &callback) {
} }
} }
CudnnConvLayer::~CudnnConvLayer() { CudnnConvBaseLayer::~CudnnConvBaseLayer() {
if (biases_) { if (biases_) {
hl_destroy_tensor_descriptor(biasDesc_); hl_destroy_tensor_descriptor(biasDesc_);
hl_destroy_tensor_descriptor(outputDesc_); hl_destroy_tensor_descriptor(outputDesc_);
......
...@@ -30,27 +30,24 @@ namespace paddle { ...@@ -30,27 +30,24 @@ namespace paddle {
* *
* The config file api is img_conv_layer. * The config file api is img_conv_layer.
*/ */
class CudnnConvLayer : public ConvBaseLayer { class CudnnConvBaseLayer : public ConvBaseLayer {
protected: protected:
std::vector<std::unique_ptr<ProjectionConfig>> projConf_; std::vector<std::unique_ptr<ProjectionConfig>> projConf_;
std::vector<std::unique_ptr<Projection>> projections_; std::vector<std::unique_ptr<Projection>> projections_;
hl_tensor_descriptor biasDesc_; hl_tensor_descriptor biasDesc_;
hl_tensor_descriptor outputDesc_; hl_tensor_descriptor outputDesc_;
int biasOffset_;
int outputOffset_;
public: public:
explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {} explicit CudnnConvBaseLayer(const LayerConfig& config)
: ConvBaseLayer(config) {}
~CudnnConvLayer(); ~CudnnConvBaseLayer();
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
bool init(const LayerMap& layerMap, bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override; const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
void addBiases();
void bpropBiases();
}; };
} // namespace paddle } // namespace paddle
...@@ -26,6 +26,8 @@ Layer* NormLayer::create(const LayerConfig& config) { ...@@ -26,6 +26,8 @@ Layer* NormLayer::create(const LayerConfig& config) {
return new ResponseNormLayer(config); return new ResponseNormLayer(config);
} else if (norm == "cmrnorm-projection") { } else if (norm == "cmrnorm-projection") {
return new CMRProjectionNormLayer(config); return new CMRProjectionNormLayer(config);
} else if (norm == "cross-channel-norm") {
return new CrossChannelNormLayer(config);
} else { } else {
LOG(FATAL) << "Unknown norm type: " << norm; LOG(FATAL) << "Unknown norm type: " << norm;
return nullptr; return nullptr;
...@@ -54,4 +56,14 @@ bool ResponseNormLayer::init(const LayerMap& layerMap, ...@@ -54,4 +56,14 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
return true; return true;
} }
/// Reads the channel count from the input's norm config and wraps
/// parameter 0 as the per-channel scale weight (channels_ x 1).
bool CrossChannelNormLayer::init(const LayerMap& layerMap,
                                 const ParameterMap& parameterMap) {
  Layer::init(layerMap, parameterMap);
  CHECK(parameters_[0]);
  const NormConfig& conf = config_.inputs(0).norm_conf();
  channels_ = conf.channels();
  scale_.reset(new Weight(channels_, 1, parameters_[0]));
  return true;
}
} // namespace paddle } // namespace paddle
...@@ -65,4 +65,35 @@ public: ...@@ -65,4 +65,35 @@ public:
} }
}; };
/**
 * This layer applies normalization across the channels of each sample of a
 * conv layer's output, and scales the output by a group of trainable factors
 * whose dimension equals the number of channels.
 * - Input: One and only one input layer is accepted.
 * - Output: The normalized data of the input data.
 * Reference:
 *    Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
 *    Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
 */
class CrossChannelNormLayer : public NormLayer {
public:
  explicit CrossChannelNormLayer(const LayerConfig& config)
      : NormLayer(config) {}
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
  void forward(PassType passType);
  void backward(const UpdateCallback& callback);
  /// View of one sample: channels_ x spatialDim (aliases `data`).
  MatrixPtr createSampleMatrix(MatrixPtr data, size_t iter, size_t spatialDim);
  /// View of one row: 1 x spatialDim (aliases `data`).
  MatrixPtr createSpatialMatrix(MatrixPtr data, size_t iter, size_t spatialDim);

protected:
  size_t channels_;                // number of channels normalized across
  std::unique_ptr<Weight> scale_;  // trainable per-channel scale (channels_ x 1)
  MatrixPtr scaleDiff_;            // gradient accumulator for scale_
  MatrixPtr normBuffer_;           // cached per-position norms (batch x spatialDim)
  MatrixPtr dataBuffer_;           // element-wise squares / products scratch
  MatrixPtr channelBuffer_;        // channels_ x 1 scratch
  MatrixPtr spatialBuffer_;        // 1 x spatialDim scratch
  MatrixPtr sampleBuffer_;         // channels_ x spatialDim scratch
};
} // namespace paddle } // namespace paddle
...@@ -20,7 +20,7 @@ namespace paddle { ...@@ -20,7 +20,7 @@ namespace paddle {
/** /**
* @brief A layer for generating priorbox locations and variances. * @brief A layer for generating priorbox locations and variances.
* - Input: Two and only two input layer are accepted. The input layer must be * - Input: Two and only two input layer are accepted. The input layer must be
* be a data output layer and a convolution output layer. * be a data output layer and a convolution output layer.
* - Output: The priorbox locations and variances of the input data. * - Output: The priorbox locations and variances of the input data.
* Reference: * Reference:
* Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, * Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
...@@ -45,27 +45,32 @@ protected: ...@@ -45,27 +45,32 @@ protected:
MatrixPtr buffer_; MatrixPtr buffer_;
}; };
REGISTER_LAYER(priorbox, PriorBoxLayer);
bool PriorBoxLayer::init(const LayerMap& layerMap, bool PriorBoxLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) { const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap); Layer::init(layerMap, parameterMap);
auto pbConf = config_.inputs(0).priorbox_conf(); auto pbConf = config_.inputs(0).priorbox_conf();
std::vector<real> tmp;
aspectRatio_.push_back(1.);
std::copy(pbConf.min_size().begin(), std::copy(pbConf.min_size().begin(),
pbConf.min_size().end(), pbConf.min_size().end(),
std::back_inserter(minSize_)); std::back_inserter(minSize_));
std::copy(pbConf.max_size().begin(), std::copy(pbConf.max_size().begin(),
pbConf.max_size().end(), pbConf.max_size().end(),
std::back_inserter(maxSize_)); std::back_inserter(maxSize_));
std::copy(pbConf.aspect_ratio().begin(),
pbConf.aspect_ratio().end(),
std::back_inserter(aspectRatio_));
std::copy(pbConf.variance().begin(), std::copy(pbConf.variance().begin(),
pbConf.variance().end(), pbConf.variance().end(),
std::back_inserter(variance_)); std::back_inserter(variance_));
std::copy(pbConf.aspect_ratio().begin(),
pbConf.aspect_ratio().end(),
std::back_inserter(tmp));
// flip // flip
int inputRatioLength = aspectRatio_.size(); int inputRatioLength = tmp.size();
for (int index = 0; index < inputRatioLength; index++) for (int index = 0; index < inputRatioLength; index++) {
aspectRatio_.push_back(1 / aspectRatio_[index]); aspectRatio_.push_back(tmp[index]);
aspectRatio_.push_back(1.); aspectRatio_.push_back(1 / tmp[index]);
}
numPriors_ = aspectRatio_.size(); numPriors_ = aspectRatio_.size();
if (maxSize_.size() > 0) numPriors_++; if (maxSize_.size() > 0) numPriors_++;
return true; return true;
...@@ -94,12 +99,12 @@ void PriorBoxLayer::forward(PassType passType) { ...@@ -94,12 +99,12 @@ void PriorBoxLayer::forward(PassType passType) {
for (int w = 0; w < layerWidth; ++w) { for (int w = 0; w < layerWidth; ++w) {
real centerX = (w + 0.5) * stepW; real centerX = (w + 0.5) * stepW;
real centerY = (h + 0.5) * stepH; real centerY = (h + 0.5) * stepH;
int minSize = 0; real minSize = 0;
for (size_t s = 0; s < minSize_.size(); s++) { for (size_t s = 0; s < minSize_.size(); s++) {
// first prior. // first prior.
minSize = minSize_[s]; minSize = minSize_[s];
int boxWidth = minSize; real boxWidth = minSize;
int boxHeight = minSize; real boxHeight = minSize;
// xmin, ymin, xmax, ymax. // xmin, ymin, xmax, ymax.
tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
...@@ -112,7 +117,7 @@ void PriorBoxLayer::forward(PassType passType) { ...@@ -112,7 +117,7 @@ void PriorBoxLayer::forward(PassType passType) {
CHECK_EQ(minSize_.size(), maxSize_.size()); CHECK_EQ(minSize_.size(), maxSize_.size());
// second prior. // second prior.
for (size_t s = 0; s < maxSize_.size(); s++) { for (size_t s = 0; s < maxSize_.size(); s++) {
int maxSize = maxSize_[s]; real maxSize = maxSize_[s];
boxWidth = boxHeight = sqrt(minSize * maxSize); boxWidth = boxHeight = sqrt(minSize * maxSize);
tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
...@@ -145,6 +150,5 @@ void PriorBoxLayer::forward(PassType passType) { ...@@ -145,6 +150,5 @@ void PriorBoxLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
outV->copyFrom(buffer_->data_, dim * 2); outV->copyFrom(buffer_->data_, dim * 2);
} }
REGISTER_LAYER(priorbox, PriorBoxLayer);
} // namespace paddle } // namespace paddle
...@@ -56,17 +56,16 @@ void SequencePoolLayer::forward(PassType passType) { ...@@ -56,17 +56,16 @@ void SequencePoolLayer::forward(PassType passType) {
CHECK_EQ(newBatchSize_, starts->getSize() - 1); CHECK_EQ(newBatchSize_, starts->getSize() - 1);
resetOutput(newBatchSize_, dim); resetOutput(newBatchSize_, dim);
if (type_) {
CHECK(input.subSequenceStartPositions)
<< "when trans_type = seq, input must hasSubseq";
}
/* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq, /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
* thus, in this case, output_ has no sequenceStartPositions. * thus, in this case, output_ has no sequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new sequenceStartPositions. * case, we should compute the new sequenceStartPositions.
*/ */
if (type_) { if (type_) {
output_.degradeSequence(input, useGpu_); CHECK(input.subSequenceStartPositions)
<< "when trans_type = seq, input must hasSubseq";
output_.degradeSequence(input);
} }
} }
......
...@@ -34,8 +34,7 @@ DECLARE_double(checkgrad_eps); ...@@ -34,8 +34,7 @@ DECLARE_double(checkgrad_eps);
DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(thread_local_rand_use_global_seed);
DECLARE_bool(prev_batch_state); DECLARE_bool(prev_batch_state);
// Do one forward pass of convTrans layer and check to see if its output // Do one forward pass of ConvLayer using either exconv or cudnn_conv
// matches the given result
MatrixPtr doOneConvTest(size_t imgSize, MatrixPtr doOneConvTest(size_t imgSize,
size_t output_x, size_t output_x,
size_t stride, size_t stride,
...@@ -46,22 +45,35 @@ MatrixPtr doOneConvTest(size_t imgSize, ...@@ -46,22 +45,35 @@ MatrixPtr doOneConvTest(size_t imgSize,
size_t groups, size_t groups,
MatrixPtr& inputData, MatrixPtr& inputData,
real* param, real* param,
bool useGpu) { bool useGpu,
bool isDeconv = false) {
TestConfig config; TestConfig config;
config.biasSize = numfilters; config.biasSize = numfilters;
string layerType;
if (useGpu) { if (useGpu) {
config.layerConfig.set_type("cudnn_conv"); layerType = (isDeconv) ? "cudnn_convt" : "cudnn_conv";
} else { } else {
config.layerConfig.set_type("exconv"); layerType = (isDeconv) ? "exconvt" : "exconv";
} }
config.layerConfig.set_type(layerType);
config.layerConfig.set_num_filters(numfilters); config.layerConfig.set_num_filters(numfilters);
config.layerConfig.set_partial_sum(1); config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true); config.layerConfig.set_shared_biases(true);
size_t weightSize = channel * filter_size * filter_size * size_t weightSize = channel * filter_size * filter_size *
config.layerConfig.num_filters() / groups; config.layerConfig.num_filters() / groups;
config.inputDefs.push_back( if (isDeconv) {
{INPUT_DATA, "layer_0", imgSize * imgSize * channel, weightSize}); config.inputDefs.push_back(
{INPUT_DATA, "layer_0", output_x * output_x * channel, weightSize});
config.layerConfig.set_size(imgSize * imgSize *
config.layerConfig.num_filters());
} else {
config.inputDefs.push_back(
{INPUT_DATA, "layer_0", imgSize * imgSize * channel, weightSize});
config.layerConfig.set_size(output_x * output_x *
config.layerConfig.num_filters());
}
LayerInputConfig* input = config.layerConfig.add_inputs(); LayerInputConfig* input = config.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf(); ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(filter_size); conv->set_filter_size(filter_size);
...@@ -72,12 +84,15 @@ MatrixPtr doOneConvTest(size_t imgSize, ...@@ -72,12 +84,15 @@ MatrixPtr doOneConvTest(size_t imgSize,
conv->set_stride(stride); conv->set_stride(stride);
conv->set_stride_y(stride); conv->set_stride_y(stride);
conv->set_groups(groups); conv->set_groups(groups);
conv->set_filter_channels(channel / groups);
conv->set_img_size(imgSize); conv->set_img_size(imgSize);
conv->set_output_x(output_x); conv->set_output_x(output_x);
config.layerConfig.set_size(conv->output_x() * conv->output_x() * if (isDeconv) {
config.layerConfig.num_filters()); conv->set_filter_channels(numfilters / groups);
} else {
conv->set_filter_channels(channel / groups);
}
config.layerConfig.set_name("conv"); config.layerConfig.set_name("conv");
std::vector<DataLayerPtr> dataLayers; std::vector<DataLayerPtr> dataLayers;
...@@ -105,6 +120,8 @@ MatrixPtr doOneConvTest(size_t imgSize, ...@@ -105,6 +120,8 @@ MatrixPtr doOneConvTest(size_t imgSize,
TEST(Layer, convParaUnified) { TEST(Layer, convParaUnified) {
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
MatrixPtr input, resultCpu, resultGpu; MatrixPtr input, resultCpu, resultGpu;
/// TEST1 for conv ///
input = Matrix::create(1, 4 * 4, false, false); input = Matrix::create(1, 4 * 4, false, false);
real inputData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; real inputData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
real param[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1}; real param[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1};
...@@ -121,7 +138,7 @@ TEST(Layer, convParaUnified) { ...@@ -121,7 +138,7 @@ TEST(Layer, convParaUnified) {
/*groups*/ 1, /*groups*/ 1,
input, input,
param, param,
false); /*useGpu*/ false);
resultGpu = doOneConvTest(/* imgSize */ 4, resultGpu = doOneConvTest(/* imgSize */ 4,
/* output_x */ 2, /* output_x */ 2,
...@@ -133,9 +150,42 @@ TEST(Layer, convParaUnified) { ...@@ -133,9 +150,42 @@ TEST(Layer, convParaUnified) {
/*groups*/ 1, /*groups*/ 1,
input, input,
param, param,
true); /*useGpu*/ true);
checkMatrixEqual(resultCpu, resultGpu); checkMatrixEqual(resultCpu, resultGpu);
/// TEST1 for deconv ///
input = Matrix::create(1, 2 * 2, false, false);
real inputDataT[] = {1, 2, 3, 4};
input->setData(inputDataT);
resultCpu = doOneConvTest(/* imgSize */ 4,
/* output_x */ 2,
/* stride */ 1,
/* padding */ 0,
/* filter_size */ 3,
/*channel*/ 1,
/*numfilters*/ 2,
/*groups*/ 1,
input,
param,
/*useGpu*/ false,
/*isDeconv*/ true);
resultGpu = doOneConvTest(/* imgSize */ 4,
/* output_x */ 2,
/* stride */ 1,
/* padding */ 0,
/* filter_size */ 3,
/*channel*/ 1,
/*numfilters*/ 2,
/*groups*/ 1,
input,
param,
/*useGpu*/ true,
/*isDeconv*/ true);
checkMatrixEqual(resultCpu, resultGpu);
/// TEST2 for conv ///
input = Matrix::create(1, 3 * 3 * 2, false, false); input = Matrix::create(1, 3 * 3 * 2, false, false);
real inputData2[] = { real inputData2[] = {
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
...@@ -153,7 +203,7 @@ TEST(Layer, convParaUnified) { ...@@ -153,7 +203,7 @@ TEST(Layer, convParaUnified) {
/*groups*/ 1, /*groups*/ 1,
input, input,
param2, param2,
false); /*useGpu*/ false);
resultGpu = doOneConvTest(/* imgSize */ 3, resultGpu = doOneConvTest(/* imgSize */ 3,
/* output_x */ 2, /* output_x */ 2,
...@@ -165,9 +215,10 @@ TEST(Layer, convParaUnified) { ...@@ -165,9 +215,10 @@ TEST(Layer, convParaUnified) {
/*groups*/ 1, /*groups*/ 1,
input, input,
param2, param2,
true); /*useGpu*/ true);
checkMatrixEqual(resultCpu, resultGpu); checkMatrixEqual(resultCpu, resultGpu);
/// TEST3 for conv ///
real param3[] = {1, 2, 3, 4, 4, 3, 2, 1}; real param3[] = {1, 2, 3, 4, 4, 3, 2, 1};
resultCpu = doOneConvTest(/* imgSize */ 3, resultCpu = doOneConvTest(/* imgSize */ 3,
...@@ -180,7 +231,66 @@ TEST(Layer, convParaUnified) { ...@@ -180,7 +231,66 @@ TEST(Layer, convParaUnified) {
/*groups*/ 2, /*groups*/ 2,
input, input,
param3, param3,
false); /*useGpu*/ false);
resultGpu = doOneConvTest(/* imgSize */ 3,
/* output_x */ 2,
/* stride */ 1,
/* padding */ 0,
/* filter_size */ 2,
/*channel*/ 2,
/*numfilters*/ 2,
/*groups*/ 2,
input,
param3,
/*useGpu*/ true);
checkMatrixEqual(resultCpu, resultGpu);
/// TEST2 for deconv ///
input = Matrix::create(1, 2 * 2 * 2, false, false);
real inputData2T[] = {1, 2, 3, 4, 5, 6, 7, 8};
input->setData(inputData2T);
resultCpu = doOneConvTest(/* imgSize */ 3,
/* output_x */ 2,
/* stride */ 1,
/* padding */ 0,
/* filter_size */ 2,
/*channel*/ 2,
/*numfilters*/ 2,
/*groups*/ 1,
input,
param2,
/*useGpu*/ false,
/*isDeconv*/ true);
resultGpu = doOneConvTest(/* imgSize */ 3,
/* output_x */ 2,
/* stride */ 1,
/* padding */ 0,
/* filter_size */ 2,
/*channel*/ 2,
/*numfilters*/ 2,
/*groups*/ 1,
input,
param2,
/*useGpu*/ true,
/*isDeconv*/ true);
checkMatrixEqual(resultCpu, resultGpu);
/// TEST3 for deconv ///
resultCpu = doOneConvTest(/* imgSize */ 3,
/* output_x */ 2,
/* stride */ 1,
/* padding */ 0,
/* filter_size */ 2,
/*channel*/ 2,
/*numfilters*/ 2,
/*groups*/ 2,
input,
param3,
/*useGpu*/ false,
/*isDeconv*/ true);
resultGpu = doOneConvTest(/* imgSize */ 3, resultGpu = doOneConvTest(/* imgSize */ 3,
/* output_x */ 2, /* output_x */ 2,
...@@ -192,7 +302,8 @@ TEST(Layer, convParaUnified) { ...@@ -192,7 +302,8 @@ TEST(Layer, convParaUnified) {
/*groups*/ 2, /*groups*/ 2,
input, input,
param3, param3,
true); /*useGpu*/ true,
/*isDeconv*/ true);
checkMatrixEqual(resultCpu, resultGpu); checkMatrixEqual(resultCpu, resultGpu);
#endif #endif
} }
......
...@@ -166,15 +166,19 @@ TEST(Projection, scaling) { ...@@ -166,15 +166,19 @@ TEST(Projection, scaling) {
} }
} }
void testProjectionConv(size_t groups) { void testProjectionConv(size_t groups, bool isDeconv) {
const int NUM_FILTERS = 18; const int NUM_FILTERS = 18;
const int FILTER_SIZE = 2; const int FILTER_SIZE = 2;
const int FILTER_SIZE_Y = 3; const int FILTER_SIZE_Y = 4;
const int CHANNELS = 3; const int CHANNELS = 3;
const int IMAGE_SIZE = 16; const int IMAGE_SIZE = 16;
ProjectionConfig conf; ProjectionConfig conf;
conf.set_type("conv"); if (isDeconv) {
conf.set_type("convt");
} else {
conf.set_type("conv");
}
conf.set_num_filters(NUM_FILTERS); conf.set_num_filters(NUM_FILTERS);
ConvConfig* conv = conf.mutable_conv_conf(); ConvConfig* conv = conf.mutable_conv_conf();
...@@ -186,7 +190,11 @@ void testProjectionConv(size_t groups) { ...@@ -186,7 +190,11 @@ void testProjectionConv(size_t groups) {
conv->set_stride(2); conv->set_stride(2);
conv->set_stride_y(2); conv->set_stride_y(2);
conv->set_groups(groups); conv->set_groups(groups);
conv->set_filter_channels(conv->channels() / conv->groups()); if (isDeconv) {
conv->set_filter_channels(NUM_FILTERS / conv->groups());
} else {
conv->set_filter_channels(conv->channels() / conv->groups());
}
conv->set_img_size(IMAGE_SIZE); conv->set_img_size(IMAGE_SIZE);
int output_x = outputSize(conv->img_size(), int output_x = outputSize(conv->img_size(),
conv->filter_size(), conv->filter_size(),
...@@ -199,8 +207,14 @@ void testProjectionConv(size_t groups) { ...@@ -199,8 +207,14 @@ void testProjectionConv(size_t groups) {
conv->stride_y(), conv->stride_y(),
/* caffeMode */ true); /* caffeMode */ true);
conv->set_output_x(output_x); conv->set_output_x(output_x);
conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); conv->set_output_y(output_y);
conf.set_output_size(output_x * output_y * NUM_FILTERS); if (isDeconv) {
conf.set_input_size(output_x * output_y * CHANNELS);
conf.set_output_size(IMAGE_SIZE * IMAGE_SIZE * NUM_FILTERS);
} else {
conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS);
conf.set_output_size(output_x * output_y * NUM_FILTERS);
}
testProjectionGrad(conf, testProjectionGrad(conf,
INPUT_DATA, INPUT_DATA,
...@@ -215,8 +229,12 @@ void testProjectionConv(size_t groups) { ...@@ -215,8 +229,12 @@ void testProjectionConv(size_t groups) {
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
TEST(Projection, conv) { TEST(Projection, conv) {
testProjectionConv(1); /// test ConvProjection
testProjectionConv(3); testProjectionConv(1, false);
testProjectionConv(3, false);
/// test ConvTransProjection
testProjectionConv(1, true);
testProjectionConv(3, true);
} }
#endif #endif
...@@ -385,11 +403,11 @@ void testConvTransLayer(const string& type, bool trans, bool useGpu) { ...@@ -385,11 +403,11 @@ void testConvTransLayer(const string& type, bool trans, bool useGpu) {
config.layerConfig.set_partial_sum(1); config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true); config.layerConfig.set_shared_biases(true);
config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 288}); config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384});
LayerInputConfig* input = config.layerConfig.add_inputs(); LayerInputConfig* input = config.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf(); ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(2); conv->set_filter_size(2);
conv->set_filter_size_y(3); conv->set_filter_size_y(4);
conv->set_channels(16); conv->set_channels(16);
conv->set_padding(0); conv->set_padding(0);
conv->set_padding_y(1); conv->set_padding_y(1);
...@@ -416,6 +434,9 @@ TEST(Layer, convTransLayer) { ...@@ -416,6 +434,9 @@ TEST(Layer, convTransLayer) {
for (auto useGpu : {false, true}) { for (auto useGpu : {false, true}) {
testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu);
} }
#ifndef PADDLE_ONLY_CPU
testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true);
#endif
} }
TEST(Layer, blockExpandLayer) { TEST(Layer, blockExpandLayer) {
...@@ -1482,16 +1503,20 @@ TEST(Layer, BatchNormalizationLayer) { ...@@ -1482,16 +1503,20 @@ TEST(Layer, BatchNormalizationLayer) {
#endif #endif
} }
TEST(Operator, conv) { void testConvOperator(bool isDeconv) {
TestConfig config; TestConfig config;
const int NUM_FILTERS = 16; const int NUM_FILTERS = 16;
const int FILTER_SIZE = 2; const int FILTER_SIZE = 2;
const int FILTER_SIZE_Y = 3; const int FILTER_SIZE_Y = 3;
const int CHANNELS = 3; const int CHANNELS = 3;
const int IMAGE_SIZE = 16; const int IMAGE_SIZE = 16;
const int IMAGE_SIZE_Y = 8; const int IMAGE_SIZE_Y = 9;
OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs();
operatorConf.set_type("conv"); if (isDeconv) {
operatorConf.set_type("convt");
} else {
operatorConf.set_type("conv");
}
ConvConfig* conv = operatorConf.mutable_conv_conf(); ConvConfig* conv = operatorConf.mutable_conv_conf();
operatorConf.set_num_filters(NUM_FILTERS); operatorConf.set_num_filters(NUM_FILTERS);
conv->set_filter_size(FILTER_SIZE); conv->set_filter_size(FILTER_SIZE);
...@@ -1502,7 +1527,6 @@ TEST(Operator, conv) { ...@@ -1502,7 +1527,6 @@ TEST(Operator, conv) {
conv->set_stride(2); conv->set_stride(2);
conv->set_stride_y(2); conv->set_stride_y(2);
conv->set_groups(1); conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(IMAGE_SIZE); conv->set_img_size(IMAGE_SIZE);
conv->set_img_size_y(IMAGE_SIZE_Y); conv->set_img_size_y(IMAGE_SIZE_Y);
conv->set_output_x(outputSize(conv->img_size(), conv->set_output_x(outputSize(conv->img_size(),
...@@ -1515,11 +1539,22 @@ TEST(Operator, conv) { ...@@ -1515,11 +1539,22 @@ TEST(Operator, conv) {
conv->padding_y(), conv->padding_y(),
conv->stride_y(), conv->stride_y(),
/* caffeMode */ true)); /* caffeMode */ true));
config.layerConfig.set_size(conv->output_x() * conv->output_y() *
NUM_FILTERS);
config.inputDefs.push_back( if (isDeconv) {
{INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); conv->set_filter_channels(NUM_FILTERS / conv->groups());
config.inputDefs.push_back({INPUT_DATA,
"layer_0",
conv->output_x() * conv->output_y() * CHANNELS,
0});
config.layerConfig.set_size(IMAGE_SIZE * IMAGE_SIZE_Y * NUM_FILTERS);
} else {
conv->set_filter_channels(conv->channels() / conv->groups());
config.inputDefs.push_back(
{INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0});
config.layerConfig.set_size(conv->output_x() * conv->output_y() *
NUM_FILTERS);
}
config.inputDefs.push_back( config.inputDefs.push_back(
{INPUT_DATA, {INPUT_DATA,
"layer_1", "layer_1",
...@@ -1531,6 +1566,11 @@ TEST(Operator, conv) { ...@@ -1531,6 +1566,11 @@ TEST(Operator, conv) {
testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false); testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false);
} }
TEST(Operator, conv) {
testConvOperator(/*isDeconv*/ true);
testConvOperator(/*isDeconv*/ false);
}
TEST(Layer, FeatureMapExpandLayer) { TEST(Layer, FeatureMapExpandLayer) {
TestConfig config; TestConfig config;
config.layerConfig.set_type("featmap_expand"); config.layerConfig.set_type("featmap_expand");
...@@ -1602,6 +1642,39 @@ TEST(Layer, PadLayer) { ...@@ -1602,6 +1642,39 @@ TEST(Layer, PadLayer) {
} }
} }
TEST(Layer, CrossChannelNormLayer) {
TestConfig config;
config.layerConfig.set_type("norm");
config.layerConfig.set_size(100);
LayerInputConfig* input = config.layerConfig.add_inputs();
NormConfig* norm = input->mutable_norm_conf();
norm->set_norm_type("cross-channel-norm");
norm->set_channels(10);
norm->set_size(100);
norm->set_scale(0);
norm->set_pow(0);
norm->set_blocked(0);
config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10});
for (auto useGpu : {false, true}) {
testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5);
}
}
TEST(Layer, smooth_l1) {
TestConfig config;
config.layerConfig.set_type("smooth_l1");
config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 1, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0);
}
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);
......
...@@ -1453,6 +1453,24 @@ void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) { ...@@ -1453,6 +1453,24 @@ void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) {
true_type() /* bAsRowVector */, false_type()); true_type() /* bAsRowVector */, false_type());
} }
template<class T>
void BaseMatrixT<T>::mulColVector(BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0);
int numRows = height_;
int numCols = width_;
applyBinary(binary::DotMul<T>(), b, numRows, numCols, offset,
false_type(), true_type() /* bAsColVector */);
}
template<class T>
void BaseMatrixT<T>::divColVector(BaseMatrixT& b) {
MatrixOffset offset(0, 0, 0, 0);
int numRows = height_;
int numCols = width_;
applyBinary(binary::DotDiv<T>(), b, numRows, numCols, offset,
false_type(), true_type() /* bAsColVector */);
}
template<> template<>
template <class Agg> template <class Agg>
int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) { int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
......
...@@ -545,6 +545,9 @@ public: ...@@ -545,6 +545,9 @@ public:
void mulRowVector(BaseMatrixT& b); void mulRowVector(BaseMatrixT& b);
void divRowVector(BaseMatrixT& b); void divRowVector(BaseMatrixT& b);
void mulColVector(BaseMatrixT& b);
void divColVector(BaseMatrixT& b);
void addP2P(BaseMatrixT& b); void addP2P(BaseMatrixT& b);
/** /**
......
...@@ -3590,6 +3590,55 @@ void CpuMatrix::sumOfSquaresBp(Matrix& output, Matrix& label) { ...@@ -3590,6 +3590,55 @@ void CpuMatrix::sumOfSquaresBp(Matrix& output, Matrix& label) {
} }
} }
void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
CHECK(output.useGpu_ == false && label.useGpu_ == false)
<< "Matrix type are not equal";
size_t numSamples = getHeight();
size_t dim = output.getWidth();
CHECK_EQ(label.getHeight(), numSamples);
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(label.getWidth(), dim);
CHECK_EQ(getWidth(), (size_t)1);
real* out = output.getData();
real* cost = getData();
real* lbl = label.getData();
for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
for (size_t j = 0; j < dim; ++j) {
cost[j] = std::fabs(out[j] - lbl[j]);
if (cost[j] < 1.0)
cost[j] = 0.5 * cost[j] * cost[j];
else
cost[j] = cost[j] - 0.5;
}
}
}
void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
CHECK(output.useGpu_ == false && label.useGpu_ == false)
<< "Matrix type are not equal";
size_t numSamples = getHeight();
size_t dim = output.getWidth();
CHECK_EQ(label.getHeight(), numSamples);
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(label.getWidth(), dim);
CHECK_EQ(getWidth(), (size_t)1);
real* out = output.getData();
real* cost = getData();
real* lbl = label.getData();
// f'(x) = x if |x| < 1
// = sign(x) otherwise
for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
for (size_t j = 0; j < dim; ++j) {
cost[j] = out[j] - lbl[j];
if (std::fabs(cost[j]) >= 1) cost[j] = (0 < cost[j]) - (cost[j] < 0);
}
}
}
void CpuMatrix::tanh(Matrix& output) { void CpuMatrix::tanh(Matrix& output) {
CHECK(isContiguous()); CHECK(isContiguous());
CHECK(output.isContiguous()); CHECK(output.isContiguous());
......
...@@ -783,6 +783,14 @@ public: ...@@ -783,6 +783,14 @@ public:
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
} }
virtual void smoothL1(Matrix& output, Matrix& label) {
LOG(FATAL) << "Not implemented";
}
virtual void smoothL1Bp(Matrix& outputV, Matrix& label) {
LOG(FATAL) << "Not implemented";
}
virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; } virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; }
virtual void tanhDerivative(Matrix& output) { virtual void tanhDerivative(Matrix& output) {
...@@ -1720,6 +1728,9 @@ public: ...@@ -1720,6 +1728,9 @@ public:
/// gradient of sumOfSquares. /// gradient of sumOfSquares.
void sumOfSquaresBp(Matrix& outputV, Matrix& label); void sumOfSquaresBp(Matrix& outputV, Matrix& label);
void smoothL1(Matrix& output, Matrix& label);
void smoothL1Bp(Matrix& output, Matrix& label);
void tanh(Matrix& output); void tanh(Matrix& output);
void tanhDerivative(Matrix& output); void tanhDerivative(Matrix& output);
......
...@@ -110,6 +110,8 @@ TEST(BaseMatrix, BaseMatrix) { ...@@ -110,6 +110,8 @@ TEST(BaseMatrix, BaseMatrix) {
compare(&BaseMatrix::addRowVector); compare(&BaseMatrix::addRowVector);
compare(&BaseMatrix::mulRowVector); compare(&BaseMatrix::mulRowVector);
compare(&BaseMatrix::divRowVector); compare(&BaseMatrix::divRowVector);
compare(&BaseMatrix::mulColVector);
compare(&BaseMatrix::divColVector);
compare(&BaseMatrix::addP2P); compare(&BaseMatrix::addP2P);
compare(&BaseMatrix::invSqrt); compare(&BaseMatrix::invSqrt);
} }
......
...@@ -123,46 +123,6 @@ static void resizeAndCopy(ICpuGpuVectorPtr& dest, ...@@ -123,46 +123,6 @@ static void resizeAndCopy(ICpuGpuVectorPtr& dest,
} }
} }
static void resizeAndCopy(UserDefinedVectorPtr& dest,
const UserDefinedVectorPtr& src,
bool useGpu,
hl_stream_t stream) {
if (src) {
CHECK(!useGpu) << "not implemented";
size_t height = src->size();
if (!dest) {
dest = std::make_shared<std::vector<void*>>(height);
} else {
dest->resize(height);
}
std::copy_n(src->begin(), height, dest->begin());
} else {
dest.reset();
}
}
static void resizeAndCopy(UserDefinedVectorPtr& dest,
const UserDefinedVectorPtr& src,
int32_t startPos,
int32_t copySize,
bool useGpu,
hl_stream_t stream = HPPL_STREAM_DEFAULT) {
if (src) {
CHECK(!useGpu) << "not implemented";
CHECK_LE((size_t)startPos + copySize, src->size());
size_t height = copySize;
if (!dest) {
dest = std::make_shared<std::vector<void*>>(height);
} else {
dest->resize(height);
}
std::copy_n(src->begin() + startPos, height, dest->begin());
} else {
dest.reset();
}
}
static void resizeAndCopy(SVectorPtr& dest, static void resizeAndCopy(SVectorPtr& dest,
const SVectorPtr& src, const SVectorPtr& src,
bool useGpu, bool useGpu,
...@@ -223,7 +183,6 @@ void Argument::resizeAndCopyFrom(const Argument& src, ...@@ -223,7 +183,6 @@ void Argument::resizeAndCopyFrom(const Argument& src,
false /* useGpu */, false /* useGpu */,
stream); stream);
} }
resizeAndCopy(udp, src.udp, useGpu, stream);
resizeAndCopy(strs, src.strs, useGpu, stream); resizeAndCopy(strs, src.strs, useGpu, stream);
frameWidth = src.frameWidth; frameWidth = src.frameWidth;
frameHeight = src.frameHeight; frameHeight = src.frameHeight;
...@@ -255,7 +214,6 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, ...@@ -255,7 +214,6 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
resizeAndCopy(value, src.value, startRow, copySize, useGpu, stream); resizeAndCopy(value, src.value, startRow, copySize, useGpu, stream);
resizeAndCopy(grad, src.grad, startRow, copySize, useGpu, stream); resizeAndCopy(grad, src.grad, startRow, copySize, useGpu, stream);
resizeAndCopy(ids, src.ids, startRow, copySize, useGpu, stream); resizeAndCopy(ids, src.ids, startRow, copySize, useGpu, stream);
resizeAndCopy(udp, src.udp, startRow, copySize, useGpu, stream);
resizeAndCopy(strs, src.strs, startRow, copySize, useGpu, stream); resizeAndCopy(strs, src.strs, startRow, copySize, useGpu, stream);
return copySize; return copySize;
} else { } else {
...@@ -268,7 +226,6 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, ...@@ -268,7 +226,6 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
resizeAndCopy(value, src.value, startRow, copyFeatureSize, useGpu, stream); resizeAndCopy(value, src.value, startRow, copyFeatureSize, useGpu, stream);
resizeAndCopy(grad, src.grad, startRow, copyFeatureSize, useGpu, stream); resizeAndCopy(grad, src.grad, startRow, copyFeatureSize, useGpu, stream);
resizeAndCopy(ids, src.ids, startRow, copyFeatureSize, useGpu, stream); resizeAndCopy(ids, src.ids, startRow, copyFeatureSize, useGpu, stream);
resizeAndCopy(udp, src.udp, startRow, copySize, useGpu, stream);
resizeAndCopy(sequenceStartPositions, resizeAndCopy(sequenceStartPositions,
src.sequenceStartPositions, src.sequenceStartPositions,
startSeq, startSeq,
...@@ -583,7 +540,7 @@ void Argument::checkSubset() const { ...@@ -583,7 +540,7 @@ void Argument::checkSubset() const {
} }
} }
void Argument::degradeSequence(const Argument& input, bool useGpu) { void Argument::degradeSequence(const Argument& input) {
CHECK_EQ(input.hasSubseq(), 1UL); CHECK_EQ(input.hasSubseq(), 1UL);
size_t numSequences = input.getNumSequences(); size_t numSequences = input.getNumSequences();
size_t numSubSequences = input.getNumSubSequences(); size_t numSubSequences = input.getNumSubSequences();
......
...@@ -24,8 +24,6 @@ limitations under the License. */ ...@@ -24,8 +24,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
// vector of user defined pointers
typedef std::shared_ptr<std::vector<void*>> UserDefinedVectorPtr;
typedef std::shared_ptr<std::vector<std::string>> SVectorPtr; typedef std::shared_ptr<std::vector<std::string>> SVectorPtr;
struct Argument { struct Argument {
...@@ -40,7 +38,6 @@ struct Argument { ...@@ -40,7 +38,6 @@ struct Argument {
sequenceStartPositions(nullptr), sequenceStartPositions(nullptr),
subSequenceStartPositions(nullptr), subSequenceStartPositions(nullptr),
cpuSequenceDims(nullptr), cpuSequenceDims(nullptr),
udp(nullptr),
deviceId(-1), deviceId(-1),
allCount(0), allCount(0),
valueCount(0), valueCount(0),
...@@ -63,7 +60,6 @@ struct Argument { ...@@ -63,7 +60,6 @@ struct Argument {
sequenceStartPositions = argument.sequenceStartPositions; sequenceStartPositions = argument.sequenceStartPositions;
subSequenceStartPositions = argument.subSequenceStartPositions; subSequenceStartPositions = argument.subSequenceStartPositions;
cpuSequenceDims = argument.cpuSequenceDims; cpuSequenceDims = argument.cpuSequenceDims;
udp = argument.udp;
deviceId = argument.deviceId; deviceId = argument.deviceId;
allCount = argument.allCount; allCount = argument.allCount;
frameHeight = argument.frameHeight; frameHeight = argument.frameHeight;
...@@ -96,8 +92,6 @@ struct Argument { ...@@ -96,8 +92,6 @@ struct Argument {
// dimension of sequence, stored only in CPU // dimension of sequence, stored only in CPU
IVectorPtr cpuSequenceDims; IVectorPtr cpuSequenceDims;
UserDefinedVectorPtr udp; // user defined pointer
int deviceId; // the GPU device id which the argument in int deviceId; // the GPU device id which the argument in
int allCount; // the number of output layers using this argument int allCount; // the number of output layers using this argument
mutable int valueCount; // waiting this member when layer do forward mutable int valueCount; // waiting this member when layer do forward
...@@ -137,7 +131,6 @@ struct Argument { ...@@ -137,7 +131,6 @@ struct Argument {
if (ids) return ids->getSize(); if (ids) return ids->getSize();
if (grad) return grad->getHeight(); if (grad) return grad->getHeight();
if (in) return in->getHeight(); if (in) return in->getHeight();
if (udp) return udp->size();
if (strs) return strs->size(); if (strs) return strs->size();
return 0; return 0;
} }
...@@ -296,7 +289,7 @@ struct Argument { ...@@ -296,7 +289,7 @@ struct Argument {
/* /*
sequence has sub-sequence degrades to a sequence. sequence has sub-sequence degrades to a sequence.
*/ */
void degradeSequence(const Argument& input, bool useGpu); void degradeSequence(const Argument& input);
/** /**
* @brief getValueString will return the argument's output in string. There * @brief getValueString will return the argument's output in string. There
......
swig_paddle.py swig_paddle.py
_swig_paddle.so
...@@ -83,13 +83,18 @@ docker build -t paddle:dev . ...@@ -83,13 +83,18 @@ docker build -t paddle:dev .
The `docker build` command assumes that `Dockerfile` is in the root source tree. Note that in this design, this `Dockerfile` is this only one in our repo. The `docker build` command assumes that `Dockerfile` is in the root source tree. Note that in this design, this `Dockerfile` is this only one in our repo.
Users can specify a Ubuntu mirror server for faster downloading:
```bash
docker build -t paddle:dev --build-arg UBUNTU_MIRROR=mirror://mirrors.ubuntu.com/mirrors.txt .
```
### Build PaddlePaddle from Source Code ### Build PaddlePaddle from Source Code
Given the development image `paddle:dev`, the following command builds PaddlePaddle from the source tree on the development computer (host): Given the development image `paddle:dev`, the following command builds PaddlePaddle from the source tree on the development computer (host):
```bash ```bash
docker run -v $PWD:/paddle -e "GPU=OFF" -e "AVX=ON" -e "TEST=ON" paddle:dev docker run -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=OFF" -e "RUN_TEST=OFF" paddle:dev
``` ```
This command mounts the source directory on the host into `/paddle` in the container, so the default entry point of `paddle:dev`, `build.sh`, could build the source code with possible local changes. When it writes to `/paddle/build` in the container, it writes to `$PWD/build` on the host indeed. This command mounts the source directory on the host into `/paddle` in the container, so the default entry point of `paddle:dev`, `build.sh`, could build the source code with possible local changes. When it writes to `/paddle/build` in the container, it writes to `$PWD/build` on the host indeed.
...@@ -100,6 +105,14 @@ This command mounts the source directory on the host into `/paddle` in the conta ...@@ -100,6 +105,14 @@ This command mounts the source directory on the host into `/paddle` in the conta
- `$PWD/build/paddle-<version>.deb` for production installation, and - `$PWD/build/paddle-<version>.deb` for production installation, and
- `$PWD/build/Dockerfile`, which builds the production Docker image. - `$PWD/build/Dockerfile`, which builds the production Docker image.
Users can specify the following Docker build arguments with either "ON" or "OFF" value:
- `WITH_GPU`: ***Required***. Generates NVIDIA CUDA GPU code and relies on CUDA libraries.
- `WITH_AVX`: ***Required***. Set to "OFF" prevents from generating AVX instructions. If you don't know what is AVX, you might want to set "ON".
- `WITH_TEST`: ***Optional, default OFF***. Build unit tests binaries. Once you've built the unit tests, you can run these test manually by the following command:
```bash
docker run -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" paddle:dev sh -c "cd /paddle/build; make coverall"
```
- `RUN_TEST`: ***Optional, default OFF***. Run unit tests after building. You can't run unit tests without building it.
### Build the Production Docker Image ### Build the Production Docker Image
......
#!/bin/bash #!/bin/bash
function abort(){
echo "An error occurred. Exiting..." 1>&2
exit 1
}
trap 'abort' 0
set -e set -e
mkdir -p /paddle/dist/cpu
mkdir -p /paddle/dist/gpu # Set BASE_IMAGE according to env variables
mkdir -p /paddle/dist/cpu-noavx
mkdir -p /paddle/dist/gpu-noavx
# Set BASE_IMAGE and DEB_PATH according to env variables
if [ ${WITH_GPU} == "ON" ]; then if [ ${WITH_GPU} == "ON" ]; then
BASE_IMAGE="nvidia/cuda:7.5-cudnn5-runtime-ubuntu14.04" BASE_IMAGE="nvidia/cuda:7.5-cudnn5-runtime-ubuntu14.04"
# additional packages to install when building gpu images # additional packages to install when building gpu images
GPU_DOCKER_PKG="python-pip" GPU_DOCKER_PKG="python-pip python-dev"
if [ ${WITH_AVX} == "ON" ]; then
DEB_PATH="dist/gpu/"
DOCKER_SUFFIX="gpu"
else
DEB_PATH="dist/gpu-noavx/"
DOCKER_SUFFIX="gpu-noavx"
fi
else else
BASE_IMAGE="python:2.7.13-slim" BASE_IMAGE="python:2.7.13-slim"
if [ ${WITH_AVX} == "ON" ]; then
DEB_PATH="dist/cpu/"
DOCKER_SUFFIX="cpu"
else
DEB_PATH="dist/cpu-noavx/"
DOCKER_SUFFIX="noavx"
fi
fi fi
# If Dockerfile.* sets BUILD_AND_INSTALL to 'ON', it would have copied
# source tree to /paddle, and this scripts should build it into
# /paddle/build.
if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so
fi
mkdir -p /paddle/build # -p means no error if exists DOCKERFILE_GPU_ENV=""
cd /paddle/build if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
# clean local cmake and third_party cache DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
if [ ${DELETE_BUILD_CACHE} == 'ON' ]; then
rm -rf * && rm -rf ../third_party
fi
cmake .. \
-DWITH_DOC=${WITH_DOC:-OFF} \
-DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_SWIG_PY=ON \
-DCUDNN_ROOT=/usr/ \
-DWITH_STYLE_CHECK=OFF \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
make -j `nproc`
make install
# generate deb package for current build
# FIXME(typhoonzero): should we remove paddle/scripts/deb ?
# FIXME: CPACK_DEBIAN_PACKAGE_DEPENDS removes all dev dependencies, must
# install them in docker
cpack -D CPACK_GENERATOR='DEB' -D CPACK_DEBIAN_PACKAGE_DEPENDS="" ..
mv /paddle/build/*.deb /paddle/${DEB_PATH}
if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then # for cmake to find cudnn
apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so
# Install woboq_codebrowser. fi
git clone https://github.com/woboq/woboq_codebrowser /woboq
cd /woboq
cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
-DCMAKE_BUILD_TYPE=Release \
.
make
export WOBOQ_OUT=/usr/share/nginx/html/paddle mkdir -p /paddle/build
export BUILD_DIR=/paddle/build cd /paddle/build
mkdir -p $WOBOQ_OUT
cp -rv /woboq/data $WOBOQ_OUT/../data # build script will not fail if *.deb does not exist
/woboq/generator/codebrowser_generator \ rm *.deb 2>/dev/null || true
cmake .. \
-DCMAKE_BUILD_TYPE=Release \
-DWITH_DOC=${WITH_DOC:-OFF} \
-DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_SWIG_PY=ON \
-DCUDNN_ROOT=/usr/ \
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \
-DON_COVERALLS=${WITH_TEST:-OFF} \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
make -j `nproc`
if [[ ${RUN_TEST:-OFF} == "ON" ]]; then
make coveralls
fi
make install
# generate deb package for current build
# FIXME(typhoonzero): should we remove paddle/scripts/deb ?
# FIXME: CPACK_DEBIAN_PACKAGE_DEPENDS removes all dev dependencies, must
# install them in docker
cpack -D CPACK_GENERATOR='DEB' -D CPACK_DEBIAN_PACKAGE_DEPENDS="" ..
if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then
apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev
# Install woboq_codebrowser.
git clone https://github.com/woboq/woboq_codebrowser /woboq
cd /woboq
cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
-DCMAKE_BUILD_TYPE=Release \
.
make
export WOBOQ_OUT=/usr/share/nginx/html/paddle
export BUILD_DIR=/paddle/build
mkdir -p $WOBOQ_OUT
cp -rv /woboq/data $WOBOQ_OUT/../data
/woboq/generator/codebrowser_generator \
-b /paddle/build \ -b /paddle/build \
-a \ -a \
-o $WOBOQ_OUT \ -o $WOBOQ_OUT \
-p paddle:/paddle -p paddle:/paddle
/woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
cd /woboq cd /woboq
make clean make clean
fi
pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl
pip install /usr/local/opt/paddle/share/wheels/paddle*.whl
paddle version
if [[ ${DOCKER_BUILD:-FALSE} == 'TRUE' ]]; then
# reduce docker image size
rm -rf /paddle/build
rm -rf /usr/local/opt/paddle/share/wheels/
fi
fi fi
paddle version
# generate production docker image Dockerfile # generate production docker image Dockerfile
if [ ${USE_MIRROR} ]; then if [ ${USE_MIRROR} ]; then
MIRROR_UPDATE="sed 's@http:\/\/archive.ubuntu.com\/ubuntu\/@mirror:\/\/mirrors.ubuntu.com\/mirrors.txt@' -i /etc/apt/sources.list && \\" MIRROR_UPDATE="sed 's@http:\/\/archive.ubuntu.com\/ubuntu\/@mirror:\/\/mirrors.ubuntu.com\/mirrors.txt@' -i /etc/apt/sources.list && \\"
...@@ -106,39 +80,23 @@ else ...@@ -106,39 +80,23 @@ else
MIRROR_UPDATE="\\" MIRROR_UPDATE="\\"
fi fi
cat > /paddle/build/Dockerfile.${DOCKER_SUFFIX} <<EOF cat > /paddle/build/Dockerfile <<EOF
FROM ${BASE_IMAGE} FROM ${BASE_IMAGE}
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com> MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
# ENV variables
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV WITH_GPU=${WITH_GPU}
ENV WITH_AVX=\${WITH_AVX:-ON}
ENV WITH_DOC=\${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=\${WITH_STYLE_CHECK:-OFF}
ENV HOME /root ENV HOME /root
ENV LANG en_US.UTF-8 ENV LANG en_US.UTF-8
# Use Fix locales to en_US.UTF-8 # Use Fix locales to en_US.UTF-8
RUN ${MIRROR_UPDATE} RUN ${MIRROR_UPDATE}
apt-get update && \ apt-get update && \
apt-get install -y libgfortran3 ${GPU_DOCKER_PKG} && \ apt-get install -y libgfortran3 libpython2.7 ${GPU_DOCKER_PKG} && \
apt-get clean -y && \ apt-get clean -y && \
pip install --upgrade pip && \ pip install --upgrade pip && \
pip install -U 'protobuf==3.1.0' requests pip install -U 'protobuf==3.1.0' requests numpy
RUN pip install numpy
# Use different deb file when building different type of images # Use different deb file when building different type of images
ADD \$PWD/${DEB_PATH}*.deb /usr/local/opt/paddle/deb/ ADD build/*.deb /usr/local/opt/paddle/deb/
RUN dpkg --force-all -i /usr/local/opt/paddle/deb/*.deb && rm -f /usr/local/opt/paddle/deb/*.deb # run paddle version to install python packages first
RUN dpkg -i /usr/local/opt/paddle/deb/*.deb && rm -f /usr/local/opt/paddle/deb/*.deb && paddle version
ENV PATH="/usr/local/opt/paddle/bin/:${PATH}" ${DOCKERFILE_GPU_ENV}
# default command shows the paddle version and exit # default command shows the paddle version and exit
CMD ["paddle", "version"] CMD ["paddle", "version"]
EOF EOF
trap : 0
...@@ -94,16 +94,22 @@ else: ...@@ -94,16 +94,22 @@ else:
EOF EOF
if [ $? -eq 1 ]; then # Older version installed, or not installed at all if [ $? -eq 1 ]; then # Older version installed, or not installed at all
echo "First time run paddle, need to install some python dependencies." echo "First time run paddle, need to install some python dependencies."
BASEDIR=$(dirname "$0") # setuptools normalizes package version, so we need to use normalized
pip install ${BASEDIR}/../opt/paddle/share/wheels/*-@PADDLE_VERSION@-*.whl # package version for paddle python package
if [ $? -ne 0 ]; then PYTHON_PADDLE_VERSION=$(python -c 'import packaging
echo "pip install wheels failed. " import setuptools
echo "Please use 'sudo paddle' at the first time you use PaddlePaddle" print str(packaging.version.Version("@PADDLE_VERSION@"))
echo "PaddlePaddle will install some python dependencies automatically." ' 2>/dev/null)
exit 1 BASEDIR=$(dirname "$0")
fi pip install ${BASEDIR}/../opt/paddle/share/wheels/*-${PYTHON_PADDLE_VERSION}-*.whl
echo "Python dependencies are installed." if [ $? -ne 0 ]; then
echo "pip install wheels failed. "
echo "Please use 'sudo paddle' at the first time you use PaddlePaddle"
echo "PaddlePaddle will install some python dependencies automatically."
exit 1
fi
echo "Python dependencies are installed."
fi fi
case "$1" in case "$1" in
......
...@@ -5,7 +5,7 @@ NPROC=1 ...@@ -5,7 +5,7 @@ NPROC=1
export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
export PYTHONHOME=/opt/python/2.7.12 export PYTHONHOME=/opt/python/2.7.12
export PATH=/opt/python/2.7.12/bin:${PATH} export PATH=/opt/python/2.7.12/bin:${PATH}
cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DWITH_COVERAGE=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
NRPOC=`nproc` NRPOC=`nproc`
make -j $NPROC make -j $NPROC
make coveralls make coveralls
......
...@@ -12,68 +12,19 @@ ...@@ -12,68 +12,19 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# This file is used to build paddle python binding package.
# It will be invoked by Makefile that generated by COMAKE
from setuptools import setup, Extension from setuptools import setup, Extension
import numpy as np
import api.paddle_ld_flags
import platform
import os
system = platform.system().lower()
is_osx = (system == 'darwin')
is_win = (system == 'windows')
is_lin = (system == 'linux')
# The extra links will passed from COMAKE
# because generate paddle LDFLAGS is too complicated to do in setup.py
# it just read COMAKE generated LDFLAGS.
extra_comps = []
extra_links = []
obj = api.paddle_ld_flags.PaddleLDFlag()
extra_comps = obj.c_flag()
ldflags = obj.ldflag_str()
if ldflags is not None:
extra_links.extend(ldflags.split(" "))
try:
with open('.py_paddle_extra_link_flags', 'r') as f:
for line in f:
extra_links += line.split()
except:
pass
if is_lin == True:
extra_links = ["-Xlinker", '-start-group'] + extra_links + ["-Xlinker", "-end-group"]
elif is_osx == True:
os.environ["ARCHFLAGS"] = "-arch x86_64"
extra_links = ["-Wl,-all_load"] + extra_links
include_dirs = [np.get_include(), "../"] # include numpy and paddle.
os.environ["CC"] = "@CMAKE_C_COMPILER@"
os.environ["CXX"] = "@CMAKE_CXX_COMPILER@"
setup(name="py_paddle", setup(name="py_paddle",
version="@PADDLE_VERSION@", version="${PADDLE_VERSION}",
ext_modules=[ packages=['py_paddle'],
Extension('py_paddle._swig_paddle', # Build SWIG Extension. include_package_data=True,
['Paddle_wrap.cxx'], package_data={'py_paddle':['*.py','_swig_paddle.so']},
language = "c++", install_requires = [
include_dirs = include_dirs, 'nltk>=3.2.2',
extra_link_args = extra_links, 'numpy>=1.8.0', # The numpy is required.
extra_compile_args = extra_comps 'protobuf>=${PROTOBUF_VERSION}' # The paddle protobuf version
) ],
], url='http://www.paddlepaddle.org/',
packages=['py_paddle'], license='Apache 2.0',
include_dirs = include_dirs,
install_requires = [
'nltk>=3.2.2',
'numpy>=1.8.0', # The numpy is required.
'protobuf>=3.0.0' # The paddle protobuf version
],
) )
...@@ -12,6 +12,7 @@ limitations under the License. */ ...@@ -12,6 +12,7 @@ limitations under the License. */
#pragma once #pragma once
#include "Common.h" #include "Common.h"
#include "Error.h"
namespace paddle { namespace paddle {
...@@ -97,4 +98,37 @@ private: ...@@ -97,4 +98,37 @@ private:
#define HAS_AVX512 HAS_SIMD(SIMD_AVX512) #define HAS_AVX512 HAS_SIMD(SIMD_AVX512)
// clang-format on // clang-format on
/**
* Invoke checkCPUFeature() before Paddle initialization to
* check target machine whether support compiled instructions.
* If not, simply throw out an error.
*/
inline Error __must_check checkCPUFeature() {
Error err;
#ifndef __AVX__
if (HAS_AVX) {
LOG(WARNING) << "PaddlePaddle wasn't compiled to use avx instructions, "
<< "but these are available on your machine and could "
<< "speed up CPU computations via CMAKE .. -DWITH_AVX=ON";
}
#else
if (!HAS_AVX) {
err = Error(
"PaddlePaddle was compiled to use avx instructions, "
"but these aren't available on your machine, please "
"disable it via CMAKE .. -DWITH_AVX=OFF");
}
#endif // __AVX__
#ifdef __SSE3__
if (!HAS_SSE3) {
err = Error(
"PaddlePaddle was compiled to use sse3 instructions, "
"which is the minimum requirement of PaddlePaddle. "
"But these aren't available on your current machine.");
}
#endif // __SSE3__
return err;
}
} // namespace paddle } // namespace paddle
...@@ -195,9 +195,14 @@ extern const char enable_virtualenv_py[]; ...@@ -195,9 +195,14 @@ extern const char enable_virtualenv_py[];
} }
void initPython(int argc, char** argv) { void initPython(int argc, char** argv) {
#ifndef PADDLE_NO_PYTHON #ifndef PADDLE_NO_PYTHON
char pyHome[] = "@PYTHON_INSTALL_DIR@"; // NOLINT std::string pyHome;
if (strlen(pyHome)) { #if defined(__APPLE__) || defined(__OSX__)
Py_SetPythonHome(pyHome); pyHome = "/usr/local/Frameworks/Python.framework/Versions/2.7";
Py_SetPythonHome(const_cast<char*>(pyHome.c_str()));
#endif
pyHome = "@PYTHON_INSTALL_DIR@"; // NOLINT
if (!pyHome.empty()) {
Py_SetPythonHome(const_cast<char*>(pyHome.c_str()));
} }
Py_SetProgramName(argv[0]); Py_SetProgramName(argv[0]);
Py_Initialize(); Py_Initialize();
......
...@@ -26,6 +26,7 @@ limitations under the License. */ ...@@ -26,6 +26,7 @@ limitations under the License. */
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include "CpuId.h"
#include "CustomStackTrace.h" #include "CustomStackTrace.h"
#include "Logging.h" #include "Logging.h"
#include "StringUtil.h" #include "StringUtil.h"
...@@ -185,6 +186,7 @@ void initMain(int argc, char** argv) { ...@@ -185,6 +186,7 @@ void initMain(int argc, char** argv) {
} }
version::printVersion(); version::printVersion();
checkCPUFeature().check();
runInitFunctions(); runInitFunctions();
} }
......
...@@ -686,25 +686,17 @@ class ContextProjection(Projection): ...@@ -686,25 +686,17 @@ class ContextProjection(Projection):
@config_class @config_class
class ConvProjection(Projection): class ConvBaseProjection(Projection):
type = 'conv'
def __init__(self, def __init__(self,
input_layer_name, input_layer_name,
num_filters=None, num_filters=None,
conv_conf=None, conv_conf=None,
**xargs): **xargs):
super(ConvProjection, self).__init__(input_layer_name, **xargs) super(ConvBaseProjection, self).__init__(input_layer_name, **xargs)
if num_filters is not None: if num_filters is not None:
self.proj_conf.num_filters = num_filters self.proj_conf.num_filters = num_filters
parse_conv(conv_conf, input_layer_name, self.proj_conf.conv_conf,
num_filters)
self.proj_conf.output_size = self.proj_conf.conv_conf.output_x * \
self.proj_conf.conv_conf.output_y * \
num_filters
def calc_output_size(self, input_layer_config): def calc_output_size(self, input_layer_config):
return self.proj_conf.output_size return self.proj_conf.output_size
...@@ -723,6 +715,48 @@ class ConvProjection(Projection): ...@@ -723,6 +715,48 @@ class ConvProjection(Projection):
return None return None
@config_class
class ConvProjection(ConvBaseProjection):
type = 'conv'
def __init__(self,
input_layer_name,
num_filters=None,
conv_conf=None,
**xargs):
super(ConvProjection, self).__init__(input_layer_name, num_filters,
conv_conf, **xargs)
parse_conv(conv_conf, self.input_layer_name, self.proj_conf.conv_conf,
num_filters)
self.proj_conf.output_size = self.proj_conf.conv_conf.output_x * \
self.proj_conf.conv_conf.output_y * \
num_filters
@config_class
class ConvTransProjection(ConvBaseProjection):
type = 'convt'
def __init__(self,
input_layer_name,
num_filters=None,
conv_conf=None,
**xargs):
super(ConvTransProjection, self).__init__(input_layer_name, num_filters,
conv_conf, **xargs)
parse_conv(
conv_conf,
self.input_layer_name,
self.proj_conf.conv_conf,
num_filters,
trans=True)
self.proj_conf.output_size = self.proj_conf.conv_conf.img_size_y * \
self.proj_conf.conv_conf.img_size * \
num_filters
# Define a operator for mixed layer # Define a operator for mixed layer
@config_class @config_class
class Operator(Cfg): class Operator(Cfg):
...@@ -789,6 +823,36 @@ class ConvOperator(Operator): ...@@ -789,6 +823,36 @@ class ConvOperator(Operator):
return self.operator_conf.output_size return self.operator_conf.output_size
@config_class
class ConvTransOperator(Operator):
type = 'convt'
def __init__(self,
input_layer_names,
num_filters=None,
conv_conf=None,
**xargs):
super(ConvTransOperator, self).__init__(input_layer_names, **xargs)
if num_filters is not None:
self.operator_conf.num_filters = num_filters
parse_conv(
conv_conf,
MakeLayerNameInSubmodel(input_layer_names[0]),
self.operator_conf.conv_conf,
num_filters,
trans=True)
self.operator_conf.output_size = \
self.operator_conf.conv_conf.img_size * \
self.operator_conf.conv_conf.img_size_y * \
num_filters
config_assert(len(input_layer_names) == 2, "Conv is binary operator")
def calc_output_size(self, input_sizes):
return self.operator_conf.output_size
# please refer to the comments in proto/ModelConfig.proto # please refer to the comments in proto/ModelConfig.proto
@config_class @config_class
class Conv(Cfg): class Conv(Cfg):
...@@ -1156,9 +1220,11 @@ def parse_image(image, input_layer_name, image_conf): ...@@ -1156,9 +1220,11 @@ def parse_image(image, input_layer_name, image_conf):
def parse_norm(norm, input_layer_name, norm_conf): def parse_norm(norm, input_layer_name, norm_conf):
norm_conf.norm_type = norm.norm_type norm_conf.norm_type = norm.norm_type
config_assert(norm.norm_type in ['rnorm', 'cmrnorm-projection'], config_assert(
"norm-type %s is not in [rnorm, 'cmrnorm-projection']" % norm.norm_type in
norm.norm_type) ['rnorm', 'cmrnorm-projection', 'cross-channel-norm'],
"norm-type %s is not in [rnorm, cmrnorm-projection, cross-channel-norm]"
% norm.norm_type)
norm_conf.channels = norm.channels norm_conf.channels = norm.channels
norm_conf.size = norm.size norm_conf.size = norm.size
norm_conf.scale = norm.scale norm_conf.scale = norm.scale
...@@ -1772,8 +1838,17 @@ class ConvTransLayerBase(LayerBase): ...@@ -1772,8 +1838,17 @@ class ConvTransLayerBase(LayerBase):
use_gpu = int(g_command_config_args.get("use_gpu", 0)) use_gpu = int(g_command_config_args.get("use_gpu", 0))
parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) parallel_nn = int(g_command_config_args.get("parallel_nn", 0))
# cudnn_convt has not been implemented so use exconvt only # Automatically select cudnn_type for GPU and exconvt for CPU
self.layer_type = "exconvt" # if set type=exconvt, but still reserve the way user specify
# exconvt or cudnn_convt manually.
if self.layer_type == "cudnn_convt":
config_assert(use_gpu, "cudnn_convt only support GPU")
if (use_gpu == 1 and self.layer_type != "exconvt" and
(parallel_nn == 0 or self.config.device > -1)):
self.layer_type = "cudnn_convt"
else:
self.layer_type = "exconvt"
# need to specify layer in config # need to specify layer in config
self.config.type = self.layer_type self.config.type = self.layer_type
...@@ -1790,10 +1865,9 @@ class ConvTransLayerBase(LayerBase): ...@@ -1790,10 +1865,9 @@ class ConvTransLayerBase(LayerBase):
trans=True) trans=True)
conv_conf = self.config.inputs[input_index].conv_conf conv_conf = self.config.inputs[input_index].conv_conf
psize = self.calc_parameter_size(conv_conf) psize = self.calc_parameter_size(conv_conf)
print("output size for %s is %d " % (name, conv_conf.output_x))
self.create_input_parameter(input_index, psize) self.create_input_parameter(input_index, psize)
self.set_layer_size( self.set_cnn_layer(name, conv_conf.img_size_y, conv_conf.img_size,
(conv_conf.img_size**2) * self.config.num_filters) self.config.num_filters)
psize = self.config.size psize = self.config.size
if shared_biases: if shared_biases:
...@@ -1810,6 +1884,11 @@ class ConvTransLayer(ConvTransLayerBase): ...@@ -1810,6 +1884,11 @@ class ConvTransLayer(ConvTransLayerBase):
layer_type = 'exconvt' layer_type = 'exconvt'
@config_layer('cudnn_convt')
class ConvTransLayer(ConvTransLayerBase):
layer_type = 'cudnn_convt'
@config_layer('norm') @config_layer('norm')
class NormLayer(LayerBase): class NormLayer(LayerBase):
def __init__(self, name, inputs, **xargs): def __init__(self, name, inputs, **xargs):
...@@ -1821,6 +1900,9 @@ class NormLayer(LayerBase): ...@@ -1821,6 +1900,9 @@ class NormLayer(LayerBase):
norm_conf) norm_conf)
self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x, self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x,
norm_conf.channels, False) norm_conf.channels, False)
if norm_conf.norm_type == "cross-channel-norm":
self.create_input_parameter(0, norm_conf.channels,
[norm_conf.channels, 1])
@config_layer('pool') @config_layer('pool')
...@@ -2222,7 +2304,10 @@ def Link( ...@@ -2222,7 +2304,10 @@ def Link(
# memory for recurrent layer group. # memory for recurrent layer group.
# *name* and *size* are actual layer's name and size. # *name* and *size* are actual layer's name and size.
# will return name of the memory, # If *name* is None, need to provide *memory_name* and need to use
# SetMemoryInput() later to specify the layer which this memory remembers.
#
# return the name of the memory,
# use this name if you assign the memory as other layer's input # use this name if you assign the memory as other layer's input
# #
# boot frame of memory is zeroed by default, # boot frame of memory is zeroed by default,
...@@ -2234,15 +2319,18 @@ def Link( ...@@ -2234,15 +2319,18 @@ def Link(
# can only be initailized by a *boot_layer* which is a sequence. # can only be initailized by a *boot_layer* which is a sequence.
# #
@config_func @config_func
def Memory( def Memory(name,
name, size,
size, is_sequence=False,
is_sequence=False, boot_layer=None,
boot_layer=None, boot_bias=False,
boot_bias=False, boot_bias_active_type="",
boot_bias_active_type="", boot_with_const_id=None,
boot_with_const_id=None, ): memory_name=None):
agent_name = name + "+delay1" if not memory_name:
config_assert(name is not None, "name needs cannot be None")
memory_name = name + "+delay1"
agent_name = memory_name
if is_sequence: if is_sequence:
agent_layer = SequenceAgentLayer(agent_name, size) agent_layer = SequenceAgentLayer(agent_name, size)
else: else:
...@@ -2250,7 +2338,8 @@ def Memory( ...@@ -2250,7 +2338,8 @@ def Memory(
config_assert(g_current_submodel.is_recurrent_layer_group, config_assert(g_current_submodel.is_recurrent_layer_group,
'Memory should be used in recurrent layer group only') 'Memory should be used in recurrent layer group only')
memory = g_current_submodel.memories.add() memory = g_current_submodel.memories.add()
memory.layer_name = MakeLayerNameInSubmodel(name) if name is not None:
memory.layer_name = MakeLayerNameInSubmodel(name)
memory.link_name = MakeLayerNameInSubmodel(agent_name) memory.link_name = MakeLayerNameInSubmodel(agent_name)
memory.is_sequence = is_sequence memory.is_sequence = is_sequence
options = sum((boot_layer is not None, bool(boot_bias), options = sum((boot_layer is not None, bool(boot_bias),
...@@ -2274,6 +2363,17 @@ def Memory( ...@@ -2274,6 +2363,17 @@ def Memory(
return agent_name return agent_name
@config_func
def SetMemoryInput(memory_name, layer_name):
memory_name = MakeLayerNameInSubmodel(memory_name)
layer_name = MakeLayerNameInSubmodel(layer_name)
for mem in g_current_submodel.memories:
if mem.link_name == memory_name:
mem.layer_name = layer_name
return
logger.fatal("Nonexistent memory name: " + memory_name)
# Generator for recurrent layer group, to use it: # Generator for recurrent layer group, to use it:
# 1. define a id layer as output of layer group # 1. define a id layer as output of layer group
# 2. define a memory of this id layer, and assign a boot id(begin of sequence) # 2. define a memory of this id layer, and assign a boot id(begin of sequence)
......
...@@ -97,13 +97,13 @@ def reset_hook(): ...@@ -97,13 +97,13 @@ def reset_hook():
register_parse_config_hook(reset_hook) register_parse_config_hook(reset_hook)
def wrap_name_default(name_prefix=None): def wrap_name_default(name_prefix=None, name_param="name"):
""" """
Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}". Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".
.. code:: python .. code:: python
@default_name("some_name") @wrap_name_default("some_name")
def func(name=None): def func(name=None):
print name # name will never be None. If name is not set, print name # name will never be None. If name is not set,
# name will be "some_name_%d" # name will be "some_name_%d"
...@@ -115,7 +115,7 @@ def wrap_name_default(name_prefix=None): ...@@ -115,7 +115,7 @@ def wrap_name_default(name_prefix=None):
""" """
factory = DefaultNameFactory(name_prefix) factory = DefaultNameFactory(name_prefix)
_name_factories.append(factory) _name_factories.append(factory)
return wrap_param_default(["name"], factory) return wrap_param_default([name_param], factory)
def wrap_param_attr_default(param_names=None, default_factory=None): def wrap_param_attr_default(param_names=None, default_factory=None):
......
...@@ -112,6 +112,7 @@ __all__ = [ ...@@ -112,6 +112,7 @@ __all__ = [
'out_prod_layer', 'out_prod_layer',
'print_layer', 'print_layer',
'priorbox_layer', 'priorbox_layer',
'cross_channel_norm_layer',
'spp_layer', 'spp_layer',
'pad_layer', 'pad_layer',
'eos_layer', 'eos_layer',
...@@ -288,6 +289,14 @@ class LayerOutput(object): ...@@ -288,6 +289,14 @@ class LayerOutput(object):
""" """
assert False, "this method should not be invoked" assert False, "this method should not be invoked"
def set_input(self, input):
"""
Set the input for a memory layer. Can only be used for memory layer
"""
assert isinstance(input, LayerOutput)
assert self.layer_type == LayerType.MEMORY
SetMemoryInput(self.name, input.name)
ERROR_CLIPPING = 'error_clipping_threshold' ERROR_CLIPPING = 'error_clipping_threshold'
DROPOUT = 'drop_rate' DROPOUT = 'drop_rate'
...@@ -704,8 +713,9 @@ class MixedLayerType(LayerOutput): ...@@ -704,8 +713,9 @@ class MixedLayerType(LayerOutput):
assert len(self.inputs) == 0 assert len(self.inputs) == 0
return self return self
def __exit__(self, *args, **kwargs): def __exit__(self, exc_type, exc_value, tb):
del args, kwargs # unused parameter to suppress warning if exc_value is not None:
raise exc_value
assert len(self.inputs) != 0 assert len(self.inputs) != 0
ml = MixedLayer( ml = MixedLayer(
name=self.name, name=self.name,
...@@ -999,6 +1009,46 @@ def priorbox_layer(input, ...@@ -999,6 +1009,46 @@ def priorbox_layer(input,
size=size) size=size)
@wrap_name_default("cross_channel_norm")
def cross_channel_norm_layer(input, name=None, param_attr=None):
"""
Normalize a layer's output. This layer is necessary for ssd.
This layer applys normalize across the channels of each sample to
a conv layer's output and scale the output by a group of trainable
factors which dimensions equal to the channel's number.
:param name: The Layer Name.
:type name: basestring
:param input: The input layer.
:type input: LayerOutput
:param param_attr: The Parameter Attribute|list.
:type param_attr: ParameterAttribute
:return: LayerOutput
"""
assert input.num_filters is not None
Layer(
name=name,
type=LayerType.NORM_LAYER,
inputs=[
Input(
input.name,
norm=Norm(
norm_type="cross-channel-norm",
channels=input.num_filters,
size=input.size,
scale=0,
pow=0,
blocked=0),
**param_attr.attr)
])
return LayerOutput(
name,
LayerType.NORM_LAYER,
parents=input,
num_filters=input.num_filters,
size=input.size)
@wrap_name_default("seq_pooling") @wrap_name_default("seq_pooling")
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling()) @wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling())
...@@ -2036,8 +2086,9 @@ def img_conv_layer(input, ...@@ -2036,8 +2086,9 @@ def img_conv_layer(input,
:param trans: true if it is a convTransLayer, false if it is a convLayer :param trans: true if it is a convTransLayer, false if it is a convLayer
:type trans: bool :type trans: bool
:param layer_type: specify the layer_type, default is None. If trans=True, :param layer_type: specify the layer_type, default is None. If trans=True,
layer_type has to be "exconvt", otherwise layer_type layer_type has to be "exconvt" or "cudnn_convt",
has to be either "exconv" or "cudnn_conv" otherwise layer_type has to be either "exconv" or
"cudnn_conv"
:type layer_type: String :type layer_type: String
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -2077,7 +2128,7 @@ def img_conv_layer(input, ...@@ -2077,7 +2128,7 @@ def img_conv_layer(input,
if layer_type: if layer_type:
if trans: if trans:
assert layer_type in ["exconvt"] assert layer_type in ["exconvt", "cudnn_convt"]
else: else:
assert layer_type in ["exconv", "cudnn_conv"] assert layer_type in ["exconv", "cudnn_conv"]
lt = layer_type lt = layer_type
...@@ -2759,8 +2810,10 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, ...@@ -2759,8 +2810,10 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
size=a.size) size=a.size)
@wrap_name_default("memory", "memory_name")
def memory(name, def memory(name,
size, size,
memory_name=None,
is_seq=False, is_seq=False,
boot_layer=None, boot_layer=None,
boot_bias=None, boot_bias=None,
...@@ -2782,14 +2835,32 @@ def memory(name, ...@@ -2782,14 +2835,32 @@ def memory(name,
If boot_layer is not null, the memory is just the boot_layer's output. If boot_layer is not null, the memory is just the boot_layer's output.
Set :code:`is_seq` is true boot layer is sequence. Set :code:`is_seq` is true boot layer is sequence.
The same name layer in recurrent group will set memory on each time The same name layer in recurrent group will set memory on each time
step. step.
:param name: memory's name. .. code-block:: python
mem = memory(size=256, name='state')
state = fc_layer(input=mem, size=256, name='state')
If you do not want to specify the name, you can equivalently use set_input()
to specify the layer needs to be remembered as the following:
.. code-block:: python
mem = memory(size=256)
state = fc_layer(input=mem, size=256)
mem.set_input(mem)
:param name: the name of the layer which this memory remembers.
If name is None, user should call set_input() to specify the
name of the layer which this memory remembers.
:type name: basestring :type name: basestring
:param size: size of memory. :param size: size of memory.
:type size: int :type size: int
:param memory_name: the name of the memory.
It is ignored when name is provided.
:type memory_name: basestring
:param is_seq: is sequence for boot_layer :param is_seq: is sequence for boot_layer
:type is_seq: bool :type is_seq: bool
:param boot_layer: boot layer of memory. :param boot_layer: boot layer of memory.
...@@ -2811,13 +2882,21 @@ def memory(name, ...@@ -2811,13 +2882,21 @@ def memory(name,
boot_bias = ParamAttr.to_bias(boot_bias) boot_bias = ParamAttr.to_bias(boot_bias)
assert boot_layer is None or isinstance(boot_layer, LayerOutput) assert boot_layer is None or isinstance(boot_layer, LayerOutput)
if name is not None:
memory_name = None
agent_name = Memory(name, size, is_seq, boot_layer.name memory_name = Memory(
if boot_layer is not None else None, boot_bias, name,
boot_bias_active_type.name, boot_with_const_id) size,
is_sequence=is_seq,
boot_layer=boot_layer.name if boot_layer is not None else None,
boot_bias=boot_bias,
boot_bias_active_type=boot_bias_active_type.name,
boot_with_const_id=boot_with_const_id,
memory_name=memory_name)
lout = LayerOutput( lout = LayerOutput(
name=agent_name, name=memory_name,
size=size, size=size,
layer_type=LayerType.MEMORY, layer_type=LayerType.MEMORY,
parents=[boot_layer] if boot_layer is not None else None) parents=[boot_layer] if boot_layer is not None else None)
...@@ -3565,7 +3644,7 @@ def __cost_input__(input, label, weight=None): ...@@ -3565,7 +3644,7 @@ def __cost_input__(input, label, weight=None):
ipts = [Input(input.name), Input(label.name)] ipts = [Input(input.name), Input(label.name)]
parents = [input, label] parents = [input, label]
if weight is not None: if weight is not None:
assert weight.layer_type == LayerType.DATA assert weight.size == 1
ipts.append(Input(weight.name)) ipts.append(Input(weight.name))
parents.append(weight) parents.append(weight)
return ipts, parents return ipts, parents
...@@ -3679,7 +3758,8 @@ def conv_operator(img, ...@@ -3679,7 +3758,8 @@ def conv_operator(img,
padding=0, padding=0,
filter_size_y=None, filter_size_y=None,
stride_y=None, stride_y=None,
padding_y=None): padding_y=None,
trans=False):
""" """
Different from img_conv_layer, conv_op is an Operator, which can be used Different from img_conv_layer, conv_op is an Operator, which can be used
in mixed_layer. And conv_op takes two inputs to perform convolution. in mixed_layer. And conv_op takes two inputs to perform convolution.
...@@ -3735,7 +3815,9 @@ def conv_operator(img, ...@@ -3735,7 +3815,9 @@ def conv_operator(img,
if filter.size is not None: if filter.size is not None:
filter.size = filter_size * filter_size_y * num_filters * num_channels filter.size = filter_size * filter_size_y * num_filters * num_channels
op = ConvOperator( opCls = ConvTransOperator if trans else ConvOperator
op = opCls(
input_layer_names=[img.name, filter.name], input_layer_names=[img.name, filter.name],
num_filters=num_filters, num_filters=num_filters,
conv_conf=Conv( conv_conf=Conv(
...@@ -3747,6 +3829,7 @@ def conv_operator(img, ...@@ -3747,6 +3829,7 @@ def conv_operator(img,
padding_y=padding_y, padding_y=padding_y,
stride_y=stride_y, stride_y=stride_y,
groups=1)) groups=1))
op.origin = [img, filter] op.origin = [img, filter]
return op return op
...@@ -3762,7 +3845,8 @@ def conv_projection(input, ...@@ -3762,7 +3845,8 @@ def conv_projection(input,
stride_y=None, stride_y=None,
padding_y=None, padding_y=None,
groups=1, groups=1,
param_attr=None): param_attr=None,
trans=False):
""" """
Different from img_conv_layer and conv_op, conv_projection is an Projection, Different from img_conv_layer and conv_op, conv_projection is an Projection,
which can be used in mixed_layer and conat_layer. It use cudnn to implement which can be used in mixed_layer and conat_layer. It use cudnn to implement
...@@ -3801,6 +3885,8 @@ def conv_projection(input, ...@@ -3801,6 +3885,8 @@ def conv_projection(input,
:type groups: int :type groups: int
:param param_attr: Convolution param attribute. None means default attribute :param param_attr: Convolution param attribute. None means default attribute
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param trans: whether it is convTrans or conv
:type trans: boolean
:return: A DotMulProjection Object. :return: A DotMulProjection Object.
:rtype: DotMulProjection :rtype: DotMulProjection
""" """
...@@ -3837,7 +3923,9 @@ def conv_projection(input, ...@@ -3837,7 +3923,9 @@ def conv_projection(input,
param_attr.attr["initial_strategy"] = 0 param_attr.attr["initial_strategy"] = 0
param_attr.attr["initial_smart"] = False param_attr.attr["initial_smart"] = False
proj = ConvProjection( projCls = ConvTransProjection if trans else ConvProjection
proj = projCls(
input_layer_name=input.name, input_layer_name=input.name,
num_filters=num_filters, num_filters=num_filters,
conv_conf=Conv( conv_conf=Conv(
...@@ -4946,7 +5034,12 @@ def lambda_cost(input, ...@@ -4946,7 +5034,12 @@ def lambda_cost(input,
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None): def cross_entropy(input,
label,
name=None,
coeff=1.0,
weight=None,
layer_attr=None):
""" """
A loss layer for multi class entropy. A loss layer for multi class entropy.
...@@ -4961,22 +5054,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -4961,22 +5054,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
:type input: LayerOutput. :type input: LayerOutput.
:param name: The name of this layers. It is not necessary. :param name: The name of this layers. It is not necessary.
:type name: None|basestring. :type name: None|basestring.
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The cost is multiplied with coeff.
The coefficient affects the gradient in the backward.
:type coeff: float. :type coeff: float.
:param weight: The cost of each sample is multiplied with each weight.
The weight should be a layer with size=1. Note that gradient
will not be calculated for weight.
:type weight: LayerOutout
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput. :rtype: LayerOutput.
""" """
ipts, parents = __cost_input__(input, label, weight)
Layer( Layer(
name=name, name=name,
type=LayerType.CROSS_ENTROPY, type=LayerType.CROSS_ENTROPY,
inputs=[input.name, label.name], inputs=ipts,
coeff=coeff, coeff=coeff,
**ExtraLayerAttribute.to_kwargs(layer_attr)) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput( return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
@wrap_name_default() @wrap_name_default()
......
...@@ -34,11 +34,31 @@ flt = data_layer(name='filter', size=3 * 3 * 1 * 64) ...@@ -34,11 +34,31 @@ flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
with mixed_layer() as m7: with mixed_layer() as m7:
m7 += conv_operator( m7 += conv_operator(
img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3) img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3)
m7 += conv_projection(img, filter_size=3, num_filters=64, num_channels=1)
with mixed_layer() as m8:
m8 += conv_operator(
img=img,
filter=flt,
num_filters=64,
num_channels=1,
filter_size=3,
stride=2,
padding=1,
trans=True)
m8 += conv_projection(
img,
filter_size=3,
num_filters=64,
num_channels=1,
stride=2,
padding=1,
trans=True)
end = mixed_layer( end = mixed_layer(
input=[ input=[
full_matrix_projection(input=m5), full_matrix_projection(input=m5),
trans_full_matrix_projection(input=m6), full_matrix_projection(input=m7) trans_full_matrix_projection(input=m6),
full_matrix_projection(input=m7), full_matrix_projection(input=m8)
], ],
size=100, size=100,
layer_attr=ExtraAttr( layer_attr=ExtraAttr(
......
...@@ -33,6 +33,8 @@ layers { ...@@ -33,6 +33,8 @@ layers {
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
num_filters: 64 num_filters: 64
shared_biases: true shared_biases: true
height: 256
width: 256
} }
layers { layers {
name: "__batch_norm_0__" name: "__batch_norm_0__"
...@@ -58,6 +60,8 @@ layers { ...@@ -58,6 +60,8 @@ layers {
} }
bias_parameter_name: "___batch_norm_0__.wbias" bias_parameter_name: "___batch_norm_0__.wbias"
moving_average_fraction: 0.9 moving_average_fraction: 0.9
height: 256
width: 256
} }
layers { layers {
name: "__crmnorm_0__" name: "__crmnorm_0__"
......
...@@ -154,13 +154,40 @@ layers { ...@@ -154,13 +154,40 @@ layers {
inputs { inputs {
input_layer_name: "img" input_layer_name: "img"
} }
inputs {
input_layer_name: "img"
input_parameter_name: "___mixed_6__.w1"
proj_conf {
type: "conv"
name: "___mixed_6__.w1"
input_size: 1024
output_size: 57600
conv_conf {
filter_size: 3
channels: 1
stride: 1
padding: 0
groups: 1
filter_channels: 1
output_x: 30
img_size: 32
caffe_mode: true
filter_size_y: 3
padding_y: 0
stride_y: 1
output_y: 30
img_size_y: 32
}
num_filters: 64
}
}
inputs { inputs {
input_layer_name: "filter" input_layer_name: "filter"
} }
operator_confs { operator_confs {
type: "conv" type: "conv"
input_indices: 0 input_indices: 0
input_indices: 1 input_indices: 2
input_sizes: 1024 input_sizes: 1024
input_sizes: 576 input_sizes: 576
output_size: 57600 output_size: 57600
...@@ -186,38 +213,112 @@ layers { ...@@ -186,38 +213,112 @@ layers {
layers { layers {
name: "__mixed_7__" name: "__mixed_7__"
type: "mixed" type: "mixed"
size: 254016
active_type: ""
inputs {
input_layer_name: "img"
}
inputs {
input_layer_name: "img"
input_parameter_name: "___mixed_7__.w1"
proj_conf {
type: "convt"
name: "___mixed_7__.w1"
input_size: 1024
output_size: 254016
conv_conf {
filter_size: 3
channels: 1
stride: 2
padding: 1
groups: 1
filter_channels: 64
output_x: 32
img_size: 63
caffe_mode: true
filter_size_y: 3
padding_y: 1
stride_y: 2
output_y: 32
img_size_y: 63
}
num_filters: 64
}
}
inputs {
input_layer_name: "filter"
}
operator_confs {
type: "convt"
input_indices: 0
input_indices: 2
input_sizes: 1024
input_sizes: 576
output_size: 254016
conv_conf {
filter_size: 3
channels: 1
stride: 2
padding: 1
groups: 1
filter_channels: 64
output_x: 32
img_size: 63
caffe_mode: true
filter_size_y: 3
padding_y: 1
stride_y: 2
output_y: 32
img_size_y: 63
}
num_filters: 64
}
}
layers {
name: "__mixed_8__"
type: "mixed"
size: 100 size: 100
active_type: "" active_type: ""
inputs { inputs {
input_layer_name: "__mixed_4__" input_layer_name: "__mixed_4__"
input_parameter_name: "___mixed_7__.w0" input_parameter_name: "___mixed_8__.w0"
proj_conf { proj_conf {
type: "fc" type: "fc"
name: "___mixed_7__.w0" name: "___mixed_8__.w0"
input_size: 300 input_size: 300
output_size: 100 output_size: 100
} }
} }
inputs { inputs {
input_layer_name: "__mixed_5__" input_layer_name: "__mixed_5__"
input_parameter_name: "___mixed_7__.w1" input_parameter_name: "___mixed_8__.w1"
proj_conf { proj_conf {
type: "trans_fc" type: "trans_fc"
name: "___mixed_7__.w1" name: "___mixed_8__.w1"
input_size: 100 input_size: 100
output_size: 100 output_size: 100
} }
} }
inputs { inputs {
input_layer_name: "__mixed_6__" input_layer_name: "__mixed_6__"
input_parameter_name: "___mixed_7__.w2" input_parameter_name: "___mixed_8__.w2"
proj_conf { proj_conf {
type: "fc" type: "fc"
name: "___mixed_7__.w2" name: "___mixed_8__.w2"
input_size: 57600 input_size: 57600
output_size: 100 output_size: 100
} }
} }
inputs {
input_layer_name: "__mixed_7__"
input_parameter_name: "___mixed_8__.w3"
proj_conf {
type: "fc"
name: "___mixed_8__.w3"
input_size: 254016
output_size: 100
}
}
drop_rate: 0.5 drop_rate: 0.5
} }
parameters { parameters {
...@@ -281,7 +382,23 @@ parameters { ...@@ -281,7 +382,23 @@ parameters {
initial_smart: true initial_smart: true
} }
parameters { parameters {
name: "___mixed_7__.w0" name: "___mixed_6__.w1"
size: 576
initial_mean: 0.0
initial_std: 0.471404520791
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___mixed_7__.w1"
size: 576
initial_mean: 0.0
initial_std: 0.471404520791
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___mixed_8__.w0"
size: 30000 size: 30000
initial_mean: 0.0 initial_mean: 0.0
initial_std: 0.057735026919 initial_std: 0.057735026919
...@@ -291,7 +408,7 @@ parameters { ...@@ -291,7 +408,7 @@ parameters {
initial_smart: true initial_smart: true
} }
parameters { parameters {
name: "___mixed_7__.w1" name: "___mixed_8__.w1"
size: 10000 size: 10000
initial_mean: 0.0 initial_mean: 0.0
initial_std: 0.1 initial_std: 0.1
...@@ -301,7 +418,7 @@ parameters { ...@@ -301,7 +418,7 @@ parameters {
initial_smart: true initial_smart: true
} }
parameters { parameters {
name: "___mixed_7__.w2" name: "___mixed_8__.w2"
size: 5760000 size: 5760000
initial_mean: 0.0 initial_mean: 0.0
initial_std: 0.00416666666667 initial_std: 0.00416666666667
...@@ -310,10 +427,20 @@ parameters { ...@@ -310,10 +427,20 @@ parameters {
initial_strategy: 0 initial_strategy: 0
initial_smart: true initial_smart: true
} }
parameters {
name: "___mixed_8__.w3"
size: 25401600
initial_mean: 0.0
initial_std: 0.00198412698413
dims: 254016
dims: 100
initial_strategy: 0
initial_smart: true
}
input_layer_names: "test" input_layer_names: "test"
input_layer_names: "img" input_layer_names: "img"
input_layer_names: "filter" input_layer_names: "filter"
output_layer_names: "__mixed_7__" output_layer_names: "__mixed_8__"
sub_models { sub_models {
name: "root" name: "root"
layer_names: "test" layer_names: "test"
...@@ -328,10 +455,11 @@ sub_models { ...@@ -328,10 +455,11 @@ sub_models {
layer_names: "filter" layer_names: "filter"
layer_names: "__mixed_6__" layer_names: "__mixed_6__"
layer_names: "__mixed_7__" layer_names: "__mixed_7__"
layer_names: "__mixed_8__"
input_layer_names: "test" input_layer_names: "test"
input_layer_names: "img" input_layer_names: "img"
input_layer_names: "filter" input_layer_names: "filter"
output_layer_names: "__mixed_7__" output_layer_names: "__mixed_8__"
is_recurrent_layer_group: false is_recurrent_layer_group: false
} }
...@@ -331,6 +331,54 @@ layers { ...@@ -331,6 +331,54 @@ layers {
} }
trans_type: "non-seq" trans_type: "non-seq"
} }
layers {
name: "__recurrent_group_3__"
type: "recurrent_layer_group"
active_type: ""
}
layers {
name: "seq_input@__recurrent_group_3__"
type: "scatter_agent"
size: 100
active_type: ""
}
layers {
name: "__memory_6__@__recurrent_group_3__"
type: "agent"
size: 200
active_type: ""
}
layers {
name: "__fc_layer_0__@__recurrent_group_3__"
type: "fc"
size: 200
active_type: "tanh"
inputs {
input_layer_name: "seq_input@__recurrent_group_3__"
input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0"
}
inputs {
input_layer_name: "__memory_6__@__recurrent_group_3__"
input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1"
}
bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias"
}
layers {
name: "__fc_layer_0__"
type: "gather_agent"
size: 200
active_type: ""
}
layers {
name: "__last_seq_4__"
type: "seqlastins"
size: 200
active_type: "linear"
inputs {
input_layer_name: "__fc_layer_0__"
}
trans_type: "non-seq"
}
parameters { parameters {
name: "___mixed_0__.w0" name: "___mixed_0__.w0"
size: 40000 size: 40000
...@@ -481,6 +529,36 @@ parameters { ...@@ -481,6 +529,36 @@ parameters {
initial_strategy: 0 initial_strategy: 0
initial_smart: false initial_smart: false
} }
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.w0"
size: 20000
initial_mean: 0.0
initial_std: 0.1
dims: 100
dims: 200
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.w1"
size: 40000
initial_mean: 0.0
initial_std: 0.0707106781187
dims: 200
dims: 200
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__@__recurrent_group_3__.wbias"
size: 200
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 200
initial_strategy: 0
initial_smart: false
}
input_layer_names: "seq_input" input_layer_names: "seq_input"
input_layer_names: "sub_seq_input" input_layer_names: "sub_seq_input"
output_layer_names: "__last_seq_0__" output_layer_names: "__last_seq_0__"
...@@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__" ...@@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__"
output_layer_names: "__last_seq_1__" output_layer_names: "__last_seq_1__"
output_layer_names: "__last_seq_2__" output_layer_names: "__last_seq_2__"
output_layer_names: "__last_seq_3__" output_layer_names: "__last_seq_3__"
output_layer_names: "__last_seq_4__"
sub_models { sub_models {
name: "root" name: "root"
layer_names: "seq_input" layer_names: "seq_input"
...@@ -510,6 +589,9 @@ sub_models { ...@@ -510,6 +589,9 @@ sub_models {
layer_names: "__gru_group_0___recurrent_group" layer_names: "__gru_group_0___recurrent_group"
layer_names: "__gru_group_0__" layer_names: "__gru_group_0__"
layer_names: "__last_seq_3__" layer_names: "__last_seq_3__"
layer_names: "__recurrent_group_3__"
layer_names: "__fc_layer_0__"
layer_names: "__last_seq_4__"
input_layer_names: "seq_input" input_layer_names: "seq_input"
input_layer_names: "sub_seq_input" input_layer_names: "sub_seq_input"
output_layer_names: "__last_seq_0__" output_layer_names: "__last_seq_0__"
...@@ -517,6 +599,7 @@ sub_models { ...@@ -517,6 +599,7 @@ sub_models {
output_layer_names: "__last_seq_1__" output_layer_names: "__last_seq_1__"
output_layer_names: "__last_seq_2__" output_layer_names: "__last_seq_2__"
output_layer_names: "__last_seq_3__" output_layer_names: "__last_seq_3__"
output_layer_names: "__last_seq_4__"
is_recurrent_layer_group: false is_recurrent_layer_group: false
} }
sub_models { sub_models {
...@@ -647,4 +730,28 @@ sub_models { ...@@ -647,4 +730,28 @@ sub_models {
} }
target_inlinkid: -1 target_inlinkid: -1
} }
sub_models {
name: "__recurrent_group_3__"
layer_names: "seq_input@__recurrent_group_3__"
layer_names: "__memory_6__@__recurrent_group_3__"
layer_names: "__fc_layer_0__@__recurrent_group_3__"
is_recurrent_layer_group: true
reversed: false
memories {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
}
out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__"
has_subseq: false
}
target_inlinkid: -1
}
...@@ -16,6 +16,16 @@ def generate_rnn_simple(name): ...@@ -16,6 +16,16 @@ def generate_rnn_simple(name):
return rnn_simple return rnn_simple
def generate_rnn_simple_no_name():
def rnn_simple(s):
m = memory(name=None, size=200)
fc = fc_layer(input=[s, m], size=200)
m.set_input(fc)
return fc
return rnn_simple
with mixed_layer() as lstm_param: # test lstm unit, rnn group with mixed_layer() as lstm_param: # test lstm unit, rnn group
lstm_param += full_matrix_projection(input=seq, size=100 * 4) lstm_param += full_matrix_projection(input=seq, size=100 * 4)
...@@ -33,4 +43,6 @@ outputs( ...@@ -33,4 +43,6 @@ outputs(
last_seq(input=lstmemory_group( last_seq(input=lstmemory_group(
input=lstm_param, size=100)), input=lstm_param, size=100)),
last_seq(input=gru_group( last_seq(input=gru_group(
input=gru_param, size=100))) input=gru_param, size=100)),
last_seq(input=recurrent_group(
step=generate_rnn_simple_no_name(), input=seq)), )
...@@ -20,7 +20,7 @@ TODO(yuyang18): Complete the comments. ...@@ -20,7 +20,7 @@ TODO(yuyang18): Complete the comments.
import cPickle import cPickle
import itertools import itertools
import numpy import numpy
import paddle.v2.dataset.common from common import download
import tarfile import tarfile
__all__ = ['train100', 'test100', 'train10', 'test10'] __all__ = ['train100', 'test100', 'train10', 'test10']
...@@ -55,23 +55,23 @@ def reader_creator(filename, sub_name): ...@@ -55,23 +55,23 @@ def reader_creator(filename, sub_name):
def train100(): def train100():
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train')
'train')
def test100(): def test100():
return reader_creator( return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test')
paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
'test')
def train10(): def train10():
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch')
'data_batch')
def test10(): def test10():
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch')
'test_batch')
def fetch():
download(CIFAR10_URL, 'cifar', CIFAR10_MD5)
download(CIFAR100_URL, 'cifar', CIFAR100_MD5)
...@@ -17,6 +17,8 @@ import hashlib ...@@ -17,6 +17,8 @@ import hashlib
import os import os
import shutil import shutil
import sys import sys
import importlib
import paddle.v2.dataset
__all__ = ['DATA_HOME', 'download', 'md5file'] __all__ = ['DATA_HOME', 'download', 'md5file']
...@@ -69,3 +71,13 @@ def dict_add(a_dict, ele): ...@@ -69,3 +71,13 @@ def dict_add(a_dict, ele):
a_dict[ele] += 1 a_dict[ele] += 1
else: else:
a_dict[ele] = 1 a_dict[ele] = 1
def fetch_all():
for module_name in filter(lambda x: not x.startswith("__"),
dir(paddle.v2.dataset)):
if "fetch" in dir(
importlib.import_module("paddle.v2.dataset.%s" % module_name)):
getattr(
importlib.import_module("paddle.v2.dataset.%s" % module_name),
"fetch")()
...@@ -196,3 +196,11 @@ def test(): ...@@ -196,3 +196,11 @@ def test():
words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
return reader_creator(reader, word_dict, verb_dict, label_dict) return reader_creator(reader, word_dict, verb_dict, label_dict)
def fetch():
download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)
download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)
download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)
download(EMB_URL, 'conll05st', EMB_MD5)
download(DATA_URL, 'conll05st', DATA_MD5)
...@@ -123,3 +123,7 @@ def test(word_idx): ...@@ -123,3 +123,7 @@ def test(word_idx):
def word_dict(): def word_dict():
return build_dict( return build_dict(
re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
def fetch():
paddle.v2.dataset.common.download(URL, 'imdb', MD5)
...@@ -89,3 +89,7 @@ def train(word_idx, n): ...@@ -89,3 +89,7 @@ def train(word_idx, n):
def test(word_idx, n): def test(word_idx, n):
return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n) return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)
def fetch():
paddle.v2.dataset.common.download(URL, "imikolov", MD5)
...@@ -106,3 +106,10 @@ def test(): ...@@ -106,3 +106,10 @@ def test():
TEST_IMAGE_MD5), TEST_IMAGE_MD5),
paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist',
TEST_LABEL_MD5), 100) TEST_LABEL_MD5), 100)
def fetch():
paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5)
paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5)
paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
...@@ -30,6 +30,9 @@ __all__ = [ ...@@ -30,6 +30,9 @@ __all__ = [
age_table = [1, 18, 25, 35, 45, 50, 56] age_table = [1, 18, 25, 35, 45, 50, 56]
URL = 'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
MD5 = 'c4d9eecfca2ab87c1945afe126590906'
class MovieInfo(object): class MovieInfo(object):
def __init__(self, index, categories, title): def __init__(self, index, categories, title):
...@@ -77,10 +80,7 @@ USER_INFO = None ...@@ -77,10 +80,7 @@ USER_INFO = None
def __initialize_meta_info__(): def __initialize_meta_info__():
fn = download( fn = download(URL, "movielens", MD5)
url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
module_name='movielens',
md5sum='c4d9eecfca2ab87c1945afe126590906')
global MOVIE_INFO global MOVIE_INFO
if MOVIE_INFO is None: if MOVIE_INFO is None:
pattern = re.compile(r'^(.*)\((\d+)\)$') pattern = re.compile(r'^(.*)\((\d+)\)$')
...@@ -205,5 +205,9 @@ def unittest(): ...@@ -205,5 +205,9 @@ def unittest():
print train_count, test_count print train_count, test_count
def fetch():
download(URL, "movielens", MD5)
if __name__ == '__main__': if __name__ == '__main__':
unittest() unittest()
...@@ -125,3 +125,7 @@ def test(): ...@@ -125,3 +125,7 @@ def test():
""" """
data_set = load_sentiment_data() data_set = load_sentiment_data()
return reader_creator(data_set[NUM_TRAINING_INSTANCES:]) return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
def fetch():
nltk.download('movie_reviews', download_dir=common.DATA_HOME)
...@@ -89,3 +89,7 @@ def test(): ...@@ -89,3 +89,7 @@ def test():
yield d[:-1], d[-1:] yield d[:-1], d[-1:]
return reader return reader
def fetch():
download(URL, 'uci_housing', MD5)
...@@ -16,7 +16,7 @@ wmt14 dataset ...@@ -16,7 +16,7 @@ wmt14 dataset
""" """
import tarfile import tarfile
import paddle.v2.dataset.common from paddle.v2.dataset.common import download
__all__ = ['train', 'test', 'build_dict'] __all__ = ['train', 'test', 'build_dict']
...@@ -95,11 +95,13 @@ def reader_creator(tar_file, file_name, dict_size): ...@@ -95,11 +95,13 @@ def reader_creator(tar_file, file_name, dict_size):
def train(dict_size): def train(dict_size):
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size)
'train/train', dict_size)
def test(dict_size): def test(dict_size):
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size)
'test/test', dict_size)
def fetch():
download(URL_TRAIN, 'wmt14', MD5_TRAIN)
...@@ -22,7 +22,9 @@ import paddle.v2.networks as networks ...@@ -22,7 +22,9 @@ import paddle.v2.networks as networks
pixel = layer.data(name='pixel', type=data_type.dense_vector(128)) pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
label = layer.data(name='label', type=data_type.integer_value(10)) label = layer.data(name='label', type=data_type.integer_value(10))
weight = layer.data(name='weight', type=data_type.dense_vector(10)) weight = layer.data(name='weight', type=data_type.dense_vector(1))
combine_weight = layer.data(
name='weight_combine', type=data_type.dense_vector(10))
score = layer.data(name='score', type=data_type.dense_vector(1)) score = layer.data(name='score', type=data_type.dense_vector(1))
hidden = layer.fc(input=pixel, hidden = layer.fc(input=pixel,
...@@ -81,7 +83,8 @@ class AggregateLayerTest(unittest.TestCase): ...@@ -81,7 +83,8 @@ class AggregateLayerTest(unittest.TestCase):
class MathLayerTest(unittest.TestCase): class MathLayerTest(unittest.TestCase):
def test_math_layer(self): def test_math_layer(self):
addto = layer.addto(input=[pixel, pixel]) addto = layer.addto(input=[pixel, pixel])
linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10) linear_comb = layer.linear_comb(
weights=combine_weight, vectors=hidden, size=10)
interpolation = layer.interpolation( interpolation = layer.interpolation(
input=[hidden, hidden], weight=score) input=[hidden, hidden], weight=score)
bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4) bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册